From 2076f5123750934c501c2685c6a5f6845fc178a3 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Wed, 23 Dec 2020 12:19:23 +0000 Subject: [PATCH 01/62] started to implement rl env and extended cryptodataset for it --- config/custom/tcn_config_local.yaml | 2 +- config/rl_config.yaml | 178 ++++++++ notebooks/modelling/rl_env.ipynb | 273 ++++++++++++ src/dagobert/modelling/dl/__init__.py | 2 +- src/dagobert/modelling/dl/data.py | 44 +- src/dagobert/modelling/dl/preprocessing.py | 6 +- src/dagobert/modelling/rl/__init__.py | 0 src/dagobert/modelling/rl/env01.py | 442 +++++++++++++++++++ src/dagobert/modelling/rl/env02.py | 473 +++++++++++++++++++++ src/dagobert/modelling/rl/environment.py | 117 +++++ 10 files changed, 1528 insertions(+), 9 deletions(-) create mode 100644 config/rl_config.yaml create mode 100644 notebooks/modelling/rl_env.ipynb create mode 100644 src/dagobert/modelling/rl/__init__.py create mode 100644 src/dagobert/modelling/rl/env01.py create mode 100644 src/dagobert/modelling/rl/env02.py create mode 100644 src/dagobert/modelling/rl/environment.py diff --git a/config/custom/tcn_config_local.yaml b/config/custom/tcn_config_local.yaml index 7ede2d23..5da5c8f9 100644 --- a/config/custom/tcn_config_local.yaml +++ b/config/custom/tcn_config_local.yaml @@ -39,7 +39,7 @@ optuna_submission_delay: 30 output_size: 3 num_channels: [150, 150, 150, 150, 150, 150, 150] -kernel_size: 3 +kernel_size: 10 dropout: 0.5 use_last_timepoint: True last_y: False diff --git a/config/rl_config.yaml b/config/rl_config.yaml new file mode 100644 index 00000000..6508176d --- /dev/null +++ b/config/rl_config.yaml @@ -0,0 +1,178 @@ + + +# -------------------------------------------------------------------------------------- +# LIGHTNING +# -------------------------------------------------------------------------------------- + +gpus: 1 +pin_memory: True +profiler: True +val_check_interval: 0.5 +# enable it with 'power' or 'binsearch' +auto_scale_batch_size: +#precision: 16 + +# 
-------------------------------------------------------------------------------------- +# RUN +# -------------------------------------------------------------------------------------- + +log_dir: logs +num_workers: 4 +exp_name: TCN +tags: + - model1 + - ethusdt_volume500 +no_comet_logger: True +seed: 42 +batch_size: 100 + +# -------------------------------------------------------------------------------------- +# MODEL +# -------------------------------------------------------------------------------------- + +output_size: 2 +num_channels: [150, 150, 150, 150, 150, 150, 150] +kernel_size: 10 +dropout: 0.5 +use_last_timepoint: True +last_y: False +non_last_y_frac: 0.5 +regression: False +density_num: 3 +mix_density_net: False +no_class_weights: False +no_sample_weights: False + +# -------------------------------------------------------------------------------------- +# DATA +# -------------------------------------------------------------------------------------- + +data_dir: "C:/Work/dagobert/data/modelling" + +lookback: auto +mini_series_length: auto + +# If this is set to a number, then simple lookahead labelling is in place +simple_lookahead_y: +simple_lookahead_reg: False + +# If this is True, anchor is labelled before preprocessing. to_label and simple_lookahead_y cannot be used together. 
+to_label: False + +df_train: + anchor: std_bar_BTCUSDT_tick_1.feather + df2: std_bar_ETHUSDT_tick_1.feather +df_val: +df_test: +cols_to_model: + anchor: + - date_diff + - open + - high + - low + - close + - open_fd_0.0 + - high_fd_0.0 + - low_fd_0.0 + - close_fd_0.0 + - open_fd_tuned + - high_fd_tuned + - low_fd_tuned + - close_fd_tuned + - cum_ticks + - cum_dollar + - volume + - cum_volume_buy + - cum_volume_sell + - cum_volume_quote + - cum_volume_quote_buy + - cum_volume_quote_sell + - sin_date + - cos_date + - sin_time + - cos_time + - boll + - boll_lb + - boll_ub + - macd + - macds + - macdh + - wr_60 + - rsi_60 + - rsv_60 + - atr_60 + - cci_60 + - kdjk_60 + - kdjd_60 + - kdjj_60 + - pdi_60 + - mdi_60 + - vr_60 + df2: + - date_diff + - open + - high + - low + - close + - open_fd_0.0 + - high_fd_0.0 + - low_fd_0.0 + - close_fd_0.0 + - open_fd_tuned + - high_fd_tuned + - low_fd_tuned + - close_fd_tuned + - cum_ticks + - cum_dollar + - volume + - cum_volume_buy + - cum_volume_sell + - cum_volume_quote + - cum_volume_quote_buy + - cum_volume_quote_sell + - sin_date + - cos_date + - sin_time + - cos_time + - boll + - boll_lb + - boll_ub + - macd + - macds + - macdh + - wr_60 + - rsi_60 + - rsv_60 + - atr_60 + - cci_60 + - kdjk_60 + - kdjd_60 + - kdjj_60 + - pdi_60 + - mdi_60 + - vr_60 +target_col: close_fd_0.0 +time_feat_n: 1 +time_embed_dim: 12 + +augment_method: random_fast +augment_prob: 0 +augment_dfs: +augment_dfs_mix: 0 + +# -------------------------------------------------------------------------------------- +# PREPROCESSING +# -------------------------------------------------------------------------------------- + +train_start_date: "2018-06-01" +train_days: 730 +val_days: 60 +val_train_offset_days: 1 +val_puffer_days: 1 +test_days: 30 +test_train_offset_days: 62 +test_puffer_days: 1 + +sample_weights: +binariser_method: +scaling_method: minmax diff --git a/notebooks/modelling/rl_env.ipynb b/notebooks/modelling/rl_env.ipynb new file mode 100644 index 
00000000..cd4bd631 --- /dev/null +++ b/notebooks/modelling/rl_env.ipynb @@ -0,0 +1,273 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from dagobert.io import S3Connector\n", + "from dagobert.modelling.rl.environment import PortfolioEnv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PortfolioSim\n", + "\n", + "We go through the `step` function of the `PortfolioSim` class to understand what is it doing. " + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.48192771, 0.26506024, 0.25301205])" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eps = np.finfo(float).eps\n", + "\n", + "# orig portfolio value\n", + "p0 = 1\n", + "\n", + "# orig portfolio allocation (50% cash, nothing in 25% btc, 25% eth)\n", + "w0 = np.array([.5, .25, .25])\n", + "\n", + "# new relative price vector, expressed as returns (BTC went up 10%, ETH 5%)\n", + "y1 = np.array([1, 1.1, 1.05])\n", + "\n", + "# (eq7) since we last acted prices changed, so weights evolve (see below)\n", + "dw1 = (y1 * w0) / (np.dot(y1, w0) + eps)\n", + "dw1" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.00020481927710843396" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# new weight vector from the agent for this timestep (predicted correctly that BTC/ETH will go up and allocated more USD to 
them)\n", + "w1 = np.array([.4, .3, .3])\n", + "\n", + "# (eq16) cost to change portfolio:\n", + "# excluding change in cash to avoid double counting for transaction cost\n", + "mu = 0.0025 * (np.abs(dw1[1:] - w1[1:])).sum()\n", + "mu" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.045" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.dot(y1, w1)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9997951807228915" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(1 - mu)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0447859638554216" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# (eq11) new portfolio value: see section between (eq19-20) why this works\n", + "p1 = p0 * (1 - mu) * np.dot(y1, w1)\n", + "p1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + " # (eq16) cost to change portfolio:\n", + " # excluding change in cash to avoid double counting for transaction cost\n", + " mu = self.cost * (np.abs(dw1[1:] - w1[1:])).sum()\n", + "\n", + " # (eq11) final portfolio value: see section between (eq19-20) why this works\n", + " p1 = p0 * (1 - mu) * np.dot(y1, w0)\n", + "\n", + " # (eq9 & 10) rate of return log rate of return\n", + " rho1 = p1 / p0 - 1 # rate of returns\n", + " r1 = np.log(p1 + eps) - np.log(p0 + eps)\n", + "\n", + " # (eq22) immediate reward is log rate of return scaled by episode length\n", + " reward = r1 / self.steps" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + 
"data_dir = Path('C:/Work/dagobert/data/modelling')\n", + "instruments = ['BTC', 'ETH', 'XRP', 'LTC']\n", + "datetimes = None\n", + "\n", + "# work out the common datetimes\n", + "for instrument in instruments:\n", + " df = pd.read_feather(data_dir/f'std_bar_{instrument}USDT_tick_1.feather')\n", + " df = df.set_index('date_time')\n", + " if datetimes is not None:\n", + " datetimes = df.index.intersection(datetimes)\n", + " else:\n", + " datetimes = df.index\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# merge instruments \n", + "cols_to_select = ['open', 'low', 'high', 'close', 'volume']\n", + "history = np.empty((len(instruments), len(datetimes), len(cols_to_select)))\n", + "for i,instrument in enumerate(instruments):\n", + " df = pd.read_feather(data_dir/f'std_bar_{instrument}USDT_tick_1.feather')\n", + " history[i, :, :] = df.set_index('date_time').loc[datetimes, cols_to_select].values" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# make portfolio\n", + "portfolio = PortfolioEnv(history, instruments, len(datetimes), window_length=1440)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'PortfolioSim' object has no attribute 'p0'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mw\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m.25\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m.25\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[1;36m.25\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m.25\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mportfolio\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32mc:\\users\\danih\\dropbox\\dagobert\\dagobert\\src\\dagobert\\modelling\\rl\\environment.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m 253\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 254\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 255\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_step\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 256\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 257\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_step\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\users\\danih\\dropbox\\dagobert\\dagobert\\src\\dagobert\\modelling\\rl\\environment.py\u001b[0m in \u001b[0;36m_step\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m 296\u001b[0m \u001b[0mopen_price_vector\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mobservation\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 297\u001b[0m \u001b[0my1\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mclose_price_vector\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0mopen_price_vector\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 298\u001b[1;33m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfo\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone2\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msim\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_step\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mweights\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 299\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 300\u001b[0m \u001b[1;31m# calculate return for buy and hold a bit of each asset\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\users\\danih\\dropbox\\dagobert\\dagobert\\src\\dagobert\\modelling\\rl\\environment.py\u001b[0m in \u001b[0;36m_step\u001b[1;34m(self, w1, y1)\u001b[0m\n\u001b[0;32m 146\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0my1\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m1.0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"y1[0] must be 1\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 147\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 148\u001b[1;33m \u001b[0mp0\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mp0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 149\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[0mdw1\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0my1\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mw1\u001b[0m\u001b[1;33m)\u001b[0m 
\u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mw1\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0meps\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# (eq7) weights evolve into\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mAttributeError\u001b[0m: 'PortfolioSim' object has no attribute 'p0'" + ] + } + ], + "source": [ + "w = np.array([0, .25, .25, .25, .25])\n", + "portfolio.step(w)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.6 64-bit ('dagobert': conda)", + "language": "python", + "name": "python37664bitdagobertconda90fcdb25face404d8cd237e8f8473045" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/src/dagobert/modelling/dl/__init__.py b/src/dagobert/modelling/dl/__init__.py index b69b500d..81ddc443 100644 --- a/src/dagobert/modelling/dl/__init__.py +++ b/src/dagobert/modelling/dl/__init__.py @@ -1,4 +1,4 @@ -from .data import CryptoDataset +from .data import CryptoDataset, PortfolioCryptoDataset from .tcn_net import TemporalConvNet from .utils import LogCoshLoss, FocalLoss, MixedNormalPDFLoss from .adabelief import AdaBelief diff --git a/src/dagobert/modelling/dl/data.py b/src/dagobert/modelling/dl/data.py index e1a29584..09825bda 100644 --- a/src/dagobert/modelling/dl/data.py +++ b/src/dagobert/modelling/dl/data.py @@ -1,5 +1,6 @@ """ -Classes defining PyTorch datasets for modelling. +Classes defining PyTorch datasets for supervised deep learning and multi-instrument +reinforcement learning. 
""" import logging from pathlib import Path @@ -68,12 +69,12 @@ class CryptoDataset(Dataset): - std_bar_BTCUSDT_volume_250.feather - std_bar_BTCUSDT_volume_500.feather - Here, we have the lovest granularity ETHUSDT and BTCUSDT bars simultaneously used + Here, we have the lowest granularity ETHUSDT and BTCUSDT bars simultaneously used to model whether the ETH price will be up or down in 30 minutes: simple_lookahead_y. Both input DFs use 4 columns (OHLC) and both can be augmented with a 50% chance, meaning, on average in every batch 256 samples would come from the augment_dfs. The anchor can only be augmented by one of the other less granular ETHUSDT volume - bar datasets, while the secondary input DF (BTCUSDT) chas its own two augmentation + bar datasets, while the secondary input DF (BTCUSDT) has its own two augmentation data sources. Internally these 6 DFs will be rapackaged as a single dict (`self.dfs`), where the @@ -94,7 +95,7 @@ class CryptoDataset(Dataset): The reason why we disect the data (pandas DFs) into these dicts and list of np arrays is because of huge performance gains when we do the indexing in np instead - of pandas .loc. + of using the .loc method of pandas. """ def __init__( @@ -559,3 +560,38 @@ def plot(self) -> Tuple[Figure]: plt.ylabel("Count") plt.close() return fig_close, fig_data, fig_target + + +class PortfolioCryptoDataset(CryptoDataset): + """ + This extends :class:`dagobert.modelling.dl.data.CryptoDataset` to make it + suitable for multi instrument portfolio optimization through reinforcement-learning. + + Instead of returning an array of Xs and single y, this returns only the Xs, and + uses the last time step of the Xs as the target. 
+ """ + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + # for each instrument, fish out the index of the target_col (close_0 by default) + self.target_col_ix = [ + np.where(self.target_col == np.array(cols))[0] + for cols in self.cols_to_model.values() + ] + + def __getitem__(self, idx): + """ + We don't need to calculate or fetch y, as we can simply use the last timepoint + for that. + """ + idx = idx.tolist() if torch.is_tensor(idx) else idx + batch_dfs, batch_indices, _ = self._get_batch_dfs_indices_target() + from_idx, upto_idx = self._get_from_upto_idxs(idx, batch_indices) + Xs = self._get_Xs(batch_dfs, from_idx, upto_idx) + + # cut off last time-point from each X as use that as y. + ys = np.empty(len(Xs)) + for i, X in enumerate(Xs): + Xs[i] = X[:, :-1] + ys[i] = X[self.target_col_ix[i], -1] + return Xs, ys diff --git a/src/dagobert/modelling/dl/preprocessing.py b/src/dagobert/modelling/dl/preprocessing.py index 6c4b8d12..50efa475 100644 --- a/src/dagobert/modelling/dl/preprocessing.py +++ b/src/dagobert/modelling/dl/preprocessing.py @@ -41,9 +41,9 @@ class Preprocessing(object): @staticmethod def preprocess_augment_dfs(hparams: Namespace) -> Namespace: """ - If DFs are defined in augment_dfs we download and scale them. We only - check the scaling_method parameter of hparams for deciding if we already have - a downloaeded and processed version in the data_dir. + If DFs are defined in augment_dfs we download and scale them plus label the + anchor. We only check the labelling & scaling_method parameters of hparams for + deciding if we already have a downloaeded and processed version in the data_dir. Args: hparams: Parsed hypere parameters of the experiment. 
class DataSrc(object):
    """Acts as data provider for each new episode."""

    def __init__(
        self,
        df,
        steps=252,
        scale=True,
        scale_extra_cols=True,
        augment=0.00,
        window_length=50,
        random_reset=True,
    ):
        """
        DataSrc.

        Args:
            df: DataFrame with a timestamp index and multi-index columns
                levels=[['LTCBTC', ...], ['open', 'low', 'high', 'close', ...]].
                NOTE(review): feature order is read from the (sorted) column
                level while the raw matrix keeps the physical column order --
                assumes the columns are lexsorted; confirm upstream.
            steps: total steps in an episode.
            scale: scale the price columns of each window by its last close.
            scale_extra_cols: scale extra (non-price) columns by global mean/std.
            augment: std of gaussian noise added to each episode's data.
            window_length: number of past observations returned per step.
            random_reset: reset to a random time (otherwise walk forward
                through time, wrapping around at the end).
        """
        self.steps = steps + 1
        self.augment = augment
        self.random_reset = random_reset
        self.scale = scale
        self.scale_extra_cols = scale_extra_cols
        self.window_length = window_length
        self.idx = self.window_length

        # Forward-fill gaps first, then zero anything still missing at the very
        # start of the series. (The original zero-filled all NaNs first, which
        # made the subsequent pad-fill dead code.)
        df = df.ffill().fillna(0)

        # dataframe to matrix, shaped (assets, time, features)
        self.asset_names = df.columns.levels[0].tolist()
        self.features = df.columns.levels[1].tolist()
        # .as_matrix() was removed in pandas 1.0 -- use .to_numpy()
        data = df.to_numpy().reshape(
            (len(df), len(self.asset_names), len(self.features))
        )
        self._data = np.transpose(data, (1, 0, 2))
        self._times = df.index

        self.price_columns = ["close", "high", "low", "open"]
        self.non_price_columns = set(df.columns.levels[1]) - set(self.price_columns)

        # Stats to let us normalize the non-price columns.
        if scale_extra_cols:
            x = self._data.reshape((-1, len(self.features)))
            self.stats = dict(mean=x.mean(0), std=x.std(0))

        self.reset()

    def _step(self):
        """Return (history, y1, done) for the current step and advance time."""
        # observation window ending at the current step
        data_window = self.data[:, self.step : self.step + self.window_length].copy()

        # (eq 1) price relative vector from the last two closes (feature 0)
        y1 = data_window[:, -1, 0] / data_window[:, -2, 0]
        y1 = np.concatenate([[1.0], y1])  # prepend the cash price

        # (eq 18) X: price columns are divided by the window's last close price
        nb_pc = len(self.price_columns)
        if self.scale:
            last_close_price = data_window[:, -1, 0]
            data_window[:, :, :nb_pc] /= last_close_price[:, np.newaxis, np.newaxis]

        if self.scale_extra_cols:
            # standardise non-price columns, then clip outliers
            # NOTE(review): the clip bounds are in the ORIGINAL units
            # (mean +/- 10*std) although the data was just standardised;
            # preserved as-is, but this looks like it was meant to be +/-10.
            data_window[:, :, nb_pc:] -= self.stats["mean"][None, None, nb_pc:]
            data_window[:, :, nb_pc:] /= self.stats["std"][None, None, nb_pc:]
            data_window[:, :, nb_pc:] = np.clip(
                data_window[:, :, nb_pc:],
                self.stats["mean"][nb_pc:] - self.stats["std"][nb_pc:] * 10,
                self.stats["mean"][nb_pc:] + self.stats["std"][nb_pc:] * 10,
            )

        self.step += 1
        history = data_window
        done = bool(self.step >= self.steps)

        return history, y1, done

    def reset(self):
        """Start a new episode: pick its data slice and optionally add noise."""
        self.step = 0

        if self.random_reset:
            self.idx = np.random.randint(
                low=self.window_length + 1, high=self._data.shape[1] - self.steps - 2
            )
        else:
            # continue sequentially, wrapping back to the start when exhausted
            if self.idx > (self._data.shape[1] - self.steps - self.window_length - 1):
                self.idx = self.window_length + 1
            else:
                self.idx += self.steps
        data = self._data[
            :, self.idx - self.window_length : self.idx + self.steps + 1
        ].copy()
        self.times = self._times[
            self.idx - self.window_length : self.idx + self.steps + 1
        ]

        # augment data to prevent overfitting (skipped when augment == 0,
        # which also avoids burning RNG state on a no-op draw)
        if self.augment:
            data += np.random.normal(loc=0, scale=self.augment, size=data.shape)

        self.data = data
class PortfolioSim(object):
    """
    Portfolio management simulator.

    Tracks the portfolio weight vector and value across steps, charging a
    proportional transaction cost on every reallocation.

    Params:
        asset_names: names of the non-cash assets (cash occupies slot 0).
        steps: episode length, used to scale the per-step reward (eq 22).
        trading_cost: fraction charged on traded volume; e.g. 0.0025 is the
            max at Poloniex.
        time_cost: fraction charged per step for holding (currently unused in
            `_step`; kept for interface compatibility).

    Based on [Jiang 2017](https://arxiv.org/abs/1706.10059)
    """

    def __init__(self, asset_names=(), steps=128, trading_cost=0.0025, time_cost=0.0):
        self.cost = trading_cost
        self.time_cost = time_cost
        self.steps = steps
        # copy into a list so a caller-supplied sequence is never shared
        # (the original used a mutable default argument here)
        self.asset_names = list(asset_names)
        self.reset()

    def _step(self, w1, y1):
        """
        Advance the simulator by one step.

        Args:
            w1: new portfolio weight vector from the agent, e.g. [0.1, 0.9, 0.0]
                (first entry is cash).
            y1: price relative vector, also called return, e.g. [1.0, 0.9, 1.1]
                (first entry is cash, so it is always 1.0).

        Returns:
            (reward, info, done); `info` holds single scalar values only.

        Numbered equations are from https://arxiv.org/abs/1706.10059
        """
        # mirrors the shared config epsilon (machine epsilon for float64)
        eps = np.finfo(float).eps

        w0 = self.w0
        p0 = self.p0

        # (eq7) since we last acted prices changed, so weights evolve into
        dw1 = (y1 * w0) / (np.dot(y1, w0) + eps)

        # (eq16) cost to change portfolio: p' -> mu -> pt, see Figure 1
        # excluding change in cash to avoid double counting for transaction cost
        mu = self.cost * (np.abs(dw1[1:] - w1[1:])).sum()

        # (eq11) final portfolio value grows with the NEW weights w1, not the
        # drifted old ones (the original used w0 here, which ignored the
        # agent's reallocation) -- see section between (eq19-20) why this works
        p1 = p0 * (1 - mu) * np.dot(y1, w1)

        # (eq9 & 10) rate of return and log rate of return
        rho1 = p1 / p0 - 1
        r1 = np.log(p1 + eps) - np.log(p0 + eps)

        # (eq22) immediate reward is log rate of return scaled by episode length
        reward = r1 / self.steps

        # remember for next step
        self.w0 = w1
        self.p0 = p1

        # if we run out of money, we're done
        done = bool(p1 == 0)

        info = {
            "reward": reward,
            "log_return": r1,
            "portfolio_value": p1,
            "market_return": y1.mean(),
            "rate_of_return": rho1,
            "weights_mean": w1.mean(),
            "weights_std": w1.std(),
            "cost": mu,
        }
        # record weights and prices per asset ("BTCBTC" stands in for cash)
        for i, name in enumerate(["BTCBTC"] + self.asset_names):
            info["weight_" + name] = w1[i]
            info["price_" + name] = y1[i]

        self.infos.append(info)
        return reward, info, done

    def reset(self):
        """Start fully in cash with unit portfolio value."""
        self.infos = []
        self.w0 = np.array([1.0] + [0.0] * len(self.asset_names))
        self.p0 = 1.0
+ + Based on [Jiang 2017](https://arxiv.org/abs/1706.10059) + """ + + metadata = {"render.modes": ["notebook", "ansi"]} + + def __init__( + self, + df, + steps=256, + trading_cost=0.0025, + time_cost=0.00, + window_length=50, + augment=0.00, + output_mode="EIIE", + log_dir=None, + scale=True, + scale_extra_cols=True, + random_reset=True, + ): + """ + An environment for financial portfolio management. + + Params: + df - csv for data frame index of timestamps + and multi-index columns levels=[['LTCBTC'],...],['open','low','high','close']] + steps - steps in episode + window_length - how many past observations["history"] to return + trading_cost - cost of trade as a fraction, e.g. 0.0025 corresponding to max rate of 0.25% at Poloniex (2017) + time_cost - cost of holding as a fraction + augment - fraction to randomly shift data by + output_mode: decides observation["history"] shape + - 'EIIE' for (assets, window, 3) + - 'atari' for (window, window, 3) (assets is padded) + - 'mlp' for (assets*window*3) + log_dir: directory to save plots to + scale - scales price data by last opening price on each episode (except return) + scale_extra_cols - scales non price data using mean and std for whole dataset + """ + self.src = DataSrc( + df=df, + steps=steps, + scale=scale, + scale_extra_cols=scale_extra_cols, + augment=augment, + window_length=window_length, + random_reset=random_reset, + ) + self._plot = self._plot2 = self._plot3 = None + self.output_mode = output_mode + self.sim = PortfolioSim( + asset_names=self.src.asset_names, + trading_cost=trading_cost, + time_cost=time_cost, + steps=steps, + ) + self.log_dir = log_dir + + # openai gym attributes + # action will be the portfolio weights [cash_bias,w1,w2...] 
where wn are [0, 1] for each asset + nb_assets = len(self.src.asset_names) + self.action_space = gym.spaces.Box(0.0, 1.0, shape=nb_assets + 1) + + # get the history space from the data min and max + if output_mode == "EIIE": + obs_shape = (nb_assets, window_length, len(self.src.features)) + elif output_mode == "atari": + obs_shape = (window_length, window_length, len(self.src.features)) + elif output_mode == "mlp": + obs_shape = (nb_assets) * window_length * (len(self.src.features)) + else: + raise Exception("Invalid value for output_mode: %s" % self.output_mode) + + self.observation_space = gym.spaces.Dict( + { + "history": gym.spaces.Box( + -10, + 20 + if scale + else 1, # if scale=True observed price changes return could be large fractions + obs_shape, + ), + "weights": self.action_space, + } + ) + self._reset() + + def _step(self, action): + """ + Step the env. + + Actions should be portfolio [w0...] + - Where wn is a portfolio weight between 0 and 1. The first (w0) is cash_bias + - cn is the portfolio conversion weights see PortioSim._step for description + """ + logger.debug("action: %s", action) + + weights = np.clip(action, 0.0, 1.0) + weights /= weights.sum() + eps + + # Sanity checks + assert self.action_space.contains( + action + ), "action should be within %r but is %r" % (self.action_space, action) + np.testing.assert_almost_equal( + np.sum(weights), + 1.0, + 3, + err_msg='weights should sum to 1. 
action="%s"' % weights, + ) + + history, y1, done1 = self.src._step() + + reward, info, done2 = self.sim._step(weights, y1) + + # calculate return for buy and hold a bit of each asset + info["market_value"] = np.cumprod( + [inf["market_return"] for inf in self.infos + [info]] + )[-1] + # add dates + info["date"] = self.src.times[self.src.step].timestamp() + info["steps"] = self.src.step + + self.infos.append(info) + + # reshape history according to output mode + if self.output_mode == "EIIE": + pass + elif self.output_mode == "atari": + padding = history.shape[1] - history.shape[0] + history = np.pad(history, [[0, padding], [0, 0], [0, 0]], mode="constant") + elif self.output_mode == "mlp": + history = history.flatten() + + return {"history": history, "weights": weights}, reward, done1 or done2, info + + def _reset(self): + self.sim.reset() + self.src.reset() + self.infos = [] + action = self.sim.w0 + observation, reward, done, info = self.step(action) + return observation + + def _seed(self, seed): + np.random.seed(seed) + return [seed] + + def _render(self, mode="notebook", close=False): + # if close: + # return + if mode == "ansi": + pprint(self.infos[-1]) + elif mode == "notebook": + self.plot_notebook(close) + + def plot_notebook(self, close=False): + """Live plot using the jupyter notebook rendering of matplotlib.""" + + if close: + self._plot = self._plot2 = self._plot3 = None + return + + df_info = pd.DataFrame(self.infos) + df_info.index = pd.to_datetime(df_info["date"], unit="s") + + # plot prices and performance + all_assets = ["BTCBTC"] + self.sim.asset_names + if not self._plot: + colors = [None] * len(all_assets) + ["black"] + self._plot_dir = ( + os.path.join(self.log_dir, "notebook_plot_prices_" + str(time.time())) + if self.log_dir + else None + ) + self._plot = LivePlotNotebook( + log_dir=self._plot_dir, + title="prices & performance", + labels=all_assets + ["Portfolio"], + ylabel="value", + colors=colors, + ) + x = df_info.index + y_portfolio = 
df_info["portfolio_value"] + y_assets = [df_info["price_" + name].cumprod() for name in all_assets] + self._plot.update(x, y_assets + [y_portfolio]) + + # plot portfolio weights + if not self._plot2: + self._plot_dir2 = ( + os.path.join(self.log_dir, "notebook_plot_weights_" + str(time.time())) + if self.log_dir + else None + ) + self._plot2 = LivePlotNotebook( + log_dir=self._plot_dir2, + labels=all_assets, + title="weights", + ylabel="weight", + ) + ys = [df_info["weight_" + name] for name in all_assets] + self._plot2.update(x, ys) + + # plot portfolio costs + if not self._plot3: + self._plot_dir3 = ( + os.path.join(self.log_dir, "notebook_plot_cost_" + str(time.time())) + if self.log_dir + else None + ) + self._plot3 = LivePlotNotebook( + log_dir=self._plot_dir3, labels=["cost"], title="costs", ylabel="cost" + ) + ys = [df_info["cost"].cumsum()] + self._plot3.update(x, ys) + + if close: + self._plot = self._plot2 = self._plot3 = None diff --git a/src/dagobert/modelling/rl/env02.py b/src/dagobert/modelling/rl/env02.py new file mode 100644 index 00000000..7dfdbd55 --- /dev/null +++ b/src/dagobert/modelling/rl/env02.py @@ -0,0 +1,473 @@ +""" +Modified from https://github.com/vermouth1992/drl-portfolio-management +""" +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +import gym +import gym.spaces + +eps = np.finfo(float).eps + + +def random_shift(x, fraction): + """ Apply a random shift to a pandas series. """ + min_x, max_x = np.min(x), np.max(x) + m = np.random.uniform(-fraction, fraction, size=x.shape) + 1 + return np.clip(x * m, min_x, max_x) + + +def scale_to_start(x): + """ Scale pandas series so that it starts at one. """ + x = (x + eps) / (x[0] + eps) + return x + + +def sharpe(returns, freq=30, rfr=0): + """ Given a set of returns, calculates naive (rfr=0) sharpe (eq 28). """ + return (np.sqrt(freq) * np.mean(returns - rfr + eps)) / np.std(returns - rfr + eps) + + +def max_drawdown(returns): + """ Max drawdown. 
See https://www.investopedia.com/terms/m/maximum-drawdown-mdd.asp """ + peak = returns.max() + trough = returns[returns.argmax() :].min() + return (trough - peak) / (peak + eps) + + +class DataGenerator(object): + """Acts as data provider for each new episode.""" + + def __init__( + self, + history, + abbreviation, + steps=730, + window_length=50, + start_idx=0, + start_date=None, + ): + """ + + Args: + history: (num_stocks, timestamp, 5) open, high, low, close, volume + abbreviation: a list of length num_stocks with assets name + steps: the total number of steps to simulate, default is 2 years + window_length: observation window, must be less than 50 + start_date: the date to start. Default is None and random pick one. + It should be a string e.g. '2012-08-13' + """ + assert history.shape[0] == len( + abbreviation + ), "Number of stock is not consistent" + import copy + + self.steps = steps + 1 + self.window_length = window_length + self.step = start_idx + self.start_date = start_date + + # make immutable class + self._data = history.copy() # all data + self.asset_names = copy.copy(abbreviation) + + def _step(self): + # get observation matrix from history, exclude volume, maybe volume is useful as it + # indicates how market total investment changes. Normalize could be critical here + self.step += 1 + obs = self.data[:, self.step : self.step + self.window_length, :].copy() + # normalize obs with open price + + # used for compute optimal action and sanity check + ground_truth_obs = self.data[ + :, self.step + self.window_length : self.step + self.window_length + 1, : + ].copy() + + done = self.step >= self.steps + return obs, done, ground_truth_obs + + def reset(self): + self.step = 0 + + # get data for this episode, each episode might be different. 
+ if self.start_date is None: + self.idx = np.random.randint( + low=self.window_length, high=self._data.shape[1] - self.steps + ) + else: + raise ValueError("start_date is not yet supported / implemented") + # compute index corresponding to start_date for repeatable sequence + # self.idx = date_to_index(self.start_date) - self.start_idx + # assert ( + # self.idx >= self.window_length + # and self.idx <= self._data.shape[1] - self.steps + # ), "Invalid start date, must be window_length day after start date and simulation steps day before end date" + data = self._data[ + :, self.idx - self.window_length : self.idx + self.steps + 1, :4 + ] + # apply augmentation? + self.data = data + return ( + self.data[:, self.step : self.step + self.window_length, :].copy(), + self.data[ + :, + self.step + self.window_length : self.step + self.window_length + 1, + :, + ].copy(), + ) + + +class PortfolioSim(object): + """ + Portfolio management sim. + Params: + - cost e.g. 0.0025 is max in Poliniex + Based of [Jiang 2017](https://arxiv.org/abs/1706.10059) + """ + + def __init__( + self, asset_names=list(), steps=730, trading_cost=0.0025, time_cost=0.0 + ): + self.asset_names = asset_names + self.cost = trading_cost + self.time_cost = time_cost + self.steps = steps + self.reset() + + def _step(self, w1, y1): + """ + Step. + w1 - new action of portfolio weights - e.g. [0.1,0.9,0.0] + y1 - price relative vector also called return + e.g. 
[1.0, 0.9, 1.1] + Numbered equations are from https://arxiv.org/abs/1706.10059 + """ + assert w1.shape == y1.shape, "w1 and y1 must have the same shape" + assert y1[0] == 1.0, "y1[0] must be 1" + + p0 = self.p0 + + dw1 = (y1 * w1) / (np.dot(y1, w1) + eps) # (eq7) weights evolve into + + mu1 = self.cost * (np.abs(dw1 - w1)).sum() # (eq16) cost to change portfolio + + assert mu1 < 1.0, "Cost is larger than current holding" + + p1 = p0 * (1 - mu1) * np.dot(y1, w1) # (eq11) final portfolio value + + p1 = p1 * (1 - self.time_cost) # we can add a cost to holding + + rho1 = p1 / p0 - 1 # rate of returns + r1 = np.log((p1 + eps) / (p0 + eps)) # log rate of return + reward = r1 / self.steps * 1000.0 # (22) average logarithmic accumulated return + # remember for next step + self.p0 = p1 + + # if we run out of money, we're done (losing all the money) + done = p1 == 0 + + info = { + "reward": reward, + "log_return": r1, + "portfolio_value": p1, + "return": y1.mean(), + "rate_of_return": rho1, + "weights_mean": w1.mean(), + "weights_std": w1.std(), + "cost": mu1, + } + self.infos.append(info) + return reward, info, done + + def reset(self): + self.infos = [] + self.p0 = 1.0 + + +class PortfolioEnv(gym.Env): + """ + An environment for financial portfolio management. + Financial portfolio management is the process of constant redistribution of a fund into different + financial products. + Based on [Jiang 2017](https://arxiv.org/abs/1706.10059) + """ + + metadata = {"render.modes": ["human", "ansi"]} + + def __init__( + self, + history, + abbreviation, + steps=730, # 2 years + trading_cost=0.0025, + time_cost=0.00, + window_length=50, + start_idx=0, + sample_start_date=None, + ): + """ + An environment for financial portfolio management. 
+ Params: + steps - steps in episode + scale - scale data and each episode (except return) + augment - fraction to randomly shift data by + trading_cost - cost of trade as a fraction + time_cost - cost of holding as a fraction + window_length - how many past observations to return + start_idx - The number of days from '2012-08-13' of the dataset + sample_start_date - The start date sampling from the history + """ + self.window_length = window_length + self.num_stocks = history.shape[0] + self.start_idx = start_idx + + self.src = DataGenerator( + history, + abbreviation, + steps=steps, + window_length=window_length, + start_idx=start_idx, + start_date=sample_start_date, + ) + + self.sim = PortfolioSim( + asset_names=abbreviation, + trading_cost=trading_cost, + time_cost=time_cost, + steps=steps, + ) + + # openai gym attributes + # action will be the portfolio weights from 0 to 1 for each asset + self.action_space = gym.spaces.Box( + 0, 1, shape=(len(self.src.asset_names) + 1,), dtype=np.float32 + ) # include cash + + # get the observation space from the data min and max + self.observation_space = gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(len(abbreviation), window_length, history.shape[-1]), + dtype=np.float32, + ) + + def step(self, action): + return self._step(action) + + def _step(self, action): + """ + Step the env. + Actions should be portfolio [w0...] + - Where wn is a portfolio weight from 0 to 1. The first is cash_bias + - cn is the portfolio conversion weights see PortioSim._step for description + """ + np.testing.assert_almost_equal(action.shape, (len(self.sim.asset_names) + 1,)) + + # normalise just in case + action = np.clip(action, 0, 1) + + weights = action # np.array([cash_bias] + list(action)) # [w0, w1...] + weights /= weights.sum() + eps + weights[0] += np.clip( + 1 - weights.sum(), 0, 1 + ) # so if weights are all zeros we normalise to [1,0...] 
+ + assert ((action >= 0) * (action <= 1)).all(), ( + "all action values should be between 0 and 1. Not %s" % action + ) + np.testing.assert_almost_equal( + np.sum(weights), + 1.0, + 3, + err_msg='weights should sum to 1. action="%s"' % weights, + ) + + observation, done1, ground_truth_obs = self.src._step() + + # concatenate observation with ones + cash_observation = np.ones((1, self.window_length, observation.shape[2])) + observation = np.concatenate((cash_observation, observation), axis=0) + + cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2])) + ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) + + # relative price vector of last observation day (close/open) + close_price_vector = observation[:, -1, 3] + open_price_vector = observation[:, -1, 0] + y1 = close_price_vector / open_price_vector + reward, info, done2 = self.sim._step(weights, y1) + + # calculate return for buy and hold a bit of each asset + info["market_value"] = np.cumprod( + [inf["return"] for inf in self.infos + [info]] + )[-1] + # add dates + info["date"] = self.start_idx + self.src.idx + self.src.step + info["steps"] = self.src.step + info["next_obs"] = ground_truth_obs + + self.infos.append(info) + + return observation, reward, done1 or done2, info + + def reset(self): + return self._reset() + + def _reset(self): + self.infos = [] + self.sim.reset() + observation, ground_truth_obs = self.src.reset() + cash_observation = np.ones((1, self.window_length, observation.shape[2])) + observation = np.concatenate((cash_observation, observation), axis=0) + cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2])) + ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) + info = {} + info["next_obs"] = ground_truth_obs + return observation, info + + def _render(self, mode="human", close=False): + if close: + return + if mode == "ansi": + print(self.infos[-1]) + elif mode == "human": + self.plot() + + def render(self, mode="human", 
close=False): + return self._render(mode="human", close=False) + + def plot(self): + # show a plot of portfolio vs mean market performance + df_info = pd.DataFrame(self.infos) + df_info["date"] = pd.to_datetime(df_info["date"], format="%Y-%m-%d") + df_info.set_index("date", inplace=True) + mdd = max_drawdown(df_info.rate_of_return + 1) + sharpe_ratio = sharpe(df_info.rate_of_return) + title = "max_drawdown={: 2.2%} sharpe_ratio={: 2.4f}".format(mdd, sharpe_ratio) + df_info[["portfolio_value", "market_value"]].plot( + title=title, fig=plt.gcf(), rot=30 + ) + + +class MultiActionPortfolioEnv(PortfolioEnv): + def __init__( + self, + history, + abbreviation, + model_names, + steps=730, # 2 years + trading_cost=0.0025, + time_cost=0.00, + window_length=50, + start_idx=0, + sample_start_date=None, + ): + super(MultiActionPortfolioEnv, self).__init__( + history, + abbreviation, + steps, + trading_cost, + time_cost, + window_length, + start_idx, + sample_start_date, + ) + self.model_names = model_names + # need to create each simulator for each model + self.sim = [ + PortfolioSim( + asset_names=abbreviation, + trading_cost=trading_cost, + time_cost=time_cost, + steps=steps, + ) + for _ in range(len(self.model_names)) + ] + + def _step(self, action): + """Step the environment by a vector of actions + + Args: + action: (num_models, num_stocks + 1) + + Returns: + + """ + assert ( + action.ndim == 2 + ), "Action must be a two dimensional array with shape (num_models, num_stocks + 1)" + assert action.shape[1] == len(self.sim[0].asset_names) + 1 + assert action.shape[0] == len(self.model_names) + # normalise just in case + action = np.clip(action, 0, 1) + weights = action # np.array([cash_bias] + list(action)) # [w0, w1...] + weights /= np.sum(weights, axis=1, keepdims=True) + eps + # so if weights are all zeros we normalise to [1,0...] 
+ weights[:, 0] += np.clip(1 - np.sum(weights, axis=1), 0, 1) + assert ((action >= 0) * (action <= 1)).all(), ( + "all action values should be between 0 and 1. Not %s" % action + ) + np.testing.assert_almost_equal( + np.sum(weights, axis=1), + np.ones(shape=(weights.shape[0])), + 3, + err_msg='weights should sum to 1. action="%s"' % weights, + ) + observation, done1, ground_truth_obs = self.src._step() + + # concatenate observation with ones + cash_observation = np.ones((1, self.window_length, observation.shape[2])) + observation = np.concatenate((cash_observation, observation), axis=0) + + cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2])) + ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) + + # relative price vector of last observation day (close/open) + close_price_vector = observation[:, -1, 3] + open_price_vector = observation[:, -1, 0] + y1 = close_price_vector / open_price_vector + + rewards = np.empty(shape=(weights.shape[0])) + info = {} + dones = np.empty(shape=(weights.shape[0]), dtype=bool) + for i in range(weights.shape[0]): + reward, current_info, done2 = self.sim[i]._step(weights[i], y1) + rewards[i] = reward + info[self.model_names[i]] = current_info["portfolio_value"] + info["return"] = current_info["return"] + dones[i] = done2 + + # calculate return for buy and hold a bit of each asset + info["market_value"] = np.cumprod( + [inf["return"] for inf in self.infos + [info]] + )[-1] + # add dates + info["date"] = self.start_idx + self.src.idx + self.src.step + info["steps"] = self.src.step + info["next_obs"] = ground_truth_obs + + self.infos.append(info) + + return observation, rewards, np.all(dones) or done1, info + + def _reset(self): + self.infos = [] + for sim in self.sim: + sim.reset() + observation, ground_truth_obs = self.src.reset() + cash_observation = np.ones((1, self.window_length, observation.shape[2])) + observation = np.concatenate((cash_observation, observation), axis=0) + cash_ground_truth = 
np.ones((1, 1, ground_truth_obs.shape[2])) + ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) + info = {} + info["next_obs"] = ground_truth_obs + return observation, info + + def plot(self): + df_info = pd.DataFrame(self.infos) + fig = plt.gcf() + title = "Trading Performance of Various Models" + df_info["date"] = pd.to_datetime(df_info["date"], format="%Y-%m-%d") + df_info.set_index("date", inplace=True) + df_info[self.model_names + ["market_value"]].plot(title=title, fig=fig, rot=30) diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py new file mode 100644 index 00000000..ce8861da --- /dev/null +++ b/src/dagobert/modelling/rl/environment.py @@ -0,0 +1,117 @@ +""" +Class defining PyTorch datasets for supervised modelling of a single instrument. +""" +import logging +from typing import List + +import gym +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +from dagobert.modelling.dl import PortfolioCryptoDataset + +logger = logging.getLogger(__name__) + + +class PortfolioSim(object): + """ + Portfolio management class, loosely based on https://arxiv.org/abs/1706.10059 + + I started with this https://github.com/wassname/rl-portfolio-management + and compared it with the article and the original implementation (see below). + + The original implementation is a bit of a shitshow but the calculation of mu at + least agrees with what we have here: + https://github.com/ZhengyaoJiang/PGPortfolio/blob/master/pgportfolio/learn/nnagent.py + however there are questions around how the code corresponds to the paper, e.g.: + https://github.com/ZhengyaoJiang/PGPortfolio/issues/99 + """ + + def __init__( + self, + asset_names: List[str], + steps: int = 128, + trading_cost: float = 0.001, + reward_type: str = "return", + ): + """ + Class constructor. + + Args: + asset_names: Names of assets in the portfolio. + steps: Maximum number of steps. 
+ trading_cost: Commission rate, currently set to Binance's VIP0 taker level. + reward_type: Whether to use the log return as reward or the sharpe ratio, + which was found to be more stable. + """ + self.asset_names = asset_names + self.asset_num = len(asset_names) + self.steps = steps + self.trading_cost = trading_cost + self.reward_type = reward_type + self.eps = np.finfo(float).eps + self.reset() + + def _step(self, w1, y1): + """ + Step. + + w1 - new action of portfolio weights - e.g. [0.1,0.9, 0.0] + y1 - price relative vector also called return + e.g. [1.0, 0.9, 1.1] + Numbered equations are from https://arxiv.org/abs/1706.10059 + """ + w0 = self.w0 + p0 = self.p0 + + # (eq7) since we last acted prices changed, so weights evolve into + dw1 = (y1 * w0) / (np.dot(y1, w0) + self.eps) + + # (eq16) cost to change portfolio: + # excluding change in cash to avoid double counting for transaction cost + mu = self.trading_cost * (np.abs(dw1[1:] - w1[1:])).sum() + + # (eq11) final portfolio value: see section between (eq19-20) why this works + p1 = p0 * (1 - mu) * np.dot(y1, w0) + + # (eq9 & 10) rate of return log rate of return + rho1 = p1 / p0 - 1 # rate of returns + r1 = np.log(p1 + self.eps) - np.log(p0 + self.eps) + + # (eq22) immediate reward is log rate of return scaled by episode length + if self.reward_type == "return": + reward = r1 / self.steps + # TODO: implement the differentiable sharpe ratio reward like so https://quant.stackexchange.com/a/38040 + + # remember for next step + self.w0 = w1 + self.p0 = p1 + + # if we run out of money, we're done + done = p1 == 0 + + # should only return single values, not list + info = { + "reward": reward, + "log_return": r1, + "portfolio_value": p1, + "market_return": y1.mean(), + "rate_of_return": rho1, + "weights_mean": w1.mean(), + "weights_std": w1.std(), + "cost": mu, + } + # record weights and prices + for i, name in enumerate(["BTCBTC"] + self.asset_names): + info["weight_" + name] = w1[i] + info["price_" + name] = 
y1[i] + + self.infos.append(info) + return reward, info, done + + def reset(self): + self.infos = [] + self.w0 = np.zeros(self.asset_num) + self.w0[0] = 1 + self.p0 = 1.0 From c07fb1c7b1480fa8c99d791372d1ddfb80a0f4f5 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Wed, 23 Dec 2020 13:03:31 +0000 Subject: [PATCH 02/62] modifying the definition of p1 due to experiments in the rl notebook --- notebooks/modelling/rl_env.ipynb | 220 ++++++++++++++++++++--- src/dagobert/modelling/rl/environment.py | 10 +- 2 files changed, 199 insertions(+), 31 deletions(-) diff --git a/notebooks/modelling/rl_env.ipynb b/notebooks/modelling/rl_env.ipynb index cd4bd631..fcf19862 100644 --- a/notebooks/modelling/rl_env.ipynb +++ b/notebooks/modelling/rl_env.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 51, "metadata": {}, "outputs": [ { @@ -47,7 +47,7 @@ "array([0.48192771, 0.26506024, 0.25301205])" ] }, - "execution_count": 32, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 52, "metadata": {}, "outputs": [ { @@ -80,7 +80,7 @@ "0.00020481927710843396" ] }, - "execution_count": 42, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } @@ -97,86 +97,252 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1.045" + "0.9997951807228915" ] }, - "execution_count": 43, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "np.dot(y1, w1)" + "p0 * (1 - mu)" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.9997951807228915" + "1.0375" ] }, - "execution_count": 40, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "(1 - mu)" + "np.dot(y1, w0)" ] }, { 
"cell_type": "code", - "execution_count": 37, + "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1.0447859638554216" + "1.0372875000000001" ] }, - "execution_count": 37, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# (eq11) new portfolio value: see section between (eq19-20) why this works\n", - "p1 = p0 * (1 - mu) * np.dot(y1, w1)\n", + "p1 = p0 * (1 - mu) * np.dot(y1, w0)\n", "p1" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.03728750000000014" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rho1 = p1 / p0 - 1 # rate of returns\n", + "rho1" + ] + }, + { + "cell_type": "code", + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ + "p0 = p1\n", + "w0 = w1" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p1 1.0141928915662652\n", + "rho 0.014192891566265242\n", + "p1 1.0290884681079937\n", + "rho 0.014687123786407685\n", + "p1 1.0287802447893062\n", + "rho -0.0002995110024448522\n" + ] + } + ], + "source": [ + "p0 = 1\n", + "w0 = np.array([.5, .25, .25])\n", + "\n", + "def step(y1, w1, w0, p0):\n", + " dw1 = (y1 * w0) / (np.dot(y1, w0) + eps)\n", + " mu = 0.0025 * (np.abs(dw1[1:] - w1[1:])).sum()\n", + " p1 = p0 * (1 - mu) * np.dot(y1, w1)\n", + " rho1 = p1 / p0 - 1\n", + " print('p1', p1)\n", + " print('rho', rho1)\n", + " return p1\n", "\n", - " # (eq16) cost to change portfolio:\n", - " # excluding change in cash to avoid double counting for transaction cost\n", - " mu = self.cost * (np.abs(dw1[1:] - w1[1:])).sum()\n", + "# BTC, ETH is going up but the agent is selling them, so return and p value should go down due to transaction cost in last sale\n", + "y1 = np.array([1, 1.1, 1.05])\n", + "w1 
= np.array([.8, .1, .1])\n", + "p1 = step(y1, w1, w0, p0)\n", "\n", - " # (eq11) final portfolio value: see section between (eq19-20) why this works\n", - " p1 = p0 * (1 - mu) * np.dot(y1, w0)\n", + "y2 = np.array([1, 1.2, 1.1])\n", + "w2 = np.array([.9, .05, .05])\n", + "p2 = step(y2, w2, w1, p1)\n", "\n", - " # (eq9 & 10) rate of return log rate of return\n", - " rho1 = p1 / p0 - 1 # rate of returns\n", - " r1 = np.log(p1 + eps) - np.log(p0 + eps)\n", "\n", - " # (eq22) immediate reward is log rate of return scaled by episode length\n", - " reward = r1 / self.steps" + "y3 = np.array([1, 1.3, 1.15])\n", + "w3 = np.array([1, 0, 0])\n", + "p3 = step(y3, w3, w2, p2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p1 0.9843092207792208\n", + "rho -0.015690779220779216\n", + "p1 0.9618924183114448\n", + "rho -0.022774146573604104\n", + "p1 0.9253212485790699\n", + "rho -0.03802002078005118\n" + ] + } + ], + "source": [ + "# BTC, ETH is going down and the agent is buying them, so return and p value should go down fast\n", + "y1 = np.array([1, 0.9, .95])\n", + "w1 = np.array([.8, .1, .1])\n", + "\n", + "p1 = step(y1, w1, w0, p0)\n", + "\n", + "y2 = np.array([1, .9, .95])\n", + "w2 = np.array([.7, .15, .15])\n", + "p2 = step(y2, w2, w1, p1)\n", + "\n", + "y3 = np.array([1, .9, .95])\n", + "w3 = np.array([.5, .25, .25])\n", + "p3 = step(y3, w3, w2, p2)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p1 0.9843092207792208\n", + "rho -0.015690779220779216\n", + "p1 0.976712424016802\n", + "rho -0.0077178965736039995\n", + "p1 0.9764848524822389\n", + "rho -0.0002329974811082769\n" + ] + } + ], + "source": [ + "# BTC, ETH is going down and the agent is selling them, so return and p value should go down but not as fast as in the prev example\n", + "y1 
= np.array([1, 0.9, .95])\n", + "w1 = np.array([.8, .1, .1])\n", + "\n", + "p1 = step(y1, w1, w0, p0)\n", + "\n", + "y2 = np.array([1, .9, .95])\n", + "w2 = np.array([.9, .05, .05])\n", + "p2 = step(y2, w2, w1, p1)\n", + "\n", + "y3 = np.array([1, .9, .95])\n", + "w3 = np.array([1, 0, 0])\n", + "p3 = step(y3, w3, w2, p2)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p1 1.0092117073170732\n", + "rho 0.009211707317073214\n", + "p1 1.024114079612195\n", + "rho 0.014766349009900814\n", + "p1 1.0492192209883782\n", + "rho 0.024514008620689864\n" + ] + } + ], + "source": [ + "# BTC, ETH is going up and the agent is buying them, so return and p value should go up\n", + "y1 = np.array([1, 1.05, 1.05])\n", + "w1 = np.array([.8, .1, .1])\n", + "\n", + "p1 = step(y1, w1, w0, p0)\n", + "\n", + "y2 = np.array([1, 1.05, 1.05])\n", + "w2 = np.array([.7, .15, .15])\n", + "\n", + "p2 = step(y2, w2, w1, p1)\n", + "\n", + "y3 = np.array([1, 1.05, 1.05])\n", + "w3 = np.array([.5, .25, .25])\n", + "\n", + "p3 = step(y3, w3, w2, p2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using the environment" ] }, { diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index ce8861da..f5c3521c 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -72,8 +72,10 @@ def _step(self, w1, y1): # excluding change in cash to avoid double counting for transaction cost mu = self.trading_cost * (np.abs(dw1[1:] - w1[1:])).sum() - # (eq11) final portfolio value: see section between (eq19-20) why this works - p1 = p0 * (1 - mu) * np.dot(y1, w0) + # (eq11) final portfolio value: after lot of experiments in rl notebook and + # reading the relevant parts of the paper a ton of times, we use w1 here + # instead of w0, also bc it makes intuitive sense this way + p1 = p0 * (1 - 
mu) * np.dot(y1, w1) # (eq9 & 10) rate of return log rate of return rho1 = p1 / p0 - 1 # rate of returns @@ -100,10 +102,10 @@ def _step(self, w1, y1): "rate_of_return": rho1, "weights_mean": w1.mean(), "weights_std": w1.std(), - "cost": mu, + "rebalancing_cost": mu, } # record weights and prices - for i, name in enumerate(["BTCBTC"] + self.asset_names): + for i, name in enumerate(["USD"] + self.asset_names): info["weight_" + name] = w1[i] info["price_" + name] = y1[i] From 9728317d6b702a5b0c4c6409ac323676426ff987 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Wed, 23 Dec 2020 15:49:14 +0000 Subject: [PATCH 03/62] flashing out the runner and env.py a bit more --- setup.cfg | 1 + src/dagobert/modelling/rl/__init__.py | 1 + src/dagobert/modelling/rl/environment.py | 244 +++++++++++++++++++++-- src/dagobert/modelling/rl/rl.py | 5 + src/dagobert/modelling/rl/rl_runner.py | 38 ++++ 5 files changed, 276 insertions(+), 13 deletions(-) create mode 100644 src/dagobert/modelling/rl/rl.py create mode 100644 src/dagobert/modelling/rl/rl_runner.py diff --git a/setup.cfg b/setup.cfg index c845e0c5..bbc2948b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,6 +57,7 @@ console_scripts = dagobert-tcn = dagobert.modelling.dl.tcn_runner:run dagobert-optuna = dagobert.modelling.dl.optuna:run dagobert-s3 = dagobert.io.runner:run + dagobert-tcn = dagobert.modelling.rl.rl_runner:run [test] # py.test options when running `python setup.py test` diff --git a/src/dagobert/modelling/rl/__init__.py b/src/dagobert/modelling/rl/__init__.py index e69de29b..56d3ada1 100644 --- a/src/dagobert/modelling/rl/__init__.py +++ b/src/dagobert/modelling/rl/__init__.py @@ -0,0 +1 @@ +from .environment import RLData, RLPortfolio, RLEnv diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index f5c3521c..c8fbf72b 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -2,19 +2,71 @@ Class defining PyTorch datasets for 
supervised modelling of a single instrument. """ import logging -from typing import List +from typing import List, Tuple +from argparse import Namespace import gym import numpy as np import pandas as pd import matplotlib.pyplot as plt +from torch.utils.data import Dataset, DataLoader -from dagobert.modelling.dl import PortfolioCryptoDataset +from dagobert.modelling.dl import PortfolioCryptoDataset, Preprocessing logger = logging.getLogger(__name__) +eps = np.finfo(float).eps -class PortfolioSim(object): +class RLData(object): + """ + Leverages the data class and configuration methods from the `dagobert.modelling.dl` + module as much as possible. + """ + + def __init__(self, hparams: Namespace, train_val_test: str = "train"): + """ + Class constructor. + + Args: + hparams: Hyparams parsed by the rl_runner. Similar to how `TCNLightning` is + initialized. + train_val_test: Whether we are training, validating or testing, it must be + either train, val or test. + """ + RLData._pre_sanity_check(hparams) + hparams = Preprocessing().preprocess_augment_dfs(hparams) + self.hparams = Preprocessing().preprocess_train_dfs(hparams) + if train_val_test == "train": + augment_dfs = self.hparams.augment_dfs + augment_method = self.hparams.augment_method + else: + augment_dfs = None + augment_method = None + self.dataset = PortfolioCryptoDataset( + df_to_load=getattr(self.hparams, f"df_{train_val_test}"), + cols_to_model=self.hparams.cols_to_model, + target_col=self.hparams.target_col, + mini_series_length=self.hparams.mini_series_length, + data_dir=self.hparams.data_dir, + augment_method=augment_method, + augment_prob=self.hparams.augment_prob, + augment_dfs=augment_dfs, + augment_dfs_mix=self.hparams.augment_dfs_mix, + ) + self.dataloader = DataLoader(self.dataset) + + def step(self): + pass + + def reset(self): + pass + + @staticmethod + def _pre_sanity_check(hparams: Namespace): + pass + + +class RLPortfolio(object): """ Portfolio management class, loosely based on 
https://arxiv.org/abs/1706.10059 @@ -50,23 +102,27 @@ def __init__( self.steps = steps self.trading_cost = trading_cost self.reward_type = reward_type - self.eps = np.finfo(float).eps self.reset() - def _step(self, w1, y1): + def _step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: """ - Step. + See Figure 1 in https://arxiv.org/abs/1706.10059 to understand what this one + step corresponds to, also the equation numbers correspond to the paper's. + + Args: + w1: New weights of the portfolio. + y1: New relative price vector for the portfolio's instruments. The first + element refers to the cash asset (USD). Therefore y1[0] = 1 always. - w1 - new action of portfolio weights - e.g. [0.1,0.9, 0.0] - y1 - price relative vector also called return - e.g. [1.0, 0.9, 1.1] - Numbered equations are from https://arxiv.org/abs/1706.10059 + Returns: + Typical reward, info, done vars for an OpenAI Gym. """ w0 = self.w0 p0 = self.p0 + assert y1[0] == 1, "Cash price has to remain constant: 1." 
# (eq7) since we last acted prices changed, so weights evolve into - dw1 = (y1 * w0) / (np.dot(y1, w0) + self.eps) + dw1 = (y1 * w0) / (np.dot(y1, w0) + eps) # (eq16) cost to change portfolio: # excluding change in cash to avoid double counting for transaction cost @@ -79,7 +135,7 @@ def _step(self, w1, y1): # (eq9 & 10) rate of return log rate of return rho1 = p1 / p0 - 1 # rate of returns - r1 = np.log(p1 + self.eps) - np.log(p0 + self.eps) + r1 = np.log(p1 + eps) - np.log(p0 + eps) # (eq22) immediate reward is log rate of return scaled by episode length if self.reward_type == "return": @@ -91,7 +147,7 @@ def _step(self, w1, y1): self.p0 = p1 # if we run out of money, we're done - done = p1 == 0 + done = p1 <= 0 # should only return single values, not list info = { @@ -117,3 +173,165 @@ def reset(self): self.w0 = np.zeros(self.asset_num) self.w0[0] = 1 self.p0 = 1.0 + + +class RLEnv(gym.Env): + """ + A reinforcement learning environment for financial portfolio management, based on + https://arxiv.org/abs/1706.10059 and this implementation + https://github.com/wassname/rl-portfolio-management + """ + + def __init__( + self, + history, + abbreviation, + steps=730, # 2 years + trading_cost=0.0025, + time_cost=0.00, + window_length=50, + start_idx=0, + sample_start_date=None, + ): + """ + An environment for financial portfolio management. 
+ Params: + steps - steps in episode + scale - scale data and each episode (except return) + augment - fraction to randomly shift data by + trading_cost - cost of trade as a fraction + time_cost - cost of holding as a fraction + window_length - how many past observations to return + start_idx - The number of days from '2012-08-13' of the dataset + sample_start_date - The start date sampling from the history + """ + self.window_length = window_length + self.num_stocks = history.shape[0] + self.start_idx = start_idx + + self.src = RLData( + history, + abbreviation, + steps=steps, + window_length=window_length, + start_idx=start_idx, + start_date=sample_start_date, + ) + + self.sim = RLPortfolio( + asset_names=abbreviation, + trading_cost=trading_cost, + time_cost=time_cost, + steps=steps, + ) + + # openai gym attributes + # action will be the portfolio weights from 0 to 1 for each asset + self.action_space = gym.spaces.Box( + 0, 1, shape=(len(self.src.asset_names) + 1,), dtype=np.float32 + ) # include cash + + # get the observation space from the data min and max + self.observation_space = gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(len(abbreviation), window_length, history.shape[-1]), + dtype=np.float32, + ) + + def step(self, action): + return self._step(action) + + def _step(self, action): + """ + Step the env. + Actions should be portfolio [w0...] + - Where wn is a portfolio weight from 0 to 1. The first is cash_bias + - cn is the portfolio conversion weights see PortioSim._step for description + """ + np.testing.assert_almost_equal(action.shape, (len(self.sim.asset_names) + 1,)) + + # normalise just in case + action = np.clip(action, 0, 1) + + weights = action # np.array([cash_bias] + list(action)) # [w0, w1...] + weights /= weights.sum() + eps + weights[0] += np.clip( + 1 - weights.sum(), 0, 1 + ) # so if weights are all zeros we normalise to [1,0...] + + assert ((action >= 0) * (action <= 1)).all(), ( + "all action values should be between 0 and 1. 
Not %s" % action + ) + np.testing.assert_almost_equal( + np.sum(weights), + 1.0, + 3, + err_msg='weights should sum to 1. action="%s"' % weights, + ) + + observation, done1, ground_truth_obs = self.src._step() + + # concatenate observation with ones + cash_observation = np.ones((1, self.window_length, observation.shape[2])) + observation = np.concatenate((cash_observation, observation), axis=0) + + cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2])) + ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) + + # relative price vector of last observation day (close/open) + close_price_vector = observation[:, -1, 3] + open_price_vector = observation[:, -1, 0] + y1 = close_price_vector / open_price_vector + reward, info, done2 = self.sim._step(weights, y1) + + # calculate return for buy and hold a bit of each asset + info["market_value"] = np.cumprod( + [inf["return"] for inf in self.infos + [info]] + )[-1] + # add dates + info["date"] = self.start_idx + self.src.idx + self.src.step + info["steps"] = self.src.step + info["next_obs"] = ground_truth_obs + + self.infos.append(info) + + return observation, reward, done1 or done2, info + + def reset(self): + return self._reset() + + def _reset(self): + self.infos = [] + self.sim.reset() + observation, ground_truth_obs = self.src.reset() + cash_observation = np.ones((1, self.window_length, observation.shape[2])) + observation = np.concatenate((cash_observation, observation), axis=0) + cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2])) + ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) + info = {} + info["next_obs"] = ground_truth_obs + return observation, info + + def _render(self, mode="human", close=False): + if close: + return + if mode == "ansi": + print(self.infos[-1]) + elif mode == "human": + self.plot() + + def render(self, mode="human", close=False): + return self._render(mode="human", close=False) + + def plot(self): + # show a plot of 
portfolio vs mean market performance + df_info = pd.DataFrame(self.infos) + df_info["date"] = pd.to_datetime(df_info["date"], format="%Y-%m-%d") + df_info.set_index("date", inplace=True) + mdd = max_drawdown(df_info.rate_of_return + 1) + sharpe_ratio = sharpe(df_info.rate_of_return) + title = "max_drawdown={: 2.2%} sharpe_ratio={: 2.4f}".format(mdd, sharpe_ratio) + df_info[["portfolio_value", "market_value"]].plot( + title=title, fig=plt.gcf(), rot=30 + ) diff --git a/src/dagobert/modelling/rl/rl.py b/src/dagobert/modelling/rl/rl.py new file mode 100644 index 00000000..ca60f407 --- /dev/null +++ b/src/dagobert/modelling/rl/rl.py @@ -0,0 +1,5 @@ +from dagobert.modelling.rl import RLEnv + + +def run_rl(args): + env = RLEnv(args) diff --git a/src/dagobert/modelling/rl/rl_runner.py b/src/dagobert/modelling/rl/rl_runner.py new file mode 100644 index 00000000..8a777213 --- /dev/null +++ b/src/dagobert/modelling/rl/rl_runner.py @@ -0,0 +1,38 @@ +""" +Dagobert's runner for reinforcement learning. + +This module is driven by the `dagobert-rl` command which can be parametrised by +command line arguments, but it's much more convenient to use YAML configs for this, +see the `tcn_args.py` for more detail. +""" +import logging +from pathlib import Path + +from dagobert.utils import setup_logging +from dagobert.runner_utils import load_config, update_args +from dagobert.modelling.dl.tcn_args import get_all_args +from dagobert.modelling.rl.rl import run_rl + + +logger = logging.getLogger(__name__) + + +def run(): + """ + Run a single TCN training or parallelized hyper parameter tuning study using optuna. 
+ """ + + # parse arguments amd setup logging + args = get_all_args() + setup_logging(logger, "dagobert-rl", logging.INFO, args.log_dir) + + # load config yaml if exist + if args.config_path != "": + config = load_config(Path(args.config_path)) + args = update_args(args, config) + + run_rl(args) + + +if __name__ == "__main__": + run() From aa12cb12f19765209cd8d9774121f049cec9862d Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Wed, 30 Dec 2020 17:11:08 +0000 Subject: [PATCH 04/62] now we have all the components in place and I know what to do, now let's make this work --- config/rl_config.yaml | 5 +- notebooks/modelling/rl_env.ipynb | 165 +++------- setup.cfg | 2 +- src/dagobert/modelling/dl/data.py | 47 ++- src/dagobert/modelling/dl/preprocessing.py | 18 +- src/dagobert/modelling/dl/tcn.py | 3 +- src/dagobert/modelling/rl/environment.py | 71 ++++- src/dagobert/modelling/rl/networks.py | 143 +++++++++ src/dagobert/modelling/rl/ppo.py | 340 +++++++++++++++++++++ src/dagobert/modelling/rl/rl.py | 5 +- src/dagobert/naming.py | 9 + 11 files changed, 654 insertions(+), 154 deletions(-) create mode 100644 src/dagobert/modelling/rl/networks.py create mode 100644 src/dagobert/modelling/rl/ppo.py diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 6508176d..da8f251d 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -5,6 +5,7 @@ # -------------------------------------------------------------------------------------- gpus: 1 +multiprocessing: False pin_memory: True profiler: True val_check_interval: 0.5 @@ -40,8 +41,8 @@ non_last_y_frac: 0.5 regression: False density_num: 3 mix_density_net: False -no_class_weights: False -no_sample_weights: False +no_class_weights: True +no_sample_weights: True # -------------------------------------------------------------------------------------- # DATA diff --git a/notebooks/modelling/rl_env.ipynb b/notebooks/modelling/rl_env.ipynb index fcf19862..ea5164b2 100644 --- a/notebooks/modelling/rl_env.ipynb +++ 
b/notebooks/modelling/rl_env.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -23,8 +23,7 @@ "import numpy as np\n", "import pandas as pd\n", "\n", - "from dagobert.io import S3Connector\n", - "from dagobert.modelling.rl.environment import PortfolioEnv" + "from dagobert.io import S3Connector" ] }, { @@ -38,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -47,7 +46,7 @@ "array([0.48192771, 0.26506024, 0.25301205])" ] }, - "execution_count": 51, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -71,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -80,7 +79,7 @@ "0.00020481927710843396" ] }, - "execution_count": 52, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -137,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -146,7 +145,7 @@ "1.0372875000000001" ] }, - "execution_count": 55, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -159,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -168,7 +167,7 @@ "0.03728750000000014" ] }, - "execution_count": 56, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -180,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -190,19 +189,21 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "p1 1.0141928915662652\n", - "rho 0.014192891566265242\n", - "p1 1.0290884681079937\n", - "rho 0.014687123786407685\n", - "p1 1.0287802447893062\n", - "rho 
-0.0002995110024448522\n" + "p1 1.0366750000000002\n", + "rho 0.036675000000000235\n", + "p1 1.0674461056875002\n", + "rho 0.029682500000000056\n", + "p1 1.0911367376956023\n", + "rho 0.022193750000000012\n", + "p1 1.0911367376956023\n", + "rho 0.0\n" ] } ], @@ -213,7 +214,7 @@ "def step(y1, w1, w0, p0):\n", " dw1 = (y1 * w0) / (np.dot(y1, w0) + eps)\n", " mu = 0.0025 * (np.abs(dw1[1:] - w1[1:])).sum()\n", - " p1 = p0 * (1 - mu) * np.dot(y1, w1)\n", + " p1 = p0 * (1 - mu) * np.dot(y1, w0)\n", " rho1 = p1 / p0 - 1\n", " print('p1', p1)\n", " print('rho', rho1)\n", @@ -231,24 +232,28 @@ "\n", "y3 = np.array([1, 1.3, 1.15])\n", "w3 = np.array([1, 0, 0])\n", - "p3 = step(y3, w3, w2, p2)\n" + "p3 = step(y3, w3, w2, p2)\n", + "\n", + "y4 = np.array([1, 1.5, 1.5])\n", + "w4 = np.array([1, 0, 0])\n", + "p4 = step(y4, w4, w3, p3)\n" ] }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "p1 0.9843092207792208\n", - "rho -0.015690779220779216\n", - "p1 0.9618924183114448\n", - "rho -0.022774146573604104\n", - "p1 0.9253212485790699\n", - "rho -0.03802002078005118\n" + "p1 0.9618249999999999\n", + "rho -0.03817500000000007\n", + "p1 0.9471319208437499\n", + "rho -0.015276250000000102\n", + "p1 0.9253212485790698\n", + "rho -0.023028125000000066\n" ] } ], @@ -270,19 +275,19 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "p1 0.9843092207792208\n", - "rho -0.015690779220779216\n", - "p1 0.976712424016802\n", - "rho -0.0077178965736039995\n", - "p1 0.9764848524822389\n", - "rho -0.0002329974811082769\n" + "p1 0.9618249999999999\n", + "rho -0.03817500000000007\n", + "p1 0.9471896303437499\n", + "rho -0.015216250000000042\n", + "p1 0.9398666705141548\n", + "rho -0.007731249999999967\n" ] } ], @@ -304,19 +309,19 @@ }, { "cell_type": "code", - 
"execution_count": 107, + "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "p1 1.0092117073170732\n", - "rho 0.009211707317073214\n", - "p1 1.024114079612195\n", - "rho 0.014766349009900814\n", - "p1 1.0492192209883782\n", - "rho 0.024514008620689864\n" + "p1 1.0242\n", + "rho 0.0242\n", + "p1 1.0342038734999999\n", + "rho 0.009767499999999929\n", + "p1 1.049219220988378\n", + "rho 0.014518749999999914\n" ] } ], @@ -337,82 +342,6 @@ "\n", "p3 = step(y3, w3, w2, p2)" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using the environment" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "data_dir = Path('C:/Work/dagobert/data/modelling')\n", - "instruments = ['BTC', 'ETH', 'XRP', 'LTC']\n", - "datetimes = None\n", - "\n", - "# work out the common datetimes\n", - "for instrument in instruments:\n", - " df = pd.read_feather(data_dir/f'std_bar_{instrument}USDT_tick_1.feather')\n", - " df = df.set_index('date_time')\n", - " if datetimes is not None:\n", - " datetimes = df.index.intersection(datetimes)\n", - " else:\n", - " datetimes = df.index\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# merge instruments \n", - "cols_to_select = ['open', 'low', 'high', 'close', 'volume']\n", - "history = np.empty((len(instruments), len(datetimes), len(cols_to_select)))\n", - "for i,instrument in enumerate(instruments):\n", - " df = pd.read_feather(data_dir/f'std_bar_{instrument}USDT_tick_1.feather')\n", - " history[i, :, :] = df.set_index('date_time').loc[datetimes, cols_to_select].values" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# make portfolio\n", - "portfolio = PortfolioEnv(history, instruments, len(datetimes), window_length=1440)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - 
"metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'PortfolioSim' object has no attribute 'p0'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mw\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m.25\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m.25\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m.25\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m.25\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mportfolio\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32mc:\\users\\danih\\dropbox\\dagobert\\dagobert\\src\\dagobert\\modelling\\rl\\environment.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m 253\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 254\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 255\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_step\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 256\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 257\u001b[0m \u001b[1;32mdef\u001b[0m 
\u001b[0m_step\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mc:\\users\\danih\\dropbox\\dagobert\\dagobert\\src\\dagobert\\modelling\\rl\\environment.py\u001b[0m in \u001b[0;36m_step\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m 296\u001b[0m \u001b[0mopen_price_vector\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mobservation\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 297\u001b[0m \u001b[0my1\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mclose_price_vector\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0mopen_price_vector\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 298\u001b[1;33m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfo\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone2\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msim\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_step\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mweights\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 299\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 300\u001b[0m \u001b[1;31m# calculate return for buy and hold a bit of each asset\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mc:\\users\\danih\\dropbox\\dagobert\\dagobert\\src\\dagobert\\modelling\\rl\\environment.py\u001b[0m in \u001b[0;36m_step\u001b[1;34m(self, w1, y1)\u001b[0m\n\u001b[0;32m 146\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0my1\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m==\u001b[0m 
\u001b[1;36m1.0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"y1[0] must be 1\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 147\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 148\u001b[1;33m \u001b[0mp0\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mp0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 149\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[0mdw1\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0my1\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mw1\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mw1\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0meps\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# (eq7) weights evolve into\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mAttributeError\u001b[0m: 'PortfolioSim' object has no attribute 'p0'" - ] - } - ], - "source": [ - "w = np.array([0, .25, .25, .25, .25])\n", - "portfolio.step(w)" - ] } ], "metadata": { diff --git a/setup.cfg b/setup.cfg index bbc2948b..f0d2b5ce 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,7 +57,7 @@ console_scripts = dagobert-tcn = dagobert.modelling.dl.tcn_runner:run dagobert-optuna = dagobert.modelling.dl.optuna:run dagobert-s3 = dagobert.io.runner:run - dagobert-tcn = dagobert.modelling.rl.rl_runner:run + dagobert-rl = dagobert.modelling.rl.rl_runner:run [test] # py.test options when running `python setup.py test` diff --git a/src/dagobert/modelling/dl/data.py b/src/dagobert/modelling/dl/data.py index 09825bda..1db71a2b 100644 --- a/src/dagobert/modelling/dl/data.py +++ b/src/dagobert/modelling/dl/data.py @@ -5,7 +5,7 @@ import logging from pathlib import Path from argparse import Namespace -from typing import List, Tuple, Union +from 
typing import List, Tuple, Union, Iterable, Callable import torch import numpy as np @@ -13,9 +13,9 @@ from matplotlib.figure import Figure from matplotlib import pyplot as plt from sklearn.preprocessing import MinMaxScaler -from torch.utils.data import Dataset, DataLoader +from torch.utils.data import Dataset, DataLoader, IterableDataset -from dagobert.naming import NBarVars, NAugmentationMethods +from dagobert.naming import NBarVars, NAugmentationMethods, NRL from dagobert.naming import NPreprocessingArgs as npa from dagobert.preprocessing.utils import set_dt_index from dagobert.modelling.augmentation import augment @@ -567,17 +567,19 @@ class PortfolioCryptoDataset(CryptoDataset): This extends :class:`dagobert.modelling.dl.data.CryptoDataset` to make it suitable for multi instrument portfolio optimization through reinforcement-learning. - Instead of returning an array of Xs and single y, this returns only the Xs, and - uses the last time step of the Xs as the target. + Instead of returning an array of Xs and single y, this returns for each X a y. This + is achieved by adding the rl_return target column to the cols_to_model at init, and + then fishing it out for each sample before returning it. + + This convulated way was used so we can repurpose and keep as much of the original + CryptoDataset as possible, without extensive refactoring. 
""" def __init__(self, *args, **kw): super().__init__(*args, **kw) - # for each instrument, fish out the index of the target_col (close_0 by default) - self.target_col_ix = [ - np.where(self.target_col == np.array(cols))[0] - for cols in self.cols_to_model.values() - ] + # for each instrument, we add the rl_return target col to their cols_to_model + for df_name, cols in self.cols_to_model.items(): + self.cols_to_model[df_name].append(NRL.rl_return) def __getitem__(self, idx): """ @@ -589,9 +591,28 @@ def __getitem__(self, idx): from_idx, upto_idx = self._get_from_upto_idxs(idx, batch_indices) Xs = self._get_Xs(batch_dfs, from_idx, upto_idx) - # cut off last time-point from each X as use that as y. + # the last column is y (see __init__), so we fish it out and delete it from X ys = np.empty(len(Xs)) for i, X in enumerate(Xs): - Xs[i] = X[:, :-1] - ys[i] = X[self.target_col_ix[i], -1] + ys[i] = X[-1, -1] + Xs[i] = X[:-1, :] return Xs, ys + + +class ExperienceSourceDataset(IterableDataset): + """ + Implementation from PyTorch Lightning Bolts. This allows us to use Lightning in a + reinforcement learning setting where we first need to generate our training data + by interacting with the environment, and only then use it to train our policy. + + Basic experience source dataset. Takes a generate_batch function that returns an + iterator. The logic for the experience source and how the batch is generated is + defined ihbthe PPO Lightning model itself. 
+ """ + + def __init__(self, generate_batch: Callable): + self.generate_batch = generate_batch + + def __iter__(self) -> Iterable: + iterator = self.generate_batch() + return iterator diff --git a/src/dagobert/modelling/dl/preprocessing.py b/src/dagobert/modelling/dl/preprocessing.py index 50efa475..ffa6916a 100644 --- a/src/dagobert/modelling/dl/preprocessing.py +++ b/src/dagobert/modelling/dl/preprocessing.py @@ -16,7 +16,7 @@ from sklearn.utils.validation import check_is_fitted from sklearn.exceptions import NotFittedError -from dagobert.naming import NBarriers, NCrossValidation, NTimeFeatures, NBarVars +from dagobert.naming import NBarriers, NCrossValidation, NTimeFeatures, NBarVars, NRL from dagobert.naming import NPreprocessingArgs as npa from dagobert.naming import NBarriers as nb from dagobert.io import FeatherWriter, S3Connector @@ -138,6 +138,11 @@ def _preprocess_augment_dfs( df, hparams.binariser_method, hparams.binariser_threshold, df_name ) + # add rl return if required + if hparams.target_col == NRL.rl_return: + rl_return = (df[NBarVars.close] / df[NBarVars.close].shift()).fillna(1) + df.insert(0, NRL.rl_return, rl_return) + # scale all numeric columns if hparams.scaling_method is not None: cols_not_to_scale = set( @@ -162,7 +167,11 @@ def _preprocess_augment_dfs( logger.info(f"Fit-transformed {log_msg}") # save transformed file - if hparams.to_label or hparams.scaling_method: + if ( + hparams.to_label + or hparams.scaling_method + or hparams.target_col == NRL.rl_return + ): feather_writer = FeatherWriter(output_path=data_dir / df_path_prev) feather_writer.write(df) @@ -292,6 +301,11 @@ def _preprocess_train_dfs( df, hparams.binariser_method, hparams.binariser_threshold, df_name ) + # add rl return if required + if hparams.target_col == NRL.rl_return: + rl_return = (df[NBarVars.close] / df[NBarVars.close].shift()).fillna(1) + df.insert(0, NRL.rl_return, rl_return) + # split splitter = TrainValTestSplitter(data_connector=df) df_train, df_val, df_test 
= splitter.split( diff --git a/src/dagobert/modelling/dl/tcn.py b/src/dagobert/modelling/dl/tcn.py index 3db77df8..0fefa51d 100644 --- a/src/dagobert/modelling/dl/tcn.py +++ b/src/dagobert/modelling/dl/tcn.py @@ -182,7 +182,7 @@ def __init__(self, hparams: Namespace): # define main vars (other than model) super().__init__() - TCNLightning._pre_sanity_check(hparams) + hparams = TCNLightning._pre_sanity_check(hparams) # lightning sets this to cuda too late for some of our setup to work self.tcn_device = "cuda" if hparams.gpus > 0 else "cpu" hparams = Preprocessing().preprocess_augment_dfs(hparams) @@ -686,6 +686,7 @@ def _pre_sanity_check(hparams: Namespace): raise ValueError( "If you use augment_dfs you must at least define the anchor key." ) + return hparams def _sanity_check(self): """ diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index c8fbf72b..a4bd7b72 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -11,7 +11,15 @@ import matplotlib.pyplot as plt from torch.utils.data import Dataset, DataLoader -from dagobert.modelling.dl import PortfolioCryptoDataset, Preprocessing +from dagobert.naming import NRL +from dagobert.modelling.dl import ( + PortfolioCryptoDataset, + Preprocessing, + TemporalConvNet, + TCNLightning, +) +from dagobert.modelling.utils import update_lookback + logger = logging.getLogger(__name__) eps = np.finfo(float).eps @@ -30,10 +38,11 @@ def __init__(self, hparams: Namespace, train_val_test: str = "train"): Args: hparams: Hyparams parsed by the rl_runner. Similar to how `TCNLightning` is initialized. + train_val_test: Whether we are training, validating or testing, it must be either train, val or test. 
""" - RLData._pre_sanity_check(hparams) + hparams = RLData._pre_sanity_check(hparams) hparams = Preprocessing().preprocess_augment_dfs(hparams) self.hparams = Preprocessing().preprocess_train_dfs(hparams) if train_val_test == "train": @@ -53,17 +62,52 @@ def __init__(self, hparams: Namespace, train_val_test: str = "train"): augment_dfs=augment_dfs, augment_dfs_mix=self.hparams.augment_dfs_mix, ) - self.dataloader = DataLoader(self.dataset) + + self.dataloader = iter(DataLoader(self.dataset)) + + from IPython import embed + + embed() def step(self): - pass + Xs, ys = self.dataset[idx] + return Xs, y, False def reset(self): - pass + self.dataloader = iter(DataLoader(self.dataset)) @staticmethod def _pre_sanity_check(hparams: Namespace): - pass + + if hparams.target_col != NRL.rl_return: + raise ValueError("target_col has to be rl_return for RL tasks.") + + net_depth = len(hparams.num_channels) + k_size = hparams.kernel_size + max_seq_len = TemporalConvNet.get_tcn_receptive_field_size(k_size, net_depth) + logger.info( + f"A TCN with kernel size: {k_size} and depth: {net_depth} has a receptive " + f"field (can read a maximum sequence length) of {max_seq_len}." + ) + if hparams.mini_series_length == "auto": + logger.info(f"We set mini_series_length from 'auto' to {max_seq_len}.") + hparams.mini_series_length = max_seq_len + if ( + hparams.mini_series_length != "auto" + and hparams.mini_series_length > max_seq_len + ): + logger.warning( + f"Provided mini-series length: {hparams.mini_series_length} is " + f"larger than the networks receptive field size: {max_seq_len}." + ) + # calcualte what the current TCN setup corresponds to in hourly lookback + df_anchor = TCNLightning._load_anchor(hparams) + hparams.lookback = update_lookback(df_anchor, hparams.mini_series_length) + logger.info( + f"The current mini_series_legnth {hparams.mini_series_length}, " + f"corresponds to an estimated lookback of {hparams.lookback} hours." 
+ ) + return hparams class RLPortfolio(object): @@ -83,7 +127,7 @@ class RLPortfolio(object): def __init__( self, asset_names: List[str], - steps: int = 128, + episode_length: int = 1000, trading_cost: float = 0.001, reward_type: str = "return", ): @@ -92,14 +136,12 @@ def __init__( Args: asset_names: Names of assets in the portfolio. - steps: Maximum number of steps. trading_cost: Commission rate, currently set to Binance's VIP0 taker level. reward_type: Whether to use the log return as reward or the sharpe ratio, which was found to be more stable. """ self.asset_names = asset_names self.asset_num = len(asset_names) - self.steps = steps self.trading_cost = trading_cost self.reward_type = reward_type self.reset() @@ -128,10 +170,10 @@ def _step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: # excluding change in cash to avoid double counting for transaction cost mu = self.trading_cost * (np.abs(dw1[1:] - w1[1:])).sum() - # (eq11) final portfolio value: after lot of experiments in rl notebook and - # reading the relevant parts of the paper a ton of times, we use w1 here - # instead of w0, also bc it makes intuitive sense this way - p1 = p0 * (1 - mu) * np.dot(y1, w1) + # (eq11) final portfolio value: I thought this should be w1 (at the end), but + # then think through how the env actually models the world (see Figure 1), w0 + # (which is the original implementation) makes sense here. 
+ p1 = p0 * (1 - mu) * np.dot(y1, w0) # (eq9 & 10) rate of return log rate of return rho1 = p1 / p0 - 1 # rate of returns @@ -139,7 +181,7 @@ def _step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: # (eq22) immediate reward is log rate of return scaled by episode length if self.reward_type == "return": - reward = r1 / self.steps + reward = r1 # TODO: implement the differentiable sharpe ratio reward like so https://quant.stackexchange.com/a/38040 # remember for next step @@ -186,7 +228,6 @@ def __init__( self, history, abbreviation, - steps=730, # 2 years trading_cost=0.0025, time_cost=0.00, window_length=50, diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py new file mode 100644 index 00000000..4e2745b6 --- /dev/null +++ b/src/dagobert/modelling/rl/networks.py @@ -0,0 +1,143 @@ +from typing import Union, Tuple + +import torch +from torch import nn +from torch.distributions import Categorical, Normal + + +def create_mlp(input_shape: Tuple[int], n_actions: int, hidden_sizes: list = [64, 64]): + """ + Simple Multi-Layer Perceptron network + """ + net_layers = [] + net_layers.append(nn.Linear(input_shape[0], hidden_sizes[0])) + net_layers.append(nn.ReLU()) + + for i in range(len(hidden_sizes) - 1): + net_layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i + 1])) + net_layers.append(nn.ReLU()) + net_layers.append(nn.Linear(hidden_sizes[-1], n_actions)) + + return nn.Sequential(*net_layers) + + +class ActorCategorical(nn.Module): + """ + Policy network, for discrete action spaces, which returns a distribution + and an action given an observation + """ + + def __init__(self, actor_net): + """ + Args: + input_shape: observation shape of the environment + n_actions: number of discrete actions available in the environment + """ + super().__init__() + + self.actor_net = actor_net + + def forward(self, states): + logits = self.actor_net(states) + pi = Categorical(logits=logits) + actions = pi.sample() + + return pi, 
actions + + def get_log_prob(self, pi: Categorical, actions: torch.Tensor): + """ + Takes in a distribution and actions and returns log prob of actions + under the distribution + Args: + pi: torch distribution + actions: actions taken by distribution + Returns: + log probability of the acition under pi + """ + return pi.log_prob(actions) + + +class ActorContinous(nn.Module): + """ + Policy network, for continous action spaces, which returns a distribution + and an action given an observation + """ + + def __init__(self, actor_net, act_dim): + """ + Args: + input_shape: observation shape of the environment + n_actions: number of discrete actions available in the environment + """ + super().__init__() + self.actor_net = actor_net + log_std = -0.5 * torch.ones(act_dim, dtype=torch.float) + self.log_std = torch.nn.Parameter(log_std) + + def forward(self, states): + mu = self.actor_net(states) + std = torch.exp(self.log_std) + pi = Normal(loc=mu, scale=std) + actions = pi.sample() + + return pi, actions + + def get_log_prob(self, pi: Normal, actions: torch.Tensor): + """ + Takes in a distribution and actions and returns log prob of actions + under the distribution + Args: + pi: torch distribution + actions: actions taken by distribution + Returns: + log probability of the acition under pi + """ + return pi.log_prob(actions).sum(axis=-1) + + +class ActorCriticAgent(object): + """ + Actor Critic Agent used during trajectory collection. It returns a + distribution and an action given an observation. 
Agent based on the + implementations found here: https://github.com/Shmuma/ptan/blob/master/ptan/agent.py + + """ + + def __init__(self, actor_net: nn.Module, critic_net: nn.Module): + self.actor_net = actor_net + self.critic_net = critic_net + + @torch.no_grad() + def __call__(self, state: torch.Tensor, device: str) -> Tuple: + """ + Takes in the current state and returns the agents policy, sampled + action, log probability of the action, and value of the given state + Args: + states: current state of the environment + device: the device used for the current batch + Returns: + torch dsitribution and randomly sampled action + """ + + state = state.to(device=device) + + pi, actions = self.actor_net(state) + log_p = self.get_log_prob(pi, actions) + + value = self.critic_net(state) + + return pi, actions, log_p, value + + def get_log_prob( + self, pi: Union[Categorical, Normal], actions: torch.Tensor + ) -> torch.Tensor: + """ + Takes in the current state and returns the agents policy, a sampled + action, log probability of the action, and the value of the state + Args: + pi: torch distribution + actions: actions taken by distribution + Returns: + log probability of the acition under pi + """ + return self.actor_net.get_log_prob(pi, actions) diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py new file mode 100644 index 00000000..30b1e957 --- /dev/null +++ b/src/dagobert/modelling/rl/ppo.py @@ -0,0 +1,340 @@ +from typing import List, Tuple + +import pytorch_lightning as pl +from networks import create_mlp, ActorCriticAgent, ActorCategorical, ActorContinous +from data import ExperienceSourceDataset + +import torch +from torch.utils.data import DataLoader +import torch.optim as optim +from torch.optim.optimizer import Optimizer +import numpy as np + +try: + import gym +except ModuleNotFoundError: + _GYM_AVAILABLE = False +else: + _GYM_AVAILABLE = True + + +class PPO(pl.LightningModule): + """ + PyTorch Lightning implementation of `PPO + `_ + 
Paper authors: John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, Oleg Klimov + + Example: + model = PPO("CartPole-v0") + Train: + trainer = Trainer() + trainer.fit(model) + Note: + This example is based on: + https://github.com/openai/baselines/blob/master/baselines/ppo2/ppo2.py + https://github.com/PyTorchLightning/pytorch-lightning-bolts/blob/master/pl_bolts/models/rl/reinforce_model.py + + """ + + def __init__( + self, + env: str, + gamma: float = 0.99, + lam: float = 0.95, + lr_actor: float = 3e-4, + lr_critic: float = 1e-3, + max_episode_len: float = 200, + batch_size: int = 512, + steps_per_epoch: int = 2048, + nb_optim_iters: int = 4, + clip_ratio: float = 0.2, + ) -> None: + + """ + Args: + env: gym environment tag + gamma: discount factor + lam: advantage discount factor (lambda in the paper) + lr_actor: learning rate of actor network + lr_critic: learning rate of critic network + max_episode_len: maximum number interactions (actions) in an episode + batch_size: batch_size when training network- can simulate number of policy updates performed per epoch + steps_per_epoch: how many action-state pairs to rollout for trajectory collection per epoch + nb_optim_iters: how many steps of gradient descent to perform on each batch + clip_ratio: hyperparameter for clipping in the policy objective + """ + super().__init__() + + if not _GYM_AVAILABLE: + raise ModuleNotFoundError( + "This Module requires gym environment which is not installed yet." 
+ ) + + # Hyperparameters + self.lr_actor = lr_actor + self.lr_critic = lr_critic + self.steps_per_epoch = steps_per_epoch + self.nb_optim_iters = nb_optim_iters + self.batch_size = batch_size + self.gamma = gamma + self.lam = lam + self.max_episode_len = max_episode_len + self.clip_ratio = clip_ratio + self.save_hyperparameters() + + self.env = gym.make(env) + # value network + self.critic = create_mlp(self.env.observation_space.shape, 1) + # policy network (agent) + if type(self.env.action_space) == gym.spaces.box.Box: + act_dim = self.env.action_space.shape[0] + actor_mlp = create_mlp(self.env.observation_space.shape, act_dim) + self.actor = ActorContinous(actor_mlp, act_dim) + elif type(self.env.action_space) == gym.spaces.discrete.Discrete: + actor_mlp = create_mlp( + self.env.observation_space.shape, self.env.action_space.n + ) + self.actor = ActorCategorical(actor_mlp) + else: + raise NotImplementedError( + "Env action space should be of type Box (continous) or Discrete (categorical)" + "Got type: ", + type(self.env.action_space), + ) + self.agent = ActorCriticAgent(self.actor, self.critic) + + self.batch_states = [] + self.batch_actions = [] + self.batch_adv = [] + self.batch_qvals = [] + self.batch_logp = [] + + self.ep_rewards = [] + self.ep_values = [] + + self.done_episodes = 0 + self.epoch_rewards = 0 + self.avg_ep_reward = 0 + self.avg_ep_len = 0 + self.avg_reward = 0 + + self.state = torch.FloatTensor(self.env.reset()) + + def forward( + self, x: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Passes in a state x through the network and returns the policy and a sampled action + Args: + x: environment state + Returns: + Tuple of policy and action + """ + pi, action = self.actor(x) + value = self.critic(x) + + return pi, action, value + + def discount_rewards(self, rewards: List[float], discount: float) -> List[float]: + """Calculate the discounted rewards of all rewards in list + Args: + rewards: list of rewards/advantages 
+ Returns: + list of discounted rewards/advantages + """ + assert isinstance(rewards[0], float) + + cumul_reward = [] + sum_r = 0.0 + + for r in reversed(rewards): + sum_r = (sum_r * discount) + r + cumul_reward.append(sum_r) + + return list(reversed(cumul_reward)) + + def calc_advantage( + self, rewards: List[float], values: List[float], last_value: float + ) -> List[float]: + """Calculate the advantage given rewards, state values, and the last value of episode + Args: + rewards: list of episode rewards + values: list of state values from critic + last_value: value of last state of episode + Returns: + list of advantages + """ + rews = rewards + [last_value] + vals = values + [last_value] + # GAE + delta = [ + rews[i] + self.gamma * vals[i + 1] - vals[i] for i in range(len(rews) - 1) + ] + adv = self.discount_rewards(delta, self.gamma * self.lam) + + return adv + + def train_batch( + self, + ) -> Tuple[List[torch.Tensor], List[torch.Tensor], List[torch.Tensor]]: + """ + Contains the logic for generating trajectory data to train policy and value network + Yield: + Tuple of Lists containing tensors for states, actions, log probs, qvals and advantage + """ + + for step in range(self.steps_per_epoch): + pi, action, log_prob, value = self.agent(self.state, self.device) + next_state, reward, done, _ = self.env.step(action.cpu().numpy()) + + self.batch_states.append(self.state) + self.batch_actions.append(action) + self.batch_logp.append(log_prob) + + self.ep_rewards.append(reward) + self.ep_values.append(value.item()) + + self.state = torch.FloatTensor(next_state) + + epoch_end = step == (self.steps_per_epoch - 1) + terminal = len(self.ep_rewards) == self.max_episode_len + + if epoch_end or done or terminal: + # if trajectory ends abtruptly, boostrap value of next state + if (terminal or epoch_end) and not done: + with torch.no_grad(): + _, _, _, value = self.agent(self.state, self.device) + last_value = value.item() + else: + last_value = 0 + + # discounted cumulative 
reward + self.batch_qvals += self.discount_rewards( + self.ep_rewards + [last_value], self.gamma + )[:-1] + # advantage + self.batch_adv += self.calc_advantage( + self.ep_rewards, self.ep_values, last_value + ) + # logs + self.done_episodes += 1 + self.epoch_rewards += np.sum(self.ep_rewards) + # reset params + self.ep_rewards = [] + self.ep_values = [] + self.state = torch.FloatTensor(self.env.reset()) + + if epoch_end: + train_data = zip( + self.batch_states, + self.batch_actions, + self.batch_logp, + self.batch_qvals, + self.batch_adv, + ) + + for state, action, logp_old, qval, adv in train_data: + yield state, action, logp_old, qval, adv + + self.batch_states.clear() + self.batch_actions.clear() + self.batch_adv.clear() + self.batch_logp.clear() + self.batch_qvals.clear() + + self.avg_ep_reward = self.epoch_rewards / self.done_episodes + self.avg_reward = self.epoch_rewards / self.steps_per_epoch + self.avg_ep_len = self.steps_per_epoch / self.done_episodes + + self.epoch_rewards = 0 + self.done_episodes = 0 + + def actor_loss(self, state, action, logp_old, qval, adv) -> torch.Tensor: + pi, _ = self.actor(state) + logp = self.actor.get_log_prob(pi, action) + ratio = torch.exp(logp - logp_old) + clip_adv = torch.clamp(ratio, 1 - self.clip_ratio, 1 + self.clip_ratio) * adv + loss_actor = -(torch.min(ratio * adv, clip_adv)).mean() + return loss_actor + + def critic_loss(self, state, action, logp_old, qval, adv) -> torch.Tensor: + value = self.critic(state) + loss_critic = (qval - value).pow(2).mean() + return loss_critic + + def training_step( + self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx, optimizer_idx + ): + """ + Carries out a single update to actor and critic network from a batch of replay buffer. 
+ + Args: + batch: batch of replay buffer/trajectory data + batch_idx: not used + optimizer_idx: idx that controls optimizing actor or critic network + Returns: + loss + """ + state, action, old_logp, qval, adv = batch + self.log( + "avg_ep_len", self.avg_ep_len, prog_bar=True, on_step=False, on_epoch=True + ) + self.log( + "avg_ep_reward", + self.avg_ep_reward, + prog_bar=True, + on_step=False, + on_epoch=True, + ) + self.log( + "avg_reward", self.avg_reward, prog_bar=True, on_step=False, on_epoch=True + ) + + if optimizer_idx % 2 == 0: + loss_actor = self.actor_loss(state, action, old_logp, qval, adv) + self.log( + "loss_actor", + loss_actor, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + + return loss_actor + + else: + loss_critic = self.critic_loss(state, action, old_logp, qval, adv) + self.log( + "loss_critic", + loss_critic, + on_step=False, + on_epoch=True, + prog_bar=False, + logger=True, + ) + + return loss_critic + + def configure_optimizers(self) -> List[Optimizer]: + """ Initialize Adam optimizer""" + optimizer_actor = optim.Adam(self.actor.parameters(), lr=self.lr_actor) + optimizer_critic = optim.Adam(self.critic.parameters(), lr=self.lr_critic) + + # to run multple steps of gradient descent + optimizers = [] + for i in range(self.nb_optim_iters): + optimizers.append(optimizer_actor) + optimizers.append(optimizer_critic) + + return optimizers + + def _dataloader(self) -> DataLoader: + """Initialize the Replay Buffer dataset used for retrieving experiences""" + dataset = ExperienceSourceDataset(self.train_batch) + dataloader = DataLoader(dataset=dataset, batch_size=self.batch_size) + return dataloader + + def train_dataloader(self) -> DataLoader: + """Get train loader""" + return self._dataloader() diff --git a/src/dagobert/modelling/rl/rl.py b/src/dagobert/modelling/rl/rl.py index ca60f407..35f465d9 100644 --- a/src/dagobert/modelling/rl/rl.py +++ b/src/dagobert/modelling/rl/rl.py @@ -1,5 +1,6 @@ -from dagobert.modelling.rl 
import RLEnv +from dagobert.modelling.rl import RLEnv, RLData def run_rl(args): - env = RLEnv(args) + rld = RLData(args) + # env = RLEnv(args) diff --git a/src/dagobert/naming.py b/src/dagobert/naming.py index f034ce41..829c0eca 100644 --- a/src/dagobert/naming.py +++ b/src/dagobert/naming.py @@ -199,6 +199,7 @@ class NPreprocessingArgs(object): """ anchor = "anchor" + target_col = "target_col" cols_to_model = "cols_to_model" close_original = "close_original" @@ -275,3 +276,11 @@ class NStockstats(object): wr_40 = "wr_40" vr_120 = "vr_120" vr_40 = "vr_40" + + +class NRL(object): + """ + Naming object for reinforcement learning environment / agent / algos. + """ + + rl_return = "rl_return" From 78b865055b24a9ea37b29b65989b28fc810e276b Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Wed, 30 Dec 2020 18:45:26 +0000 Subject: [PATCH 05/62] going through environment.py and trying to make it work --- config/rl_config.yaml | 72 ++------ src/dagobert/modelling/dl/tcn.py | 23 ++- src/dagobert/modelling/rl/environment.py | 213 +++++++++-------------- src/dagobert/modelling/rl/utils.py | 21 +++ 4 files changed, 130 insertions(+), 199 deletions(-) create mode 100644 src/dagobert/modelling/rl/utils.py diff --git a/config/rl_config.yaml b/config/rl_config.yaml index da8f251d..03a64656 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -19,19 +19,28 @@ auto_scale_batch_size: log_dir: logs num_workers: 4 -exp_name: TCN +exp_name: RL-PPO-TCN tags: - - model1 - - ethusdt_volume500 + - RL_test no_comet_logger: True seed: 42 -batch_size: 100 +batch_size: 128 + + +# -------------------------------------------------------------------------------------- +# RL +# -------------------------------------------------------------------------------------- + +episode_length: 1000 +target_col: rl_return +asset_names: + - BTC + - ETH # -------------------------------------------------------------------------------------- # MODEL # 
-------------------------------------------------------------------------------------- -output_size: 2 num_channels: [150, 150, 150, 150, 150, 150, 150] kernel_size: 10 dropout: 0.5 @@ -41,8 +50,6 @@ non_last_y_frac: 0.5 regression: False density_num: 3 mix_density_net: False -no_class_weights: True -no_sample_weights: True # -------------------------------------------------------------------------------------- # DATA @@ -53,13 +60,6 @@ data_dir: "C:/Work/dagobert/data/modelling" lookback: auto mini_series_length: auto -# If this is set to a number, then simple lookahead labelling is in place -simple_lookahead_y: -simple_lookahead_reg: False - -# If this is True, anchor is labelled before preprocessing. to_label and simple_lookahead_y cannot be used together. -to_label: False - df_train: anchor: std_bar_BTCUSDT_tick_1.feather df2: std_bar_ETHUSDT_tick_1.feather @@ -110,49 +110,7 @@ cols_to_model: - mdi_60 - vr_60 df2: - - date_diff - - open - - high - - low - - close - - open_fd_0.0 - - high_fd_0.0 - - low_fd_0.0 - - close_fd_0.0 - - open_fd_tuned - - high_fd_tuned - - low_fd_tuned - - close_fd_tuned - - cum_ticks - - cum_dollar - - volume - - cum_volume_buy - - cum_volume_sell - - cum_volume_quote - - cum_volume_quote_buy - - cum_volume_quote_sell - - sin_date - - cos_date - - sin_time - - cos_time - - boll - - boll_lb - - boll_ub - - macd - - macds - - macdh - - wr_60 - - rsi_60 - - rsv_60 - - atr_60 - - cci_60 - - kdjk_60 - - kdjd_60 - - kdjj_60 - - pdi_60 - - mdi_60 - - vr_60 -target_col: close_fd_0.0 + time_feat_n: 1 time_embed_dim: 12 diff --git a/src/dagobert/modelling/dl/tcn.py b/src/dagobert/modelling/dl/tcn.py index 0fefa51d..e49304fc 100644 --- a/src/dagobert/modelling/dl/tcn.py +++ b/src/dagobert/modelling/dl/tcn.py @@ -626,7 +626,7 @@ def _log_graph(self, datasets: CryptoDataset): # ---------------------------------------------------------------------------------- @staticmethod - def _pre_sanity_check(hparams: Namespace): + def 
_pre_sanity_check(hparams: Namespace) -> Namespace: """Certain sanity checks must happen before preprocessing takes place.""" # TARGET VARIABLE if ( @@ -654,7 +654,19 @@ def _pre_sanity_check(hparams: Namespace): "Classification is not applicable with mixed density nets" ) - # NET + # MINI SERIES / LOOKBACK + hparams = TCNLightning._pre_sanity_check_mini_series_lookback(hparams) + + # ETC + if hparams.augment_dfs and npa.anchor not in hparams.augment_dfs.keys(): + raise ValueError( + "If you use augment_dfs you must at least define the anchor key." + ) + return hparams + + @staticmethod + def _pre_sanity_check_mini_series_lookback(hparams: Namespace) -> Namespace: + """Calculate lookback and mini_series_length if necessary.""" net_depth = len(hparams.num_channels) k_size = hparams.kernel_size max_seq_len = TemporalConvNet.get_tcn_receptive_field_size(k_size, net_depth) @@ -681,13 +693,6 @@ def _pre_sanity_check(hparams: Namespace): f"corresponds to an estimated lookback of {hparams.lookback} hours." ) - # ETC - if hparams.augment_dfs and npa.anchor not in hparams.augment_dfs.keys(): - raise ValueError( - "If you use augment_dfs you must at least define the anchor key." - ) - return hparams - def _sanity_check(self): """ Make sure the options defined in hparams don't contradict each other. 
diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index a4bd7b72..ba900123 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -12,13 +12,9 @@ from torch.utils.data import Dataset, DataLoader from dagobert.naming import NRL -from dagobert.modelling.dl import ( - PortfolioCryptoDataset, - Preprocessing, - TemporalConvNet, - TCNLightning, -) -from dagobert.modelling.utils import update_lookback +from dagobert.naming import NPreprocessingArgs as npa +from dagobert.modelling.dl import PortfolioCryptoDataset, Preprocessing, TCNLightning +from dagobert.modelling.rl.utils import sharpe_ratio, max_drawdown logger = logging.getLogger(__name__) @@ -31,20 +27,23 @@ class RLData(object): module as much as possible. """ - def __init__(self, hparams: Namespace, train_val_test: str = "train"): + def __init__( + self, + hparams: Namespace, + train_val_test: str = "train", + ): """ Class constructor. Args: hparams: Hyparams parsed by the rl_runner. Similar to how `TCNLightning` is initialized. - train_val_test: Whether we are training, validating or testing, it must be either train, val or test. 
""" - hparams = RLData._pre_sanity_check(hparams) - hparams = Preprocessing().preprocess_augment_dfs(hparams) - self.hparams = Preprocessing().preprocess_train_dfs(hparams) + self.idx = 0 + self.hparams = hparams + if train_val_test == "train": augment_dfs = self.hparams.augment_dfs augment_method = self.hparams.augment_method @@ -62,51 +61,34 @@ def __init__(self, hparams: Namespace, train_val_test: str = "train"): augment_dfs=augment_dfs, augment_dfs_mix=self.hparams.augment_dfs_mix, ) - - self.dataloader = iter(DataLoader(self.dataset)) - - from IPython import embed - - embed() + self.dataset_len = len(self.dataset) + self.reset() def step(self): - Xs, ys = self.dataset[idx] - return Xs, y, False + Xs, ys = self.dataset[self.idx] + y1 = np.concatenate([[1.0], ys]) + episode_full = self.idx == self.hparams.episode_length - 1 + done = True if episode_full else False + self.idx += 1 + return Xs, y1, done def reset(self): - self.dataloader = iter(DataLoader(self.dataset)) + self.idx = np.random.randint(self.dataset_len - self.hparams.episode_length) @staticmethod def _pre_sanity_check(hparams: Namespace): - + # ensure we have the rl specific target column in the config if hparams.target_col != NRL.rl_return: raise ValueError("target_col has to be rl_return for RL tasks.") - net_depth = len(hparams.num_channels) - k_size = hparams.kernel_size - max_seq_len = TemporalConvNet.get_tcn_receptive_field_size(k_size, net_depth) - logger.info( - f"A TCN with kernel size: {k_size} and depth: {net_depth} has a receptive " - f"field (can read a maximum sequence length) of {max_seq_len}." 
- ) - if hparams.mini_series_length == "auto": - logger.info(f"We set mini_series_length from 'auto' to {max_seq_len}.") - hparams.mini_series_length = max_seq_len - if ( - hparams.mini_series_length != "auto" - and hparams.mini_series_length > max_seq_len - ): - logger.warning( - f"Provided mini-series length: {hparams.mini_series_length} is " - f"larger than the networks receptive field size: {max_seq_len}." - ) - # calcualte what the current TCN setup corresponds to in hourly lookback - df_anchor = TCNLightning._load_anchor(hparams) - hparams.lookback = update_lookback(df_anchor, hparams.mini_series_length) - logger.info( - f"The current mini_series_legnth {hparams.mini_series_length}, " - f"corresponds to an estimated lookback of {hparams.lookback} hours." - ) + # make sure we have the same cols for each instrument + # this helps to have an environment with a single tensor as state + if len(hparams.cols_to_model) > 1: + for df_name, cols in hparams.cols_to_model.items(): + hparams.cols_to_model[df_name] = hparams.cols_to_model[npa.anchor] + + # MINI SERIES / LOOKBACK + hparams = TCNLightning._pre_sanity_check_mini_series_lookback(hparams) return hparams @@ -128,7 +110,7 @@ def __init__( self, asset_names: List[str], episode_length: int = 1000, - trading_cost: float = 0.001, + trading_cost: float = 0.002, reward_type: str = "return", ): """ @@ -136,7 +118,8 @@ def __init__( Args: asset_names: Names of assets in the portfolio. - trading_cost: Commission rate, currently set to Binance's VIP0 taker level. + trading_cost: Commission rate, currently set to Binance's VIP0 taker level + plus doubled it to account for slippage. TODO: model slippage. reward_type: Whether to use the log return as reward or the sharpe ratio, which was found to be more stable. 
""" @@ -146,7 +129,7 @@ def __init__( self.reward_type = reward_type self.reset() - def _step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: + def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: """ See Figure 1 in https://arxiv.org/abs/1706.10059 to understand what this one step corresponds to, also the equation numbers correspond to the paper's. @@ -226,105 +209,69 @@ class RLEnv(gym.Env): def __init__( self, - history, - abbreviation, - trading_cost=0.0025, - time_cost=0.00, - window_length=50, - start_idx=0, - sample_start_date=None, + hparams: Namespace, + asset_names: List[str], + train_val_test: str = "train", + episode_length: int = 1000, + trading_cost: float = 0.001, + reward_type: str = "return", ): """ An environment for financial portfolio management. - Params: - steps - steps in episode - scale - scale data and each episode (except return) - augment - fraction to randomly shift data by - trading_cost - cost of trade as a fraction - time_cost - cost of holding as a fraction - window_length - how many past observations to return - start_idx - The number of days from '2012-08-13' of the dataset - sample_start_date - The start date sampling from the history + + Args: + hparams: + asset_names: + + train_val_test: + episode_length: + trading_cost: + reward_type: """ - self.window_length = window_length - self.num_stocks = history.shape[0] - self.start_idx = start_idx - - self.src = RLData( - history, - abbreviation, - steps=steps, - window_length=window_length, - start_idx=start_idx, - start_date=sample_start_date, - ) + # prepare datafiles if necessary + hparams = RLData._pre_sanity_check(hparams) + hparams = Preprocessing().preprocess_augment_dfs(hparams) + self.hparams = Preprocessing().preprocess_train_dfs(hparams) + self.asset_names = self.hparams.asset_names + self.asset_n = len(self.asset_names) + self.feat_n = len(self.hparams.cols_to_model[npa.anchor]) - self.sim = RLPortfolio( - asset_names=abbreviation, - 
trading_cost=trading_cost, - time_cost=time_cost, - steps=steps, - ) + self.data = RLData(self.hparams, train_val_test="train") + self.portfolio = RLPortfolio(self.asset_names, self.hparams.episode_length) - # openai gym attributes - # action will be the portfolio weights from 0 to 1 for each asset + # include cash in the portfolio action space self.action_space = gym.spaces.Box( - 0, 1, shape=(len(self.src.asset_names) + 1,), dtype=np.float32 - ) # include cash + 0.0, 1.0, shape=(self.asset_n + 1), dtype=np.float32 + ) # get the observation space from the data min and max - self.observation_space = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=(len(abbreviation), window_length, history.shape[-1]), - dtype=np.float32, + self.observation_space = gym.spaces.Dict( + { + "state": gym.spaces.Box( + low=-10, + high=10, + shape=(self.asset_n, self.feat_n, self.hparams.mini_series_length), + dtype=np.float32, + ), + "weights": self.action_space, + } ) - def step(self, action): - return self._step(action) - - def _step(self, action): - """ - Step the env. - Actions should be portfolio [w0...] - - Where wn is a portfolio weight from 0 to 1. The first is cash_bias - - cn is the portfolio conversion weights see PortioSim._step for description + def step(self, action: np.array): """ - np.testing.assert_almost_equal(action.shape, (len(self.sim.asset_names) + 1,)) + Step in the environment. - # normalise just in case + Args: + action: Portfolio weights for the N assets and the cash (first item). + They should all be between 0 and 1 (no shorting) and sum to 1. + """ + # cut and normalise action (just in case) action = np.clip(action, 0, 1) - - weights = action # np.array([cash_bias] + list(action)) # [w0, w1...] + weights = action weights /= weights.sum() + eps - weights[0] += np.clip( - 1 - weights.sum(), 0, 1 - ) # so if weights are all zeros we normalise to [1,0...] - - assert ((action >= 0) * (action <= 1)).all(), ( - "all action values should be between 0 and 1. 
Not %s" % action - ) - np.testing.assert_almost_equal( - np.sum(weights), - 1.0, - 3, - err_msg='weights should sum to 1. action="%s"' % weights, - ) - - observation, done1, ground_truth_obs = self.src._step() - - # concatenate observation with ones - cash_observation = np.ones((1, self.window_length, observation.shape[2])) - observation = np.concatenate((cash_observation, observation), axis=0) - - cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2])) - ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) - # relative price vector of last observation day (close/open) - close_price_vector = observation[:, -1, 3] - open_price_vector = observation[:, -1, 0] - y1 = close_price_vector / open_price_vector - reward, info, done2 = self.sim._step(weights, y1) + Xs, y1, done1 = self.data.step() + reward, info, done2 = self.portfolio.step(weights, y1) # calculate return for buy and hold a bit of each asset info["market_value"] = np.cumprod( diff --git a/src/dagobert/modelling/rl/utils.py b/src/dagobert/modelling/rl/utils.py new file mode 100644 index 00000000..f2651087 --- /dev/null +++ b/src/dagobert/modelling/rl/utils.py @@ -0,0 +1,21 @@ +"""Util functions for portfolio optimization and other RL related tasks""" + +import numpy as np + +eps = np.finfo(float).eps + + +def sharpe_ratio(returns, freq: int = 30, rfr: int = 0): + """ + Given a set of returns, calculates naive (rfr=0) sharpe (eq 28). + """ + return (np.sqrt(freq) * np.mean(returns - rfr + eps)) / np.std(returns - rfr + eps) + + +def max_drawdown(returns): + """ + Max drawdown. 
See https://www.investopedia.com/terms/m/maximum-drawdown-mdd.asp + """ + peak = returns.max() + trough = returns[returns.argmax() :].min() + return (trough - peak) / (peak + eps) From 6e466d16da76cabd26874a7a770fc266dc0ce06d Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Thu, 31 Dec 2020 14:23:01 +0000 Subject: [PATCH 06/62] working on environment --- src/dagobert/modelling/rl/environment.py | 62 +++++------------------- 1 file changed, 13 insertions(+), 49 deletions(-) diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index ba900123..4a198942 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -74,6 +74,7 @@ def step(self): def reset(self): self.idx = np.random.randint(self.dataset_len - self.hparams.episode_length) + return self.step() @staticmethod def _pre_sanity_check(hparams: Namespace): @@ -232,14 +233,16 @@ def __init__( hparams = RLData._pre_sanity_check(hparams) hparams = Preprocessing().preprocess_augment_dfs(hparams) self.hparams = Preprocessing().preprocess_train_dfs(hparams) + + # setup data and portfolio and other vars + self.infos = [] self.asset_names = self.hparams.asset_names self.asset_n = len(self.asset_names) self.feat_n = len(self.hparams.cols_to_model[npa.anchor]) - self.data = RLData(self.hparams, train_val_test="train") self.portfolio = RLPortfolio(self.asset_names, self.hparams.episode_length) - # include cash in the portfolio action space + # setup openai gym env - include cash in the portfolio action space self.action_space = gym.spaces.Box( 0.0, 1.0, shape=(self.asset_n + 1), dtype=np.float32 ) @@ -270,56 +273,17 @@ def step(self, action: np.array): weights = action weights /= weights.sum() + eps - Xs, y1, done1 = self.data.step() + next_state, y1, done1 = self.data.step() reward, info, done2 = self.portfolio.step(weights, y1) - - # calculate return for buy and hold a bit of each asset - info["market_value"] = np.cumprod( - 
[inf["return"] for inf in self.infos + [info]] - )[-1] - # add dates - info["date"] = self.start_idx + self.src.idx + self.src.step - info["steps"] = self.src.step - info["next_obs"] = ground_truth_obs - self.infos.append(info) - return observation, reward, done1 or done2, info + return next_state, reward, done1 or done2, info def reset(self): - return self._reset() - - def _reset(self): self.infos = [] - self.sim.reset() - observation, ground_truth_obs = self.src.reset() - cash_observation = np.ones((1, self.window_length, observation.shape[2])) - observation = np.concatenate((cash_observation, observation), axis=0) - cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2])) - ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) - info = {} - info["next_obs"] = ground_truth_obs - return observation, info - - def _render(self, mode="human", close=False): - if close: - return - if mode == "ansi": - print(self.infos[-1]) - elif mode == "human": - self.plot() - - def render(self, mode="human", close=False): - return self._render(mode="human", close=False) - - def plot(self): - # show a plot of portfolio vs mean market performance - df_info = pd.DataFrame(self.infos) - df_info["date"] = pd.to_datetime(df_info["date"], format="%Y-%m-%d") - df_info.set_index("date", inplace=True) - mdd = max_drawdown(df_info.rate_of_return + 1) - sharpe_ratio = sharpe(df_info.rate_of_return) - title = "max_drawdown={: 2.2%} sharpe_ratio={: 2.4f}".format(mdd, sharpe_ratio) - df_info[["portfolio_value", "market_value"]].plot( - title=title, fig=plt.gcf(), rot=30 - ) + self.portfolio.reset() + next_state, _, _ = self.data.reset() + return next_state + + def render(self): + pass From 28b11cb78dca51b9d33254ee54f2f0f00b15744b Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Fri, 1 Jan 2021 13:48:17 +0000 Subject: [PATCH 07/62] adding rl args and setting up the runner properly, time to go through the networks and the actual algo --- config/rl_config.yaml | 37 
++-- src/dagobert/modelling/dl/__init__.py | 2 +- src/dagobert/modelling/dl/preprocessing.py | 10 +- src/dagobert/modelling/dl/tcn.py | 11 +- src/dagobert/modelling/dl/tcn_args.py | 18 +- src/dagobert/modelling/rl/__init__.py | 2 + src/dagobert/modelling/rl/environment.py | 60 ++---- src/dagobert/modelling/rl/ppo.py | 238 ++++++++++++--------- src/dagobert/modelling/rl/rl.py | 6 - src/dagobert/modelling/rl/rl_args.py | 177 +++++++++++++++ src/dagobert/modelling/rl/rl_runner.py | 8 +- 11 files changed, 366 insertions(+), 203 deletions(-) delete mode 100644 src/dagobert/modelling/rl/rl.py create mode 100644 src/dagobert/modelling/rl/rl_args.py diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 03a64656..dae94a58 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -5,7 +5,6 @@ # -------------------------------------------------------------------------------------- gpus: 1 -multiprocessing: False pin_memory: True profiler: True val_check_interval: 0.5 @@ -31,25 +30,36 @@ batch_size: 128 # RL # -------------------------------------------------------------------------------------- -episode_length: 1000 -target_col: rl_return asset_names: - BTC - ETH +trading_cost: 0.002 +reward_type: return +max_episode_length: 1000 +steps_per_epoch: 2000 +n_optim_iters: 4 +gamma: 0.99 +lamb: 0.95 +lr_actor: 0.0003 +lr_critic: 0.001 +clip_ratio: 0.2 + +# don't change these, or preprocessing won't work +target_col: rl_return +to_label: False +no_sample_weights: True +binariser_method: # -------------------------------------------------------------------------------------- # MODEL # -------------------------------------------------------------------------------------- -num_channels: [150, 150, 150, 150, 150, 150, 150] -kernel_size: 10 -dropout: 0.5 -use_last_timepoint: True -last_y: False -non_last_y_frac: 0.5 -regression: False -density_num: 3 -mix_density_net: False +actor_num_channels: [50, 50, 50, 50, 50] +actor_kernel_size: 5 +actor_dropout: 0.25 
+critic_num_channels: [50, 50, 50, 50, 50] +critic_kernel_size: 5 +critic_dropout: 0.25 # -------------------------------------------------------------------------------------- # DATA @@ -110,6 +120,7 @@ cols_to_model: - mdi_60 - vr_60 df2: + # the cols of the secondary DFs will automatically be set to anchor's time_feat_n: 1 time_embed_dim: 12 @@ -132,6 +143,4 @@ test_days: 30 test_train_offset_days: 62 test_puffer_days: 1 -sample_weights: -binariser_method: scaling_method: minmax diff --git a/src/dagobert/modelling/dl/__init__.py b/src/dagobert/modelling/dl/__init__.py index 81ddc443..4dd9f930 100644 --- a/src/dagobert/modelling/dl/__init__.py +++ b/src/dagobert/modelling/dl/__init__.py @@ -1,4 +1,4 @@ -from .data import CryptoDataset, PortfolioCryptoDataset +from .data import CryptoDataset, PortfolioCryptoDataset, ExperienceSourceDataset from .tcn_net import TemporalConvNet from .utils import LogCoshLoss, FocalLoss, MixedNormalPDFLoss from .adabelief import AdaBelief diff --git a/src/dagobert/modelling/dl/preprocessing.py b/src/dagobert/modelling/dl/preprocessing.py index ffa6916a..ebce640c 100644 --- a/src/dagobert/modelling/dl/preprocessing.py +++ b/src/dagobert/modelling/dl/preprocessing.py @@ -85,10 +85,7 @@ def preprocess_augment_dfs(hparams: Namespace) -> Namespace: @staticmethod def _preprocess_augment_dfs( - hparams: Namespace, - df_name: str, - df_path: str, - df_path_prev: str, + hparams: Namespace, df_name: str, df_path: str, df_path_prev: str, ) -> Optional: """ Helper function that performs the preprocessing of simple augment DFs. @@ -553,10 +550,7 @@ def _quantile_filter( @staticmethod def _binarise( - df: pd.DataFrame, - method: str, - threshold: float, - df_name: str = "", + df: pd.DataFrame, method: str, threshold: float, df_name: str = "", ) -> pd.DataFrame: """ Binarises a DF with the provided method and threshold. 
diff --git a/src/dagobert/modelling/dl/tcn.py b/src/dagobert/modelling/dl/tcn.py index e49304fc..6aacde87 100644 --- a/src/dagobert/modelling/dl/tcn.py +++ b/src/dagobert/modelling/dl/tcn.py @@ -5,7 +5,6 @@ import logging from pathlib import Path from typing import Optional -from functools import partial from argparse import Namespace import numpy as np @@ -131,11 +130,7 @@ def setup_and_run_tcn_lightning(args: Namespace, study: bool = False): args, logger=tcn_loggers, checkpoint_callback=checkpoint_callback, - callbacks=[ - early_stop_callback, - metrics_callback, - LearningRateMonitor(), - ], + callbacks=[early_stop_callback, metrics_callback, LearningRateMonitor(),], ) model = TCNLightning(args) @@ -178,6 +173,7 @@ def __init__(self, hparams: Namespace): Args: hparams: Hyper-params passed in to the module. See the docs for more details https://pytorch-lightning.readthedocs.io/en/latest/hyperparameters.html + and dagobert.modelling.dl.tcn_args for more information on the params. """ # define main vars (other than model) @@ -212,8 +208,7 @@ def __init__(self, hparams: Namespace): if self.hparams.mix_density_net: self.linear_mu = nn.Linear(self.hparams.num_channels[-1], self.density_num) self.linear_sigmasq = nn.Linear( - self.hparams.num_channels[-1], - self.density_num, + self.hparams.num_channels[-1], self.density_num, ) self.linear_mix = nn.Linear(self.hparams.num_channels[-1], self.density_num) self = self.float() diff --git a/src/dagobert/modelling/dl/tcn_args.py b/src/dagobert/modelling/dl/tcn_args.py index d08b0a0b..cd15e7db 100644 --- a/src/dagobert/modelling/dl/tcn_args.py +++ b/src/dagobert/modelling/dl/tcn_args.py @@ -43,10 +43,7 @@ def add_run_specific_args(parent_parser): help="Number of cores to use to prepare the batches.", ) parser.add_argument( - "--exp_name", - type=str, - default="TCN", - help="Name of experiment.", + "--exp_name", type=str, default="TCN", help="Name of experiment.", ) parser.add_argument( "--tags", @@ -236,9 +233,7 @@ def 
add_data_specific_args(parent_parser): # this is just a place-holder so it's easier to read the million params in the cmd parser.add_argument("--DATA_PARAMS", help="====================================") parser.add_argument( - "--data_dir", - type=str, - help="Path to folder holding the data files to use.", + "--data_dir", type=str, help="Path to folder holding the data files to use.", ) parser.add_argument( "--lookback", type=float, default=6, help="Lookback length in hours." @@ -272,9 +267,7 @@ def add_data_specific_args(parent_parser): ), ) parser.add_argument( - "--to_label", - action="store_true", - help="Label datasets before preprocessing.", + "--to_label", action="store_true", help="Label datasets before preprocessing.", ) parser.add_argument( "--label_sl", type=int, default=1, help="Stop-loss barrier size." @@ -286,10 +279,7 @@ def add_data_specific_args(parent_parser): "--label_first_or_max", type=str, default=NBarriers.first, - choices=[ - NBarriers.first, - NBarriers.max, - ], + choices=[NBarriers.first, NBarriers.max,], help="Weather to use the first or maximum barrier-touch.", ) parser.add_argument( diff --git a/src/dagobert/modelling/rl/__init__.py b/src/dagobert/modelling/rl/__init__.py index 56d3ada1..d848e772 100644 --- a/src/dagobert/modelling/rl/__init__.py +++ b/src/dagobert/modelling/rl/__init__.py @@ -1 +1,3 @@ from .environment import RLData, RLPortfolio, RLEnv +from .networks import create_mlp, ActorCriticAgent, ActorContinous +from .ppo import PPO, run_rl diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index 4a198942..dd71b9c2 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -11,9 +11,8 @@ import matplotlib.pyplot as plt from torch.utils.data import Dataset, DataLoader -from dagobert.naming import NRL from dagobert.naming import NPreprocessingArgs as npa -from dagobert.modelling.dl import PortfolioCryptoDataset, Preprocessing, 
TCNLightning +from dagobert.modelling.dl import PortfolioCryptoDataset from dagobert.modelling.rl.utils import sharpe_ratio, max_drawdown @@ -28,9 +27,7 @@ class RLData(object): """ def __init__( - self, - hparams: Namespace, - train_val_test: str = "train", + self, hparams: Namespace, train_val_test: str = "train", ): """ Class constructor. @@ -67,31 +64,15 @@ def __init__( def step(self): Xs, ys = self.dataset[self.idx] y1 = np.concatenate([[1.0], ys]) - episode_full = self.idx == self.hparams.episode_length - 1 + episode_full = self.idx == self.hparams.max_episode_length - 1 done = True if episode_full else False self.idx += 1 return Xs, y1, done def reset(self): - self.idx = np.random.randint(self.dataset_len - self.hparams.episode_length) + self.idx = np.random.randint(self.dataset_len - self.hparams.max_episode_length) return self.step() - @staticmethod - def _pre_sanity_check(hparams: Namespace): - # ensure we have the rl specific target column in the config - if hparams.target_col != NRL.rl_return: - raise ValueError("target_col has to be rl_return for RL tasks.") - - # make sure we have the same cols for each instrument - # this helps to have an environment with a single tensor as state - if len(hparams.cols_to_model) > 1: - for df_name, cols in hparams.cols_to_model.items(): - hparams.cols_to_model[df_name] = hparams.cols_to_model[npa.anchor] - - # MINI SERIES / LOOKBACK - hparams = TCNLightning._pre_sanity_check_mini_series_lookback(hparams) - return hparams - class RLPortfolio(object): """ @@ -110,7 +91,7 @@ class RLPortfolio(object): def __init__( self, asset_names: List[str], - episode_length: int = 1000, + max_episode_length: int = 1000, trading_cost: float = 0.002, reward_type: str = "return", ): @@ -208,39 +189,22 @@ class RLEnv(gym.Env): https://github.com/wassname/rl-portfolio-management """ - def __init__( - self, - hparams: Namespace, - asset_names: List[str], - train_val_test: str = "train", - episode_length: int = 1000, - trading_cost: float 
= 0.001, - reward_type: str = "return", - ): + def __init__(self, hparams: Namespace): """ An environment for financial portfolio management. Args: hparams: - asset_names: - - train_val_test: - episode_length: - trading_cost: - reward_type: + """ - # prepare datafiles if necessary - hparams = RLData._pre_sanity_check(hparams) - hparams = Preprocessing().preprocess_augment_dfs(hparams) - self.hparams = Preprocessing().preprocess_train_dfs(hparams) - - # setup data and portfolio and other vars self.infos = [] - self.asset_names = self.hparams.asset_names - self.asset_n = len(self.asset_names) + self.hparams = hparams + self.asset_n = len(self.hparams.asset_names) self.feat_n = len(self.hparams.cols_to_model[npa.anchor]) self.data = RLData(self.hparams, train_val_test="train") - self.portfolio = RLPortfolio(self.asset_names, self.hparams.episode_length) + self.portfolio = RLPortfolio( + self.hparams.asset_names, self.hparams.max_episode_length + ) # setup openai gym env - include cash in the portfolio action space self.action_space = gym.spaces.Box( diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index 30b1e957..46cbc377 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -1,108 +1,128 @@ +""" +Pytorch Lightning module of Proximal Policy Optimization RL algorithm, taken and +modified from https://github.com/sid-sundrani/ppo_lightning. 
+""" +# pylint: disable=no-member +import logging from typing import List, Tuple +from argparse import Namespace -import pytorch_lightning as pl -from networks import create_mlp, ActorCriticAgent, ActorCategorical, ActorContinous -from data import ExperienceSourceDataset - +import gym import torch -from torch.utils.data import DataLoader -import torch.optim as optim -from torch.optim.optimizer import Optimizer import numpy as np +import torch.optim as optim +from torch.utils.data import DataLoader +from pytorch_lightning import LightningModule +from pytorch_lightning import Trainer, Callback, loggers +from pytorch_lightning.metrics import functional as plm +from pytorch_lightning.callbacks import ModelCheckpoint +from pytorch_lightning.trainer import seed_everything + + +from dagobert.naming import NRL, NPreprocessingArgs as npa +from dagobert.modelling.rl import ( + RLEnv, + create_mlp, + ActorCriticAgent, + ActorContinous, +) +from dagobert.modelling.dl import ( + ExperienceSourceDataset, + Preprocessing, + TCNLightning, +) + + +logger = logging.getLogger(__name__) + + +def run_rl(args): + # setup loggers + seed_everything(args.seed) + tb_logger_name = None + comet_name = args.exp_name + tcn_loggers = [] + tb_logger = loggers.TensorBoardLogger( + save_dir=Path(args.log_dir), name=args.exp_name, version=tb_logger_name + ) + tcn_loggers.append(tb_logger) + if not args.no_comet_logger: + tcn_loggers.append( + loggers.CometLogger( + api_key=NStudy.comet_api_key, + workspace=NStudy.comet_workspace, + save_dir=args.log_dir, + project_name=NStudy.comet_project_name, + experiment_name=f"{comet_name}_{tb_logger.version}", + ) + ) -try: - import gym -except ModuleNotFoundError: - _GYM_AVAILABLE = False -else: - _GYM_AVAILABLE = True - - -class PPO(pl.LightningModule): + # setup callbacks + checkpoint_callback = ModelCheckpoint( + monitor="loss/val", + filename="_{epoch:02d}_{loss_val:.10f}", + dirpath=f"{args.log_dir}/models/{args.exp_name}_{tb_logger.version}", + 
save_top_k=3, + mode="max", + ) + + # define trainer and and lightning module + args.multiprocessing = True if args.gpus != 1 else False + trainer = Trainer.from_argparse_args( + args, + logger=tcn_loggers, + checkpoint_callback=checkpoint_callback, + callbacks=[early_stop_callback, metrics_callback, LearningRateMonitor(),], + ) + model = PPO(args) + trainer.fit(model) + # trainer.test() + + # return the validation and test loss for Optuna mainly + try: + val_loss = metrics_callback.get_min_max_metric("loss/val") + test_loss = metrics_callback.get_min_max_metric("loss/test") + except: + val_loss, test_loss = np.nan, np.nan + return val_loss, test_loss + rld = RLData(args) + + +class PPO(LightningModule): """ - PyTorch Lightning implementation of `PPO - `_ - Paper authors: John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, Oleg Klimov - - Example: - model = PPO("CartPole-v0") - Train: - trainer = Trainer() - trainer.fit(model) - Note: - This example is based on: - https://github.com/openai/baselines/blob/master/baselines/ppo2/ppo2.py - https://github.com/PyTorchLightning/pytorch-lightning-bolts/blob/master/pl_bolts/models/rl/reinforce_model.py - + PyTorch Lightning implementation of PPO https://arxiv.org/abs/1707.06347 + The algorithm closely follows this: + https://github.com/openai/baselines/blob/master/baselines/ppo2/ppo2.py """ - def __init__( - self, - env: str, - gamma: float = 0.99, - lam: float = 0.95, - lr_actor: float = 3e-4, - lr_critic: float = 1e-3, - max_episode_len: float = 200, - batch_size: int = 512, - steps_per_epoch: int = 2048, - nb_optim_iters: int = 4, - clip_ratio: float = 0.2, - ) -> None: - + def __init__(self, hparams: Namespace): """ + Class constructor. 
+ Args: - env: gym environment tag - gamma: discount factor - lam: advantage discount factor (lambda in the paper) - lr_actor: learning rate of actor network - lr_critic: learning rate of critic network - max_episode_len: maximum number interactions (actions) in an episode - batch_size: batch_size when training network- can simulate number of policy updates performed per epoch - steps_per_epoch: how many action-state pairs to rollout for trajectory collection per epoch - nb_optim_iters: how many steps of gradient descent to perform on each batch - clip_ratio: hyperparameter for clipping in the policy objective + hparams: Hyper-params passed in to the module. See the docs for more details + https://pytorch-lightning.readthedocs.io/en/latest/hyperparameters.html + and dagobert.modelling.rl.rl_args for more information on the params. """ super().__init__() - if not _GYM_AVAILABLE: - raise ModuleNotFoundError( - "This Module requires gym environment which is not installed yet." - ) + # sanity check and setup device + hparams = PPO._pre_sanity_check(hparams) + self.tcn_device = "cuda" if hparams.gpus > 0 else "cpu" - # Hyperparameters - self.lr_actor = lr_actor - self.lr_critic = lr_critic - self.steps_per_epoch = steps_per_epoch - self.nb_optim_iters = nb_optim_iters - self.batch_size = batch_size - self.gamma = gamma - self.lam = lam - self.max_episode_len = max_episode_len - self.clip_ratio = clip_ratio - self.save_hyperparameters() - - self.env = gym.make(env) - # value network + # prepare datafiles if necessary + hparams = Preprocessing().preprocess_augment_dfs(hparams) + self.hparams = Preprocessing().preprocess_train_dfs(hparams) + + # create env and policy/value networks + self.env = RLEnv(hparams) self.critic = create_mlp(self.env.observation_space.shape, 1) - # policy network (agent) - if type(self.env.action_space) == gym.spaces.box.Box: - act_dim = self.env.action_space.shape[0] - actor_mlp = create_mlp(self.env.observation_space.shape, act_dim) - self.actor 
= ActorContinous(actor_mlp, act_dim) - elif type(self.env.action_space) == gym.spaces.discrete.Discrete: - actor_mlp = create_mlp( - self.env.observation_space.shape, self.env.action_space.n - ) - self.actor = ActorCategorical(actor_mlp) - else: - raise NotImplementedError( - "Env action space should be of type Box (continous) or Discrete (categorical)" - "Got type: ", - type(self.env.action_space), - ) + act_dim = self.env.action_space.shape[0] + actor_mlp = create_mlp(self.env.observation_space.shape, act_dim) + self.actor = ActorContinous(actor_mlp, act_dim) self.agent = ActorCriticAgent(self.actor, self.critic) + # init self.batch_states = [] self.batch_actions = [] self.batch_adv = [] @@ -168,10 +188,10 @@ def calc_advantage( vals = values + [last_value] # GAE delta = [ - rews[i] + self.gamma * vals[i + 1] - vals[i] for i in range(len(rews) - 1) + rews[i] + self.hparams.gamma * vals[i + 1] - vals[i] + for i in range(len(rews) - 1) ] - adv = self.discount_rewards(delta, self.gamma * self.lam) - + adv = self.discount_rewards(delta, self.hparams.gamma * self.hparams.lam) return adv def train_batch( @@ -183,7 +203,7 @@ def train_batch( Tuple of Lists containing tensors for states, actions, log probs, qvals and advantage """ - for step in range(self.steps_per_epoch): + for step in range(self.hparams.steps_per_epoch): pi, action, log_prob, value = self.agent(self.state, self.device) next_state, reward, done, _ = self.env.step(action.cpu().numpy()) @@ -196,8 +216,8 @@ def train_batch( self.state = torch.FloatTensor(next_state) - epoch_end = step == (self.steps_per_epoch - 1) - terminal = len(self.ep_rewards) == self.max_episode_len + epoch_end = step == (self.hparams.steps_per_epoch - 1) + terminal = len(self.ep_rewards) == self.hparams.max_episode_length if epoch_end or done or terminal: # if trajectory ends abtruptly, boostrap value of next state @@ -210,7 +230,7 @@ def train_batch( # discounted cumulative reward self.batch_qvals += self.discount_rewards( - 
self.ep_rewards + [last_value], self.gamma + self.ep_rewards + [last_value], self.hparams.gamma )[:-1] # advantage self.batch_adv += self.calc_advantage( @@ -316,14 +336,16 @@ def training_step( return loss_critic - def configure_optimizers(self) -> List[Optimizer]: + def configure_optimizers(self) -> List[optim.Optimizer]: """ Initialize Adam optimizer""" - optimizer_actor = optim.Adam(self.actor.parameters(), lr=self.lr_actor) - optimizer_critic = optim.Adam(self.critic.parameters(), lr=self.lr_critic) + optimizer_actor = optim.Adam(self.actor.parameters(), lr=self.hparams.lr_actor) + optimizer_critic = optim.Adam( + self.critic.parameters(), lr=self.hparams.lr_critic + ) # to run multple steps of gradient descent optimizers = [] - for i in range(self.nb_optim_iters): + for i in range(self.hparams.n_optim_iters): optimizers.append(optimizer_actor) optimizers.append(optimizer_critic) @@ -332,9 +354,25 @@ def configure_optimizers(self) -> List[Optimizer]: def _dataloader(self) -> DataLoader: """Initialize the Replay Buffer dataset used for retrieving experiences""" dataset = ExperienceSourceDataset(self.train_batch) - dataloader = DataLoader(dataset=dataset, batch_size=self.batch_size) + dataloader = DataLoader(dataset=dataset, batch_size=self.hparams.batch_size) return dataloader def train_dataloader(self) -> DataLoader: """Get train loader""" return self._dataloader() + + @staticmethod + def _pre_sanity_check(hparams: Namespace): + # ensure we have the rl specific target column in the config + if hparams.target_col != NRL.rl_return: + raise ValueError("target_col has to be rl_return for RL tasks.") + + # make sure we have the same cols for each instrument + # this helps to have an environment with a single tensor as state + if len(hparams.cols_to_model) > 1: + for df_name, cols in hparams.cols_to_model.items(): + hparams.cols_to_model[df_name] = hparams.cols_to_model[npa.anchor] + + # MINI SERIES / LOOKBACK + hparams = 
TCNLightning._pre_sanity_check_mini_series_lookback(hparams) + return hparams diff --git a/src/dagobert/modelling/rl/rl.py b/src/dagobert/modelling/rl/rl.py deleted file mode 100644 index 35f465d9..00000000 --- a/src/dagobert/modelling/rl/rl.py +++ /dev/null @@ -1,6 +0,0 @@ -from dagobert.modelling.rl import RLEnv, RLData - - -def run_rl(args): - rld = RLData(args) - # env = RLEnv(args) diff --git a/src/dagobert/modelling/rl/rl_args.py b/src/dagobert/modelling/rl/rl_args.py new file mode 100644 index 00000000..01d2f9dd --- /dev/null +++ b/src/dagobert/modelling/rl/rl_args.py @@ -0,0 +1,177 @@ +""" +All custom arguments and hyper-parameters for the reinforcement learning module. +""" + +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + +from pytorch_lightning import Trainer + +from dagobert.modelling.dl.tcn import TCNLightning +from dagobert.modelling.dl.tcn_args import ( + add_run_specific_args, + add_data_specific_args, + add_preprocessing_specific_args, +) +from dagobert.naming import ( + NInputDataCols, + NAugmentationMethods, + NBarriers, + NPreprocessingArgs, +) + + +def add_rl_specific_args(parent_parser): + parser = ArgumentParser( + parents=[parent_parser], + add_help=False, + formatter_class=ArgumentDefaultsHelpFormatter, + ) + + # this is just a place-holder so it's easier to read the million params in the cmd + parser.add_argument("--RL_PARAMS", help="====================================") + parser.add_argument( + "--asset_names", + type=str, + nargs="+", + default=["BTC", "ETH"], + help=( + "Names of instruments to include in the portfolio, corresponding to " + "anchor, df2, df3, etc." + ), + ) + parser.add_argument( + "--trading_cost", + type=float, + default=0.002, + help="Commission rate of making trades + an estimated cost of slippage.", + ) + parser.add_argument( + "--reward_type", + type=str, + default="return", + help=( + "Determines the overall reward to maximise by the agent. Either return or " + "sharpe. 
See RLPortfolio class for more details." + ), + ) + parser.add_argument( + "--max_episode_length", + type=int, + default=1000, + help=( + "Maximum number of interactions between the agent and the environment in " + "an episode." + ), + ) + parser.add_argument( + "--steps_per_epoch", + type=int, + default=10000, + help=( + "How many action-state pairs to rollout for trajectory collection per " + "epoch. I.e. if all episodes run to their max_episode_length, we'll have " + "steps_per_epoch/max_episode_length number of unique episodes/trajectories." + ), + ) + parser.add_argument( + "--n_optim_iters", + type=int, + default=4, + help=( + "How many steps of gradient descent to perform on each batch. This might " + "seem weird, but it helps sampling efficiency, done by the original PPO " + "implementation and the Google ablation study found it to be useful." + ), + ) + parser.add_argument( + "--gamma", type=float, default=0.99, help="Discounting of rewards." + ) + parser.add_argument( + "--lam", + type=float, + default=0.95, + help="Lambda parameter in the advantage discounting equation.", + ) + parser.add_argument( + "--lr_actor", + type=float, + default=0.0003, + help="Learning rate for the actor/policy network.", + ) + parser.add_argument( + "--lr_critic", + type=float, + default=0.001, + help="Learning rate for the critic/value network.", + ) + parser.add_argument( + "--clip_ratio", + type=float, + default=0.2, + help="Clipping parameter for the PPO's policy upgrade cost function.", + ) + + return parser + + +def add_model_specific_args(parent_parser): + parser = ArgumentParser( + parents=[parent_parser], + add_help=False, + formatter_class=ArgumentDefaultsHelpFormatter, + ) + + # this is just a place-holder so it's easier to read the million params in the cmd + parser.add_argument("--MODEL_PARAMS", help="====================================") + parser.add_argument( + "--actor_num_channels", + type=int, + nargs="+", + default=[50, 50, 50, 50, 50], + help=( + 
"Determines the number of layers (depth) of the actor / policy network and " + "the hidden unit count in each layer." + ), + ) + parser.add_argument( + "--critic_num_channels", + type=int, + nargs="+", + default=[50, 50, 50, 50, 50], + help=( + "Determines the number of layers (depth) of the critic / value network and " + "the hidden unit count in each layer." + ), + ) + parser.add_argument("--actor_kernel_size", type=int, default=5, help=" ") + parser.add_argument("--critic_kernel_size", type=int, default=5, help=" ") + parser.add_argument("--actor_dropout", type=float, default=0, help=" ") + parser.add_argument("--critic_dropout", type=float, default=0, help=" ") + parser.add_argument( + "--no_class_weights", + action="store_true", + help=( + "Set this to True so we can leverage the Preprocessing pipeline written " + "for the supervised DL module." + ), + ) + + return parser + + +def get_all_args(): + parser = ArgumentParser( + description="Lightning RL module", + formatter_class=ArgumentDefaultsHelpFormatter, + ) + + # add model params of lightning trainer (this HAS to be first) + parser = Trainer.add_argparse_args(parser) + + # add model and run specific params + parser = add_rl_specific_args(parser) + parser = add_model_specific_args(parser) + parser = add_run_specific_args(parser) + parser = add_data_specific_args(parser) + parser = add_preprocessing_specific_args(parser) + return parser.parse_args() diff --git a/src/dagobert/modelling/rl/rl_runner.py b/src/dagobert/modelling/rl/rl_runner.py index 8a777213..d7d23845 100644 --- a/src/dagobert/modelling/rl/rl_runner.py +++ b/src/dagobert/modelling/rl/rl_runner.py @@ -3,15 +3,15 @@ This module is driven by the `dagobert-rl` command which can be parametrised by command line arguments, but it's much more convenient to use YAML configs for this, -see the `tcn_args.py` for more detail. +see the `tcn_args.py` and `rl_args.py` for more detail. 
""" import logging from pathlib import Path from dagobert.utils import setup_logging from dagobert.runner_utils import load_config, update_args -from dagobert.modelling.dl.tcn_args import get_all_args -from dagobert.modelling.rl.rl import run_rl +from dagobert.modelling.rl.rl_args import get_all_args +from dagobert.modelling.rl import run_rl logger = logging.getLogger(__name__) @@ -19,7 +19,7 @@ def run(): """ - Run a single TCN training or parallelized hyper parameter tuning study using optuna. + Initialise a reinforcement-learning environment and a PPO agent and train it. """ # parse arguments amd setup logging From d35e2cafe2b5983c04769d915cdeefff66127cc1 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Fri, 1 Jan 2021 13:49:33 +0000 Subject: [PATCH 08/62] adding rl args and setting up the runner properly, time to go through the networks and the actual algo --- .pre-commit-config.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 25d9788a..9decdab1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,5 +2,4 @@ repos: - repo: https://github.com/ambv/black rev: 20.8b1 hooks: - - id: black - language_version: python3.7 \ No newline at end of file + - id: black \ No newline at end of file From d6f8fd3c0758788e5a7ad16c2c579667dc50c78f Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Fri, 1 Jan 2021 18:07:52 +0000 Subject: [PATCH 09/62] going through ppo and networks to make it work with TCNs --- .pre-commit-config.yaml | 3 +- config/rl_config.yaml | 1 + src/dagobert/modelling/rl/__init__.py | 2 +- src/dagobert/modelling/rl/environment.py | 9 +-- src/dagobert/modelling/rl/networks.py | 82 ++++++++++++------------ src/dagobert/modelling/rl/ppo.py | 62 +++++++++--------- src/dagobert/modelling/rl/rl_args.py | 11 +++- 7 files changed, 93 insertions(+), 77 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9decdab1..25d9788a 100644 --- 
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,4 +2,5 @@ repos: - repo: https://github.com/ambv/black rev: 20.8b1 hooks: - - id: black \ No newline at end of file + - id: black + language_version: python3.7 \ No newline at end of file diff --git a/config/rl_config.yaml b/config/rl_config.yaml index dae94a58..6671c217 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -60,6 +60,7 @@ actor_dropout: 0.25 critic_num_channels: [50, 50, 50, 50, 50] critic_kernel_size: 5 critic_dropout: 0.25 +use_last_timepoint: True # -------------------------------------------------------------------------------------- # DATA diff --git a/src/dagobert/modelling/rl/__init__.py b/src/dagobert/modelling/rl/__init__.py index d848e772..4df05a47 100644 --- a/src/dagobert/modelling/rl/__init__.py +++ b/src/dagobert/modelling/rl/__init__.py @@ -1,3 +1,3 @@ from .environment import RLData, RLPortfolio, RLEnv -from .networks import create_mlp, ActorCriticAgent, ActorContinous +from .networks import build_tcn, ActorCriticAgent, ActorContinous from .ppo import PPO, run_rl diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index dd71b9c2..0405a6c7 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -27,7 +27,9 @@ class RLData(object): """ def __init__( - self, hparams: Namespace, train_val_test: str = "train", + self, + hparams: Namespace, + train_val_test: str = "train", ): """ Class constructor. @@ -38,7 +40,6 @@ def __init__( train_val_test: Whether we are training, validating or testing, it must be either train, val or test. 
""" - self.idx = 0 self.hparams = hparams if train_val_test == "train": @@ -59,7 +60,7 @@ def __init__( augment_dfs_mix=self.hparams.augment_dfs_mix, ) self.dataset_len = len(self.dataset) - self.reset() + self.idx = np.random.randint(self.dataset_len - self.hparams.max_episode_length) def step(self): Xs, ys = self.dataset[self.idx] @@ -195,7 +196,7 @@ def __init__(self, hparams: Namespace): Args: hparams: - + """ self.infos = [] self.hparams = hparams diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 4e2745b6..c45824cb 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -1,60 +1,60 @@ +# pylint: disable=no-member +from argparse import Namespace from typing import Union, Tuple import torch from torch import nn from torch.distributions import Categorical, Normal +from dagobert.modelling.dl import TemporalConvNet -def create_mlp(input_shape: Tuple[int], n_actions: int, hidden_sizes: list = [64, 64]): - """ - Simple Multi-Layer Perceptron network - """ - net_layers = [] - net_layers.append(nn.Linear(input_shape[0], hidden_sizes[0])) - net_layers.append(nn.ReLU()) - - for i in range(len(hidden_sizes) - 1): - net_layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i + 1])) - net_layers.append(nn.ReLU()) - net_layers.append(nn.Linear(hidden_sizes[-1], n_actions)) - - return nn.Sequential(*net_layers) - -class ActorCategorical(nn.Module): +class ActorCriticTCN(nn.Module): """ - Policy network, for discrete action spaces, which returns a distribution - and an action given an observation + Creates either the actor/policy or the critic/value network as a TCN net, followed + by the appropriate linear layers. 
""" - def __init__(self, actor_net): + def __init__( + self, hparams: Namespace, n_actions: int, actor: bool = True + ) -> nn.Module: """ - Args: - input_shape: observation shape of the environment - n_actions: number of discrete actions available in the environment - """ - super().__init__() - - self.actor_net = actor_net + Init a TCN like we do in `dagobert.modelling.dl.tcn`. - def forward(self, states): - logits = self.actor_net(states) - pi = Categorical(logits=logits) - actions = pi.sample() - - return pi, actions - - def get_log_prob(self, pi: Categorical, actions: torch.Tensor): - """ - Takes in a distribution and actions and returns log prob of actions - under the distribution Args: - pi: torch distribution - actions: actions taken by distribution + hparams: Hparam parsed and updated by PPO module in dagobert.modelling.rl. + n_actions: Number of units at the end of the network: different for actor/critic + actor: If True, we are using the network params in hparams for the actor net, + else we take the params for the critic. + Returns: - log probability of the acition under pi + Initiated TCN with the appropriate size for actor or critic. 
""" - return pi.log_prob(actions) + self.hparams = hparams + num_inputs = [len(cols) for dataset, cols in hparams.cols_to_model.items()] + num_channels = ( + hparams.actor_num_channels if actor else hparams.critic_num_channels + ) + kernel_size = hparams.actor_kernel_size if actor else hparams.critic_kernel_size + dropout = hparams.actor_dropout if actor else hparams.critic_dropout + self.tcn = TemporalConvNet( + num_inputs=num_inputs, + num_channels=num_channels, + kernel_size=kernel_size, + dropout=dropout, + time_feat_n=hparams.time_feat_n, + time_embed_dim=hparams.time_embed_dim, + ) + self.linear1 = nn.Linear(hparams.mini_series_length, 1) + self.linear2 = nn.Linear(num_channels[-1], hparams.n_actions) + + def forward(self, *x): + y1 = self.tcn(*x) + if self.hparams.use_last_timepoint: + return self.linear2(y1[:, :, -1]) + else: + y2 = nn.functional.relu(self.linear1(y1).squeeze(-1)) + return self.linear2(y2) class ActorContinous(nn.Module): diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index 46cbc377..dc51ccd9 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -4,6 +4,7 @@ """ # pylint: disable=no-member import logging +from pathlib import Path from typing import List, Tuple from argparse import Namespace @@ -19,12 +20,12 @@ from pytorch_lightning.trainer import seed_everything -from dagobert.naming import NRL, NPreprocessingArgs as npa +from dagobert.naming import NRL, NStudy, NPreprocessingArgs as npa from dagobert.modelling.rl import ( RLEnv, - create_mlp, - ActorCriticAgent, + build_tcn, ActorContinous, + ActorCriticAgent, ) from dagobert.modelling.dl import ( ExperienceSourceDataset, @@ -72,21 +73,11 @@ def run_rl(args): args, logger=tcn_loggers, checkpoint_callback=checkpoint_callback, - callbacks=[early_stop_callback, metrics_callback, LearningRateMonitor(),], ) model = PPO(args) trainer.fit(model) # trainer.test() - # return the validation and test loss for Optuna mainly - try: - 
val_loss = metrics_callback.get_min_max_metric("loss/val") - test_loss = metrics_callback.get_min_max_metric("loss/test") - except: - val_loss, test_loss = np.nan, np.nan - return val_loss, test_loss - rld = RLData(args) - class PPO(LightningModule): """ @@ -116,10 +107,9 @@ def __init__(self, hparams: Namespace): # create env and policy/value networks self.env = RLEnv(hparams) - self.critic = create_mlp(self.env.observation_space.shape, 1) + self.critic = build_tcn(hparams, 1, actor=False) act_dim = self.env.action_space.shape[0] - actor_mlp = create_mlp(self.env.observation_space.shape, act_dim) - self.actor = ActorContinous(actor_mlp, act_dim) + self.actor = ActorContinous(build_tcn(hparams, act_dim), act_dim) self.agent = ActorCriticAgent(self.actor, self.critic) # init @@ -144,9 +134,11 @@ def forward( self, x: torch.Tensor ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ - Passes in a state x through the network and returns the policy and a sampled action + Passes state x through the network and returns the policy and a sampled action. + Args: x: environment state + Returns: Tuple of policy and action """ @@ -156,9 +148,12 @@ def forward( return pi, action, value def discount_rewards(self, rewards: List[float], discount: float) -> List[float]: - """Calculate the discounted rewards of all rewards in list + """ + Calculate the discounted rewards of all rewards in list. + Args: rewards: list of rewards/advantages + Returns: list of discounted rewards/advantages """ @@ -176,11 +171,14 @@ def discount_rewards(self, rewards: List[float], discount: float) -> List[float] def calc_advantage( self, rewards: List[float], values: List[float], last_value: float ) -> List[float]: - """Calculate the advantage given rewards, state values, and the last value of episode + """ + Calculate the advantage given rewards, state values, and last value of episode. 
+ Args: rewards: list of episode rewards values: list of state values from critic last_value: value of last state of episode + Returns: list of advantages """ @@ -196,16 +194,24 @@ def calc_advantage( def train_batch( self, - ) -> Tuple[List[torch.Tensor], List[torch.Tensor], List[torch.Tensor]]: + ) -> Tuple[ + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + ]: """ - Contains the logic for generating trajectory data to train policy and value network + Logic for generating trajectory data to train policy and value network + Yield: - Tuple of Lists containing tensors for states, actions, log probs, qvals and advantage + Tuple of Lists containing tensors for states, actions, log probs, qvals and + advantage. """ for step in range(self.hparams.steps_per_epoch): pi, action, log_prob, value = self.agent(self.state, self.device) - next_state, reward, done, _ = self.env.step(action.cpu().numpy()) + next_state, reward, done, info = self.env.step(action.cpu().numpy()) self.batch_states.append(self.state) self.batch_actions.append(action) @@ -286,7 +292,8 @@ def training_step( self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx, optimizer_idx ): """ - Carries out a single update to actor and critic network from a batch of replay buffer. + Carries out a n_optim_iter number of updates to actor and critic network from a + batch of replay buffer. 
Args: batch: batch of replay buffer/trajectory data @@ -320,7 +327,6 @@ def training_step( prog_bar=True, logger=True, ) - return loss_actor else: @@ -333,7 +339,6 @@ def training_step( prog_bar=False, logger=True, ) - return loss_critic def configure_optimizers(self) -> List[optim.Optimizer]: @@ -343,12 +348,11 @@ def configure_optimizers(self) -> List[optim.Optimizer]: self.critic.parameters(), lr=self.hparams.lr_critic ) - # to run multple steps of gradient descent + # workaround to run multple steps of gradient descent within LightningModule optimizers = [] - for i in range(self.hparams.n_optim_iters): + for _ in range(self.hparams.n_optim_iters): optimizers.append(optimizer_actor) optimizers.append(optimizer_critic) - return optimizers def _dataloader(self) -> DataLoader: diff --git a/src/dagobert/modelling/rl/rl_args.py b/src/dagobert/modelling/rl/rl_args.py index 01d2f9dd..32853ad6 100644 --- a/src/dagobert/modelling/rl/rl_args.py +++ b/src/dagobert/modelling/rl/rl_args.py @@ -155,7 +155,16 @@ def add_model_specific_args(parent_parser): "for the supervised DL module." ), ) - + parser.add_argument( + "--use_last_timepoint", + action="store_true", + help=( + "If this flag is used the only the network's representation " + "corresponding at the latest time-point is used to predict the outcome." + "By default, we combine all representations across the sequence length" + "to make a prediction from, instead of just using the last one." 
+ ), + ) return parser From 67f84a33c66719e1934af1fdcf823749632c3fd1 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Fri, 1 Jan 2021 19:20:40 +0000 Subject: [PATCH 10/62] trying to get the tensor dims right to pass data through the TCN - not working yet --- config/rl_config.yaml | 2 +- src/dagobert/modelling/dl/tcn.py | 39 ++++++++++++++------ src/dagobert/modelling/rl/__init__.py | 4 +-- src/dagobert/modelling/rl/environment.py | 6 ++-- src/dagobert/modelling/rl/networks.py | 7 ++-- src/dagobert/modelling/rl/ppo.py | 45 +++++++++++++----------- src/dagobert/modelling/rl/rl_runner.py | 2 +- 7 files changed, 64 insertions(+), 41 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 6671c217..a651a58a 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -7,7 +7,7 @@ gpus: 1 pin_memory: True profiler: True -val_check_interval: 0.5 +#val_check_interval: 0.5 # enable it with 'power' or 'binsearch' auto_scale_batch_size: #precision: 16 diff --git a/src/dagobert/modelling/dl/tcn.py b/src/dagobert/modelling/dl/tcn.py index 6aacde87..8d0e2cc3 100644 --- a/src/dagobert/modelling/dl/tcn.py +++ b/src/dagobert/modelling/dl/tcn.py @@ -130,7 +130,11 @@ def setup_and_run_tcn_lightning(args: Namespace, study: bool = False): args, logger=tcn_loggers, checkpoint_callback=checkpoint_callback, - callbacks=[early_stop_callback, metrics_callback, LearningRateMonitor(),], + callbacks=[ + early_stop_callback, + metrics_callback, + LearningRateMonitor(), + ], ) model = TCNLightning(args) @@ -208,7 +212,8 @@ def __init__(self, hparams: Namespace): if self.hparams.mix_density_net: self.linear_mu = nn.Linear(self.hparams.num_channels[-1], self.density_num) self.linear_sigmasq = nn.Linear( - self.hparams.num_channels[-1], self.density_num, + self.hparams.num_channels[-1], + self.density_num, ) self.linear_mix = nn.Linear(self.hparams.num_channels[-1], self.density_num) self = self.float() @@ -661,14 +666,27 @@ def _pre_sanity_check(hparams: Namespace) -> 
Namespace: @staticmethod def _pre_sanity_check_mini_series_lookback(hparams: Namespace) -> Namespace: - """Calculate lookback and mini_series_length if necessary.""" - net_depth = len(hparams.num_channels) - k_size = hparams.kernel_size - max_seq_len = TemporalConvNet.get_tcn_receptive_field_size(k_size, net_depth) - logger.info( - f"A TCN with kernel size: {k_size} and depth: {net_depth} has a receptive " - f"field (can read a maximum sequence length) of {max_seq_len}." - ) + """ + Calculate lookback and mini_series_length if necessary. + + Note, this works for both DL and RL (with two simultaneously trained nets). + However, for RL, we use the actor network's params to set the mini_series_len. + """ + + cases = ["", "critic_", "actor_"] + for case in cases: + num_channels = f"{case}num_channels" + k_size = f"{case}kernel_size" + if num_channels in hparams: + net_depth = len(hparams.__getattribute__(num_channels)) + k_size = hparams.__getattribute__(k_size) + max_seq_len = TemporalConvNet.get_tcn_receptive_field_size( + k_size, net_depth + ) + logger.info( + f"A {case}TCN with kernel size: {k_size} and depth: {net_depth} " + f"can read a maximum sequence length of {max_seq_len}." + ) if hparams.mini_series_length == "auto": logger.info(f"We set mini_series_length from 'auto' to {max_seq_len}.") hparams.mini_series_length = max_seq_len @@ -687,6 +705,7 @@ def _pre_sanity_check_mini_series_lookback(hparams: Namespace) -> Namespace: f"The current mini_series_legnth {hparams.mini_series_length}, " f"corresponds to an estimated lookback of {hparams.lookback} hours." 
) + return hparams def _sanity_check(self): """ diff --git a/src/dagobert/modelling/rl/__init__.py b/src/dagobert/modelling/rl/__init__.py index 4df05a47..d4900664 100644 --- a/src/dagobert/modelling/rl/__init__.py +++ b/src/dagobert/modelling/rl/__init__.py @@ -1,3 +1,3 @@ from .environment import RLData, RLPortfolio, RLEnv -from .networks import build_tcn, ActorCriticAgent, ActorContinous -from .ppo import PPO, run_rl +from .networks import ActorCriticTCN, ActorCriticAgent, ActorContinous +from .ppo import PPO diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index 0405a6c7..a969b2ae 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -6,9 +6,8 @@ from argparse import Namespace import gym +import torch import numpy as np -import pandas as pd -import matplotlib.pyplot as plt from torch.utils.data import Dataset, DataLoader from dagobert.naming import NPreprocessingArgs as npa @@ -65,6 +64,7 @@ def __init__( def step(self): Xs, ys = self.dataset[self.idx] y1 = np.concatenate([[1.0], ys]) + Xs = [torch.Tensor(x).unsqueeze(0) for x in Xs] episode_full = self.idx == self.hparams.max_episode_length - 1 done = True if episode_full else False self.idx += 1 @@ -209,7 +209,7 @@ def __init__(self, hparams: Namespace): # setup openai gym env - include cash in the portfolio action space self.action_space = gym.spaces.Box( - 0.0, 1.0, shape=(self.asset_n + 1), dtype=np.float32 + 0.0, 1.0, shape=(self.asset_n + 1,), dtype=np.float32 ) # get the observation space from the data min and max diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index c45824cb..16c18d3b 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -30,6 +30,7 @@ def __init__( Returns: Initiated TCN with the appropriate size for actor or critic. 
""" + super().__init__() self.hparams = hparams num_inputs = [len(cols) for dataset, cols in hparams.cols_to_model.items()] num_channels = ( @@ -46,9 +47,9 @@ def __init__( time_embed_dim=hparams.time_embed_dim, ) self.linear1 = nn.Linear(hparams.mini_series_length, 1) - self.linear2 = nn.Linear(num_channels[-1], hparams.n_actions) + self.linear2 = nn.Linear(num_channels[-1], n_actions) - def forward(self, *x): + def forward(self, x): y1 = self.tcn(*x) if self.hparams.use_last_timepoint: return self.linear2(y1[:, :, -1]) @@ -119,7 +120,7 @@ def __call__(self, state: torch.Tensor, device: str) -> Tuple: torch dsitribution and randomly sampled action """ - state = state.to(device=device) + state = [s.to(device=device) for s in state] pi, actions = self.actor_net(state) log_p = self.get_log_prob(pi, actions) diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index dc51ccd9..d0fa0c90 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -4,6 +4,7 @@ """ # pylint: disable=no-member import logging +from copy import deepcopy from pathlib import Path from typing import List, Tuple from argparse import Namespace @@ -23,7 +24,7 @@ from dagobert.naming import NRL, NStudy, NPreprocessingArgs as npa from dagobert.modelling.rl import ( RLEnv, - build_tcn, + ActorCriticTCN, ActorContinous, ActorCriticAgent, ) @@ -105,14 +106,17 @@ def __init__(self, hparams: Namespace): hparams = Preprocessing().preprocess_augment_dfs(hparams) self.hparams = Preprocessing().preprocess_train_dfs(hparams) - # create env and policy/value networks + # create env, init starting state and policy/value networks self.env = RLEnv(hparams) - self.critic = build_tcn(hparams, 1, actor=False) + first_Xs = self.env.reset() + self.state = [torch.FloatTensor(x) for x in first_Xs] + + self.critic = ActorCriticTCN(hparams, 1, actor=False) act_dim = self.env.action_space.shape[0] - self.actor = ActorContinous(build_tcn(hparams, act_dim), act_dim) + 
self.actor = ActorContinous(ActorCriticTCN(hparams, act_dim), act_dim) self.agent = ActorCriticAgent(self.actor, self.critic) - # init + # init batching and progress tracking vars self.batch_states = [] self.batch_actions = [] self.batch_adv = [] @@ -128,24 +132,22 @@ def __init__(self, hparams: Namespace): self.avg_ep_len = 0 self.avg_reward = 0 - self.state = torch.FloatTensor(self.env.reset()) - - def forward( - self, x: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Passes state x through the network and returns the policy and a sampled action. + # def forward( + # self, x: torch.Tensor + # ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + # """ + # Passes state x through the network and returns the policy and a sampled action. - Args: - x: environment state + # Args: + # x: environment state - Returns: - Tuple of policy and action - """ - pi, action = self.actor(x) - value = self.critic(x) + # Returns: + # Tuple of policy and action + # """ + # pi, action = self.actor(*x) + # value = self.critic(*x) - return pi, action, value + # return pi, action, value def discount_rewards(self, rewards: List[float], discount: float) -> List[float]: """ @@ -374,8 +376,9 @@ def _pre_sanity_check(hparams: Namespace): # make sure we have the same cols for each instrument # this helps to have an environment with a single tensor as state if len(hparams.cols_to_model) > 1: + anchor_cols = deepcopy(hparams.cols_to_model[npa.anchor]) for df_name, cols in hparams.cols_to_model.items(): - hparams.cols_to_model[df_name] = hparams.cols_to_model[npa.anchor] + hparams.cols_to_model[df_name] = anchor_cols # MINI SERIES / LOOKBACK hparams = TCNLightning._pre_sanity_check_mini_series_lookback(hparams) diff --git a/src/dagobert/modelling/rl/rl_runner.py b/src/dagobert/modelling/rl/rl_runner.py index d7d23845..5b3b9d1c 100644 --- a/src/dagobert/modelling/rl/rl_runner.py +++ b/src/dagobert/modelling/rl/rl_runner.py @@ -11,7 +11,7 @@ from dagobert.utils import 
setup_logging from dagobert.runner_utils import load_config, update_args from dagobert.modelling.rl.rl_args import get_all_args -from dagobert.modelling.rl import run_rl +from dagobert.modelling.rl.ppo import run_rl logger = logging.getLogger(__name__) From 85c8c9f8733743cb5a5729a1c3fb3f4e868bbcb6 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sat, 2 Jan 2021 12:58:30 +0000 Subject: [PATCH 11/62] solved the data feeding problem, now onto making the env work --- config/rl_config.yaml | 7 +- src/dagobert/modelling/dl/data.py | 10 +-- src/dagobert/modelling/dl/tcn.py | 101 +++++++++++------------ src/dagobert/modelling/dl/tcn_net.py | 9 +- src/dagobert/modelling/rl/environment.py | 9 +- src/dagobert/modelling/rl/ppo.py | 30 +++---- 6 files changed, 74 insertions(+), 92 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index a651a58a..b56cb761 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -4,7 +4,7 @@ # LIGHTNING # -------------------------------------------------------------------------------------- -gpus: 1 +gpus: 0 pin_memory: True profiler: True #val_check_interval: 0.5 @@ -66,7 +66,8 @@ use_last_timepoint: True # DATA # -------------------------------------------------------------------------------------- -data_dir: "C:/Work/dagobert/data/modelling" +#data_dir: "C:/Work/dagobert/data/modelling" +data_dir: "/home/daniel/dagobert_data/modelling" lookback: auto mini_series_length: auto @@ -121,7 +122,7 @@ cols_to_model: - mdi_60 - vr_60 df2: - # the cols of the secondary DFs will automatically be set to anchor's + # the cols of the secondary DFs will automatically be set to anchor's if not defined time_feat_n: 1 time_embed_dim: 12 diff --git a/src/dagobert/modelling/dl/data.py b/src/dagobert/modelling/dl/data.py index 1db71a2b..46748dd3 100644 --- a/src/dagobert/modelling/dl/data.py +++ b/src/dagobert/modelling/dl/data.py @@ -198,9 +198,7 @@ def __getitem__(self, idx): # FUNCTIONS FOR SETUP # 
---------------------------------------------------------------------------------- - def _load_df_anchor( - self, - ) -> pd.DatetimeIndex: + def _load_df_anchor(self,) -> pd.DatetimeIndex: """ Loads the anchor DF, and returns it. We use the anchor df for plotting and to extract the master index which we measure everything else against in batching. @@ -576,10 +574,10 @@ class PortfolioCryptoDataset(CryptoDataset): """ def __init__(self, *args, **kw): - super().__init__(*args, **kw) # for each instrument, we add the rl_return target col to their cols_to_model - for df_name, cols in self.cols_to_model.items(): - self.cols_to_model[df_name].append(NRL.rl_return) + for df_name, _ in kw[npa.cols_to_model].items(): + kw[npa.cols_to_model][df_name].append(NRL.rl_return) + super().__init__(*args, **kw) def __getitem__(self, idx): """ diff --git a/src/dagobert/modelling/dl/tcn.py b/src/dagobert/modelling/dl/tcn.py index 8d0e2cc3..35e858ad 100644 --- a/src/dagobert/modelling/dl/tcn.py +++ b/src/dagobert/modelling/dl/tcn.py @@ -130,11 +130,7 @@ def setup_and_run_tcn_lightning(args: Namespace, study: bool = False): args, logger=tcn_loggers, checkpoint_callback=checkpoint_callback, - callbacks=[ - early_stop_callback, - metrics_callback, - LearningRateMonitor(), - ], + callbacks=[early_stop_callback, metrics_callback, LearningRateMonitor(),], ) model = TCNLightning(args) @@ -212,8 +208,7 @@ def __init__(self, hparams: Namespace): if self.hparams.mix_density_net: self.linear_mu = nn.Linear(self.hparams.num_channels[-1], self.density_num) self.linear_sigmasq = nn.Linear( - self.hparams.num_channels[-1], - self.density_num, + self.hparams.num_channels[-1], self.density_num, ) self.linear_mix = nn.Linear(self.hparams.num_channels[-1], self.density_num) self = self.float() @@ -654,9 +649,6 @@ def _pre_sanity_check(hparams: Namespace) -> Namespace: "Classification is not applicable with mixed density nets" ) - # MINI SERIES / LOOKBACK - hparams = 
TCNLightning._pre_sanity_check_mini_series_lookback(hparams) - # ETC if hparams.augment_dfs and npa.anchor not in hparams.augment_dfs.keys(): raise ValueError( @@ -664,49 +656,6 @@ def _pre_sanity_check(hparams: Namespace) -> Namespace: ) return hparams - @staticmethod - def _pre_sanity_check_mini_series_lookback(hparams: Namespace) -> Namespace: - """ - Calculate lookback and mini_series_length if necessary. - - Note, this works for both DL and RL (with two simultaneously trained nets). - However, for RL, we use the actor network's params to set the mini_series_len. - """ - - cases = ["", "critic_", "actor_"] - for case in cases: - num_channels = f"{case}num_channels" - k_size = f"{case}kernel_size" - if num_channels in hparams: - net_depth = len(hparams.__getattribute__(num_channels)) - k_size = hparams.__getattribute__(k_size) - max_seq_len = TemporalConvNet.get_tcn_receptive_field_size( - k_size, net_depth - ) - logger.info( - f"A {case}TCN with kernel size: {k_size} and depth: {net_depth} " - f"can read a maximum sequence length of {max_seq_len}." - ) - if hparams.mini_series_length == "auto": - logger.info(f"We set mini_series_length from 'auto' to {max_seq_len}.") - hparams.mini_series_length = max_seq_len - if ( - hparams.mini_series_length != "auto" - and hparams.mini_series_length > max_seq_len - ): - logger.warning( - f"Provided mini-series length: {hparams.mini_series_length} is " - f"larger than the networks receptive field size: {max_seq_len}." - ) - # calcualte what the current TCN setup corresponds to in hourly lookback - df_anchor = TCNLightning._load_anchor(hparams) - hparams.lookback = update_lookback(df_anchor, hparams.mini_series_length) - logger.info( - f"The current mini_series_legnth {hparams.mini_series_length}, " - f"corresponds to an estimated lookback of {hparams.lookback} hours." - ) - return hparams - def _sanity_check(self): """ Make sure the options defined in hparams don't contradict each other. 
@@ -724,6 +673,9 @@ def _sanity_check(self): ): raise ValueError("You can either provide both df_val/df_test or neither!") + # MINI SERIES / LOOKBACK + self.hparams = TCNLightning._check_mini_series_lookback(self.hparams) + # TARGET VARIABLE if not self.hparams.regression: if self.hparams.simple_lookahead_reg: @@ -765,3 +717,46 @@ def _sanity_check(self): raise ValueError( "non_last_y_frac has to be between 0 and 1 when using lasy_y=False." ) + + @staticmethod + def _check_mini_series_lookback(hparams: Namespace) -> Namespace: + """ + Calculate lookback and mini_series_length if necessary. + + Note, this works for both DL and RL (with two simultaneously trained nets). + However, for RL, we use the actor network's params to set the mini_series_len. + """ + + cases = ["", "critic_", "actor_"] + for case in cases: + num_channels = f"{case}num_channels" + k_size = f"{case}kernel_size" + if num_channels in hparams: + net_depth = len(hparams.__getattribute__(num_channels)) + k_size = hparams.__getattribute__(k_size) + max_seq_len = TemporalConvNet.get_tcn_receptive_field_size( + k_size, net_depth + ) + logger.info( + f"A {case}TCN with kernel size: {k_size} and depth: {net_depth} " + f"can read a maximum sequence length of {max_seq_len}." + ) + if hparams.mini_series_length == "auto": + logger.info(f"We set mini_series_length from 'auto' to {max_seq_len}.") + hparams.mini_series_length = max_seq_len + if ( + hparams.mini_series_length != "auto" + and hparams.mini_series_length > max_seq_len + ): + logger.warning( + f"Provided mini-series length: {hparams.mini_series_length} is " + f"larger than the networks receptive field size: {max_seq_len}." 
+ ) + # calcualte what the current TCN setup corresponds to in hourly lookback + df_anchor = TCNLightning._load_anchor(hparams) + hparams.lookback = update_lookback(df_anchor, hparams.mini_series_length) + logger.info( + f"The current mini_series_legnth {hparams.mini_series_length}, " + f"corresponds to an estimated lookback of {hparams.lookback} hours." + ) + return hparams diff --git a/src/dagobert/modelling/dl/tcn_net.py b/src/dagobert/modelling/dl/tcn_net.py index b49b139a..0bf2bba7 100644 --- a/src/dagobert/modelling/dl/tcn_net.py +++ b/src/dagobert/modelling/dl/tcn_net.py @@ -40,14 +40,7 @@ class TemporalBlock(nn.Module): """ def __init__( - self, - n_inputs, - n_outputs, - kernel_size, - stride, - dilation, - padding, - dropout=0.2, + self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2, ): super(TemporalBlock, self).__init__() self.conv1 = weight_norm( diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index a969b2ae..89aad13c 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -2,6 +2,7 @@ Class defining PyTorch datasets for supervised modelling of a single instrument. """ import logging +from copy import deepcopy from typing import List, Tuple from argparse import Namespace @@ -26,9 +27,7 @@ class RLData(object): """ def __init__( - self, - hparams: Namespace, - train_val_test: str = "train", + self, hparams: Namespace, train_val_test: str = "train", ): """ Class constructor. 
@@ -49,7 +48,7 @@ def __init__( augment_method = None self.dataset = PortfolioCryptoDataset( df_to_load=getattr(self.hparams, f"df_{train_val_test}"), - cols_to_model=self.hparams.cols_to_model, + cols_to_model=deepcopy(self.hparams.cols_to_model), target_col=self.hparams.target_col, mini_series_length=self.hparams.mini_series_length, data_dir=self.hparams.data_dir, @@ -63,7 +62,9 @@ def __init__( def step(self): Xs, ys = self.dataset[self.idx] + # add cash price (always 1) to the new price vector y1 = np.concatenate([[1.0], ys]) + # turn Xs into a batch of 1 Xs = [torch.Tensor(x).unsqueeze(0) for x in Xs] episode_full = self.idx == self.hparams.max_episode_length - 1 done = True if episode_full else False diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index d0fa0c90..eeba2652 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -71,9 +71,7 @@ def run_rl(args): # define trainer and and lightning module args.multiprocessing = True if args.gpus != 1 else False trainer = Trainer.from_argparse_args( - args, - logger=tcn_loggers, - checkpoint_callback=checkpoint_callback, + args, logger=tcn_loggers, checkpoint_callback=checkpoint_callback, ) model = PPO(args) trainer.fit(model) @@ -97,23 +95,21 @@ def __init__(self, hparams: Namespace): and dagobert.modelling.rl.rl_args for more information on the params. 
""" super().__init__() - # sanity check and setup device hparams = PPO._pre_sanity_check(hparams) self.tcn_device = "cuda" if hparams.gpus > 0 else "cpu" # prepare datafiles if necessary hparams = Preprocessing().preprocess_augment_dfs(hparams) - self.hparams = Preprocessing().preprocess_train_dfs(hparams) + hparams = Preprocessing().preprocess_train_dfs(hparams) + self.hparams = TCNLightning._check_mini_series_lookback(hparams) # create env, init starting state and policy/value networks - self.env = RLEnv(hparams) - first_Xs = self.env.reset() - self.state = [torch.FloatTensor(x) for x in first_Xs] - - self.critic = ActorCriticTCN(hparams, 1, actor=False) + self.env = RLEnv(self.hparams) + self.state = self.env.reset() + self.critic = ActorCriticTCN(self.hparams, 1, actor=False) act_dim = self.env.action_space.shape[0] - self.actor = ActorContinous(ActorCriticTCN(hparams, act_dim), act_dim) + self.actor = ActorContinous(ActorCriticTCN(self.hparams, act_dim), act_dim) self.agent = ActorCriticAgent(self.actor, self.critic) # init batching and progress tracking vars @@ -373,13 +369,11 @@ def _pre_sanity_check(hparams: Namespace): if hparams.target_col != NRL.rl_return: raise ValueError("target_col has to be rl_return for RL tasks.") - # make sure we have the same cols for each instrument - # this helps to have an environment with a single tensor as state + # fill in the same cols for any df that doesn't have the cols_to_model defined if len(hparams.cols_to_model) > 1: - anchor_cols = deepcopy(hparams.cols_to_model[npa.anchor]) for df_name, cols in hparams.cols_to_model.items(): - hparams.cols_to_model[df_name] = anchor_cols - - # MINI SERIES / LOOKBACK - hparams = TCNLightning._pre_sanity_check_mini_series_lookback(hparams) + if df_name != npa.anchor and (cols is None or len(cols) == 0): + hparams.cols_to_model[df_name] = deepcopy( + hparams.cols_to_model[npa.anchor] + ) return hparams From ed384b920c226e0a6c4fac69d48d9e3e65ff0724 Mon Sep 17 00:00:00 2001 From: 
Daniel Homola Date: Sat, 2 Jan 2021 12:58:44 +0000 Subject: [PATCH 12/62] solved the data feeding problem, now onto making the env work --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 25d9788a..1f2b0af4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,4 +3,4 @@ repos: rev: 20.8b1 hooks: - id: black - language_version: python3.7 \ No newline at end of file + \ No newline at end of file From a596113c26712267e10f80883902f4f4d928c4f8 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sat, 2 Jan 2021 14:17:06 +0000 Subject: [PATCH 13/62] nearly training the network.. added dirichlet distribution instead of gaussian. still getting tensor shape mismatch errors.. --- .pre-commit-config.yaml | 2 +- config/rl_config.yaml | 6 ++-- src/dagobert/modelling/rl/environment.py | 8 +++-- src/dagobert/modelling/rl/networks.py | 45 +++++++++++++++--------- src/dagobert/modelling/rl/ppo.py | 40 ++++++++++----------- 5 files changed, 57 insertions(+), 44 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1f2b0af4..25d9788a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,4 +3,4 @@ repos: rev: 20.8b1 hooks: - id: black - \ No newline at end of file + language_version: python3.7 \ No newline at end of file diff --git a/config/rl_config.yaml b/config/rl_config.yaml index b56cb761..3c0e573d 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -4,7 +4,7 @@ # LIGHTNING # -------------------------------------------------------------------------------------- -gpus: 0 +gpus: 1 pin_memory: True profiler: True #val_check_interval: 0.5 @@ -66,8 +66,8 @@ use_last_timepoint: True # DATA # -------------------------------------------------------------------------------------- -#data_dir: "C:/Work/dagobert/data/modelling" -data_dir: "/home/daniel/dagobert_data/modelling" +data_dir: 
"C:/Work/dagobert/data/modelling" +#data_dir: "/home/daniel/dagobert_data/modelling" lookback: auto mini_series_length: auto diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index 89aad13c..33818192 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -27,7 +27,9 @@ class RLData(object): """ def __init__( - self, hparams: Namespace, train_val_test: str = "train", + self, + hparams: Namespace, + train_val_test: str = "train", ): """ Class constructor. @@ -108,7 +110,7 @@ def __init__( which was found to be more stable. """ self.asset_names = asset_names - self.asset_num = len(asset_names) + self.asset_n = len(asset_names) self.trading_cost = trading_cost self.reward_type = reward_type self.reset() @@ -179,7 +181,7 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: def reset(self): self.infos = [] - self.w0 = np.zeros(self.asset_num) + self.w0 = np.zeros(self.asset_n + 1) self.w0[0] = 1 self.p0 = 1.0 diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 16c18d3b..06bfb89c 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -4,7 +4,7 @@ import torch from torch import nn -from torch.distributions import Categorical, Normal +from torch.distributions import Dirichlet from dagobert.modelling.dl import TemporalConvNet @@ -64,29 +64,45 @@ class ActorContinous(nn.Module): and an action given an observation """ - def __init__(self, actor_net, act_dim): + def __init__(self, actor_net): """ + The original PPO can be used for discrete action spaces with a Categorical + distribution or for a continuous actions space with a multivariate Gaussian, + where the network's outputs as raw logits go into it as the vector of mu and + the std is a separate learned parameter (same for all components). 
+ + This is fine if we want to sample unbounded continuos actions between -inf/inf, + but we here we need a mixture of weights for our portfolio that sums up to one. + + The recommended thing to do in RL circles in this setup is to use DDPG, which + is a completely different and deterministic policy gradient algo. Instead of + that, here we implement an idea that I found here on this reddit discussion + https://www.reddit.com/r/reinforcementlearning/comments/cl2kqn/special_case_of_continuous_action_space_rl/ + where they recommend swapping the Gaussian distribution for a Dirichlet one + and sampling our actions from that. This by design returns a probability + summing to one and there's no need to learn a separate std param. + + NOTE! I'm not sure how well this works or how legit it is, as I haven't found + any papers or implementations actually doing this. + Args: input_shape: observation shape of the environment n_actions: number of discrete actions available in the environment """ super().__init__() self.actor_net = actor_net - log_std = -0.5 * torch.ones(act_dim, dtype=torch.float) - self.log_std = torch.nn.Parameter(log_std) def forward(self, states): - mu = self.actor_net(states) - std = torch.exp(self.log_std) - pi = Normal(loc=mu, scale=std) + concentrations = nn.functional.softmax(self.actor_net(states), dim=1).squeeze(0) + pi = Dirichlet(concentrations) actions = pi.sample() - return pi, actions - def get_log_prob(self, pi: Normal, actions: torch.Tensor): + def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor): """ - Takes in a distribution and actions and returns log prob of actions - under the distribution + Takes in a distribution and actions and returns log prob of actions under + the distribution + Args: pi: torch distribution actions: actions taken by distribution @@ -121,17 +137,12 @@ def __call__(self, state: torch.Tensor, device: str) -> Tuple: """ state = [s.to(device=device) for s in state] - pi, actions = self.actor_net(state) log_p = 
self.get_log_prob(pi, actions) - value = self.critic_net(state) - return pi, actions, log_p, value - def get_log_prob( - self, pi: Union[Categorical, Normal], actions: torch.Tensor - ) -> torch.Tensor: + def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor) -> torch.Tensor: """ Takes in the current state and returns the agents policy, a sampled action, log probability of the action, and the value of the state diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index eeba2652..d92676a7 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -71,7 +71,9 @@ def run_rl(args): # define trainer and and lightning module args.multiprocessing = True if args.gpus != 1 else False trainer = Trainer.from_argparse_args( - args, logger=tcn_loggers, checkpoint_callback=checkpoint_callback, + args, + logger=tcn_loggers, + checkpoint_callback=checkpoint_callback, ) model = PPO(args) trainer.fit(model) @@ -108,8 +110,8 @@ def __init__(self, hparams: Namespace): self.env = RLEnv(self.hparams) self.state = self.env.reset() self.critic = ActorCriticTCN(self.hparams, 1, actor=False) - act_dim = self.env.action_space.shape[0] - self.actor = ActorContinous(ActorCriticTCN(self.hparams, act_dim), act_dim) + actor_tcn = ActorCriticTCN(self.hparams, self.env.action_space.shape[0]) + self.actor = ActorContinous(actor_tcn) self.agent = ActorCriticAgent(self.actor, self.critic) # init batching and progress tracking vars @@ -128,22 +130,21 @@ def __init__(self, hparams: Namespace): self.avg_ep_len = 0 self.avg_reward = 0 - # def forward( - # self, x: torch.Tensor - # ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - # """ - # Passes state x through the network and returns the policy and a sampled action. 
- - # Args: - # x: environment state + def forward( + self, *x: List[torch.Tensor] + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Passes state x through the network and returns the policy and a sampled action. - # Returns: - # Tuple of policy and action - # """ - # pi, action = self.actor(*x) - # value = self.critic(*x) + Args: + x: environment state - # return pi, action, value + Returns: + Tuple of policy and action + """ + pi, action = self.actor(*x) + value = self.critic(*x) + return pi, action, value def discount_rewards(self, rewards: List[float], discount: float) -> List[float]: """ @@ -163,7 +164,6 @@ def discount_rewards(self, rewards: List[float], discount: float) -> List[float] for r in reversed(rewards): sum_r = (sum_r * discount) + r cumul_reward.append(sum_r) - return list(reversed(cumul_reward)) def calc_advantage( @@ -218,7 +218,7 @@ def train_batch( self.ep_rewards.append(reward) self.ep_values.append(value.item()) - self.state = torch.FloatTensor(next_state) + self.state = next_state epoch_end = step == (self.hparams.steps_per_epoch - 1) terminal = len(self.ep_rewards) == self.hparams.max_episode_length @@ -246,7 +246,7 @@ def train_batch( # reset params self.ep_rewards = [] self.ep_values = [] - self.state = torch.FloatTensor(self.env.reset()) + self.state = self.env.reset() if epoch_end: train_data = zip( From c32ed32cdaf1dbaeda03c133c9831b241f4e8001 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sat, 2 Jan 2021 16:59:27 +0000 Subject: [PATCH 14/62] ppo is training (and producing nonsensical results) but it's TRAINING in lightninggit status! 
--- src/dagobert/modelling/rl/environment.py | 2 +- src/dagobert/modelling/rl/networks.py | 6 ++- src/dagobert/modelling/rl/ppo.py | 56 +++++++++++------------- 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index 33818192..ded6c16c 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -66,7 +66,7 @@ def step(self): Xs, ys = self.dataset[self.idx] # add cash price (always 1) to the new price vector y1 = np.concatenate([[1.0], ys]) - # turn Xs into a batch of 1 + # turn Xs into a batch of 1, ready to be fed into the actor/critic Xs = [torch.Tensor(x).unsqueeze(0) for x in Xs] episode_full = self.idx == self.hparams.max_episode_length - 1 done = True if episode_full else False diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 06bfb89c..664e82d0 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -77,12 +77,14 @@ def __init__(self, actor_net): The recommended thing to do in RL circles in this setup is to use DDPG, which is a completely different and deterministic policy gradient algo. Instead of that, here we implement an idea that I found here on this reddit discussion - https://www.reddit.com/r/reinforcementlearning/comments/cl2kqn/special_case_of_continuous_action_space_rl/ + https://www.reddit.com/r/reinforcementlearning/comments/cl2kqn/ + special_case_of_continuous_action_space_rl/ where they recommend swapping the Gaussian distribution for a Dirichlet one and sampling our actions from that. This by design returns a probability summing to one and there's no need to learn a separate std param. - NOTE! I'm not sure how well this works or how legit it is, as I haven't found + NOTE! + I'm not sure how well this works or how legit it is, as I haven't found any papers or implementations actually doing this. 
Args: diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index d92676a7..0c70dab8 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -61,8 +61,8 @@ def run_rl(args): # setup callbacks checkpoint_callback = ModelCheckpoint( - monitor="loss/val", - filename="_{epoch:02d}_{loss_val:.10f}", + monitor="avg_reward", + filename="_{epoch:02d}_{avg_reward:.10f}", dirpath=f"{args.log_dir}/models/{args.exp_name}_{tb_logger.version}", save_top_k=3, mode="max", @@ -130,21 +130,21 @@ def __init__(self, hparams: Namespace): self.avg_ep_len = 0 self.avg_reward = 0 - def forward( - self, *x: List[torch.Tensor] - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Passes state x through the network and returns the policy and a sampled action. + # def forward( + # self, states: List[torch.Tensor] + # ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + # """ + # Passes state x through the network and returns the policy and a sampled action. 
- Args: - x: environment state + # Args: + # x: environment state - Returns: - Tuple of policy and action - """ - pi, action = self.actor(*x) - value = self.critic(*x) - return pi, action, value + # Returns: + # Tuple of policy and action + # """ + # pi, action = self.actor(states) + # value = self.critic(states) + # return pi, action, value def discount_rewards(self, rewards: List[float], discount: float) -> List[float]: """ @@ -160,7 +160,6 @@ def discount_rewards(self, rewards: List[float], discount: float) -> List[float] cumul_reward = [] sum_r = 0.0 - for r in reversed(rewards): sum_r = (sum_r * discount) + r cumul_reward.append(sum_r) @@ -211,13 +210,12 @@ def train_batch( pi, action, log_prob, value = self.agent(self.state, self.device) next_state, reward, done, info = self.env.step(action.cpu().numpy()) - self.batch_states.append(self.state) + # drop first batch dim so dataloader later can resample them for backprop + self.batch_states.append([s.squeeze(0) for s in self.state]) self.batch_actions.append(action) self.batch_logp.append(log_prob) - self.ep_rewards.append(reward) self.ep_values.append(value.item()) - self.state = next_state epoch_end = step == (self.hparams.steps_per_epoch - 1) @@ -267,8 +265,8 @@ def train_batch( self.batch_qvals.clear() self.avg_ep_reward = self.epoch_rewards / self.done_episodes - self.avg_reward = self.epoch_rewards / self.steps_per_epoch - self.avg_ep_len = self.steps_per_epoch / self.done_episodes + self.avg_reward = self.epoch_rewards / self.hparams.steps_per_epoch + self.avg_ep_len = self.hparams.steps_per_epoch / self.done_episodes self.epoch_rewards = 0 self.done_episodes = 0 @@ -276,9 +274,11 @@ def train_batch( def actor_loss(self, state, action, logp_old, qval, adv) -> torch.Tensor: pi, _ = self.actor(state) logp = self.actor.get_log_prob(pi, action) - ratio = torch.exp(logp - logp_old) - clip_adv = torch.clamp(ratio, 1 - self.clip_ratio, 1 + self.clip_ratio) * adv - loss_actor = -(torch.min(ratio * adv, 
clip_adv)).mean() + ratio = torch.exp(logp - logp_old.sum(-1)) + clip_ratio = torch.clamp( + ratio, 1 - self.hparams.clip_ratio, 1 + self.hparams.clip_ratio + ) + loss_actor = -(torch.min(ratio * adv, clip_ratio * adv)).mean() return loss_actor def critic_loss(self, state, action, logp_old, qval, adv) -> torch.Tensor: @@ -334,7 +334,7 @@ def training_step( loss_critic, on_step=False, on_epoch=True, - prog_bar=False, + prog_bar=True, logger=True, ) return loss_critic @@ -353,16 +353,12 @@ def configure_optimizers(self) -> List[optim.Optimizer]: optimizers.append(optimizer_critic) return optimizers - def _dataloader(self) -> DataLoader: + def train_dataloader(self) -> DataLoader: """Initialize the Replay Buffer dataset used for retrieving experiences""" dataset = ExperienceSourceDataset(self.train_batch) dataloader = DataLoader(dataset=dataset, batch_size=self.hparams.batch_size) return dataloader - def train_dataloader(self) -> DataLoader: - """Get train loader""" - return self._dataloader() - @staticmethod def _pre_sanity_check(hparams: Namespace): # ensure we have the rl specific target column in the config From 466d7b2c7ae0aa732ff1ee900860ed9d536dc344 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sat, 2 Jan 2021 17:00:27 +0000 Subject: [PATCH 15/62] deleted unnecessary files --- src/dagobert/modelling/rl/env01.py | 442 --------------------------- src/dagobert/modelling/rl/env02.py | 473 ----------------------------- 2 files changed, 915 deletions(-) delete mode 100644 src/dagobert/modelling/rl/env01.py delete mode 100644 src/dagobert/modelling/rl/env02.py diff --git a/src/dagobert/modelling/rl/env01.py b/src/dagobert/modelling/rl/env01.py deleted file mode 100644 index 4015cf7c..00000000 --- a/src/dagobert/modelling/rl/env01.py +++ /dev/null @@ -1,442 +0,0 @@ -""" -https://github.com/wassname/rl-portfolio-management/blob/master/rl_portfolio_management/environments/portfolio.py -""" -import numpy as np -import pandas as pd -from matplotlib import pyplot as 
plt -from pprint import pprint -import logging -import os -import tempfile -import time -import gym -import gym.spaces - -from ..config import eps -from ..data.utils import normalize, random_shift, scale_to_start -from ..util import MDD as max_drawdown, sharpe, softmax -from ..callbacks.notebook_plot import LivePlotNotebook - -logger = logging.getLogger(__name__) - - -class DataSrc(object): - """Acts as data provider for each new episode.""" - - def __init__( - self, - df, - steps=252, - scale=True, - scale_extra_cols=True, - augment=0.00, - window_length=50, - random_reset=True, - ): - """ - DataSrc. - - df - csv for data frame index of timestamps - and multi-index columns levels=[['LTCBTC'],...],['open','low','high','close',...]] - an example is included as an hdf file in this repository - steps - total steps in episode - scale - scale the data for each episode - scale_extra_cols - scale extra columns by global mean and std - augment - fraction to augment the data by - random_reset - reset to a random time (otherwise continue through time) - """ - self.steps = steps + 1 - self.augment = augment - self.random_reset = random_reset - self.scale = scale - self.scale_extra_cols = scale_extra_cols - self.window_length = window_length - self.idx = self.window_length - - # get rid of NaN's - df = df.copy() - df.replace(np.nan, 0, inplace=True) - df = df.fillna(method="pad") - - # dataframe to matrix - self.asset_names = df.columns.levels[0].tolist() - self.features = df.columns.levels[1].tolist() - data = df.as_matrix().reshape( - (len(df), len(self.asset_names), len(self.features)) - ) - self._data = np.transpose(data, (1, 0, 2)) - self._times = df.index - - self.price_columns = ["close", "high", "low", "open"] - self.non_price_columns = set(df.columns.levels[1]) - set(self.price_columns) - - # Stats to let us normalize non price columns - if scale_extra_cols: - x = self._data.reshape((-1, len(self.features))) - self.stats = dict(mean=x.mean(0), std=x.std(0)) - # for 
column in self._data.columns.levels[1].tolist(): - # x = df.xs(key=column, axis=1, level='Price').as_matrix()[:, :] - # self.stats["mean"].append(x.mean()) - # = dict(mean=x.mean(), std=x.std()) - - self.reset() - - def _step(self): - # get history matrix from dataframe - data_window = self.data[:, self.step : self.step + self.window_length].copy() - - # (eq.1) prices - y1 = data_window[:, -1, 0] / data_window[:, -2, 0] - y1 = np.concatenate([[1.0], y1]) # add cash price - - # (eq 18) X: prices are divided by close price - nb_pc = len(self.price_columns) - if self.scale: - last_close_price = data_window[:, -1, 0] - data_window[:, :, :nb_pc] /= last_close_price[:, np.newaxis, np.newaxis] - - if self.scale_extra_cols: - # normalize non price columns - data_window[:, :, nb_pc:] -= self.stats["mean"][None, None, nb_pc:] - data_window[:, :, nb_pc:] /= self.stats["std"][None, None, nb_pc:] - data_window[:, :, nb_pc:] = np.clip( - data_window[:, :, nb_pc:], - self.stats["mean"][nb_pc:] - self.stats["std"][nb_pc:] * 10, - self.stats["mean"][nb_pc:] + self.stats["std"][nb_pc:] * 10, - ) - - self.step += 1 - history = data_window - done = bool(self.step >= self.steps) - - return history, y1, done - - def reset(self): - self.step = 0 - - # get data for this episode - if self.random_reset: - self.idx = np.random.randint( - low=self.window_length + 1, high=self._data.shape[1] - self.steps - 2 - ) - else: - # continue sequentially, before reseting to start - if self.idx > (self._data.shape[1] - self.steps - self.window_length - 1): - self.idx = self.window_length + 1 - else: - self.idx += self.steps - data = self._data[ - :, self.idx - self.window_length : self.idx + self.steps + 1 - ].copy() - self.times = self._times[ - self.idx - self.window_length : self.idx + self.steps + 1 - ] - - # augment data to prevent overfitting - data += np.random.normal(loc=0, scale=self.augment, size=data.shape) - - self.data = data - - -class PortfolioSim(object): - """ - Portfolio management 
sim. - - Params: - - cost e.g. 0.0025 is max in Poliniex - - Based of [Jiang 2017](https://arxiv.org/abs/1706.10059) - """ - - def __init__(self, asset_names=[], steps=128, trading_cost=0.0025, time_cost=0.0): - self.cost = trading_cost - self.time_cost = time_cost - self.steps = steps - self.asset_names = asset_names - self.reset() - - def _step(self, w1, y1): - """ - Step. - - w1 - new action of portfolio weights - e.g. [0.1,0.9, 0.0] - y1 - price relative vector also called return - e.g. [1.0, 0.9, 1.1] - Numbered equations are from https://arxiv.org/abs/1706.10059 - """ - w0 = self.w0 - p0 = self.p0 - - # (eq7) since we last acted prices changed, so weights evolve into - dw1 = (y1 * w0) / (np.dot(y1, w0) + eps) - - # (eq16) cost to change portfolio: p' -> mu -> pt, see Figure 1 - # excluding change in cash to avoid double counting for transaction cost - mu = self.cost * (np.abs(dw1[1:] - w1[1:])).sum() - - # (eq11) final portfolio value: see section between (eq19-20) why this works - p1 = p0 * (1 - mu) * np.dot(y1, w0) - - # (eq9 & 10) rate of return log rate of return - rho1 = p1 / p0 - 1 # rate of returns - r1 = np.log(p1 + eps) - np.log(p0 + eps) - - # (eq22) immediate reward is log rate of return scaled by episode length - reward = r1 / self.steps - - # remember for next step - self.w0 = w1 - self.p0 = p1 - - # if we run out of money, we're done - done = bool(p1 == 0) - - # should only return single values, not list - info = { - "reward": reward, - "log_return": r1, - "portfolio_value": p1, - "market_return": y1.mean(), - "rate_of_return": rho1, - "weights_mean": w1.mean(), - "weights_std": w1.std(), - "cost": mu, - } - # record weights and prices - for i, name in enumerate(["BTCBTC"] + self.asset_names): - info["weight_" + name] = w1[i] - info["price_" + name] = y1[i] - - self.infos.append(info) - return reward, info, done - - def reset(self): - self.infos = [] - self.w0 = np.array([1.0] + [0.0] * len(self.asset_names)) - self.p0 = 1.0 - - -class 
PortfolioEnv(gym.Env): - """ - An environment for financial portfolio management. - - Financial portfolio management is the process of constant redistribution of a fund into different - financial products. - - Based on [Jiang 2017](https://arxiv.org/abs/1706.10059) - """ - - metadata = {"render.modes": ["notebook", "ansi"]} - - def __init__( - self, - df, - steps=256, - trading_cost=0.0025, - time_cost=0.00, - window_length=50, - augment=0.00, - output_mode="EIIE", - log_dir=None, - scale=True, - scale_extra_cols=True, - random_reset=True, - ): - """ - An environment for financial portfolio management. - - Params: - df - csv for data frame index of timestamps - and multi-index columns levels=[['LTCBTC'],...],['open','low','high','close']] - steps - steps in episode - window_length - how many past observations["history"] to return - trading_cost - cost of trade as a fraction, e.g. 0.0025 corresponding to max rate of 0.25% at Poloniex (2017) - time_cost - cost of holding as a fraction - augment - fraction to randomly shift data by - output_mode: decides observation["history"] shape - - 'EIIE' for (assets, window, 3) - - 'atari' for (window, window, 3) (assets is padded) - - 'mlp' for (assets*window*3) - log_dir: directory to save plots to - scale - scales price data by last opening price on each episode (except return) - scale_extra_cols - scales non price data using mean and std for whole dataset - """ - self.src = DataSrc( - df=df, - steps=steps, - scale=scale, - scale_extra_cols=scale_extra_cols, - augment=augment, - window_length=window_length, - random_reset=random_reset, - ) - self._plot = self._plot2 = self._plot3 = None - self.output_mode = output_mode - self.sim = PortfolioSim( - asset_names=self.src.asset_names, - trading_cost=trading_cost, - time_cost=time_cost, - steps=steps, - ) - self.log_dir = log_dir - - # openai gym attributes - # action will be the portfolio weights [cash_bias,w1,w2...] 
where wn are [0, 1] for each asset - nb_assets = len(self.src.asset_names) - self.action_space = gym.spaces.Box(0.0, 1.0, shape=nb_assets + 1) - - # get the history space from the data min and max - if output_mode == "EIIE": - obs_shape = (nb_assets, window_length, len(self.src.features)) - elif output_mode == "atari": - obs_shape = (window_length, window_length, len(self.src.features)) - elif output_mode == "mlp": - obs_shape = (nb_assets) * window_length * (len(self.src.features)) - else: - raise Exception("Invalid value for output_mode: %s" % self.output_mode) - - self.observation_space = gym.spaces.Dict( - { - "history": gym.spaces.Box( - -10, - 20 - if scale - else 1, # if scale=True observed price changes return could be large fractions - obs_shape, - ), - "weights": self.action_space, - } - ) - self._reset() - - def _step(self, action): - """ - Step the env. - - Actions should be portfolio [w0...] - - Where wn is a portfolio weight between 0 and 1. The first (w0) is cash_bias - - cn is the portfolio conversion weights see PortioSim._step for description - """ - logger.debug("action: %s", action) - - weights = np.clip(action, 0.0, 1.0) - weights /= weights.sum() + eps - - # Sanity checks - assert self.action_space.contains( - action - ), "action should be within %r but is %r" % (self.action_space, action) - np.testing.assert_almost_equal( - np.sum(weights), - 1.0, - 3, - err_msg='weights should sum to 1. 
action="%s"' % weights, - ) - - history, y1, done1 = self.src._step() - - reward, info, done2 = self.sim._step(weights, y1) - - # calculate return for buy and hold a bit of each asset - info["market_value"] = np.cumprod( - [inf["market_return"] for inf in self.infos + [info]] - )[-1] - # add dates - info["date"] = self.src.times[self.src.step].timestamp() - info["steps"] = self.src.step - - self.infos.append(info) - - # reshape history according to output mode - if self.output_mode == "EIIE": - pass - elif self.output_mode == "atari": - padding = history.shape[1] - history.shape[0] - history = np.pad(history, [[0, padding], [0, 0], [0, 0]], mode="constant") - elif self.output_mode == "mlp": - history = history.flatten() - - return {"history": history, "weights": weights}, reward, done1 or done2, info - - def _reset(self): - self.sim.reset() - self.src.reset() - self.infos = [] - action = self.sim.w0 - observation, reward, done, info = self.step(action) - return observation - - def _seed(self, seed): - np.random.seed(seed) - return [seed] - - def _render(self, mode="notebook", close=False): - # if close: - # return - if mode == "ansi": - pprint(self.infos[-1]) - elif mode == "notebook": - self.plot_notebook(close) - - def plot_notebook(self, close=False): - """Live plot using the jupyter notebook rendering of matplotlib.""" - - if close: - self._plot = self._plot2 = self._plot3 = None - return - - df_info = pd.DataFrame(self.infos) - df_info.index = pd.to_datetime(df_info["date"], unit="s") - - # plot prices and performance - all_assets = ["BTCBTC"] + self.sim.asset_names - if not self._plot: - colors = [None] * len(all_assets) + ["black"] - self._plot_dir = ( - os.path.join(self.log_dir, "notebook_plot_prices_" + str(time.time())) - if self.log_dir - else None - ) - self._plot = LivePlotNotebook( - log_dir=self._plot_dir, - title="prices & performance", - labels=all_assets + ["Portfolio"], - ylabel="value", - colors=colors, - ) - x = df_info.index - y_portfolio = 
df_info["portfolio_value"] - y_assets = [df_info["price_" + name].cumprod() for name in all_assets] - self._plot.update(x, y_assets + [y_portfolio]) - - # plot portfolio weights - if not self._plot2: - self._plot_dir2 = ( - os.path.join(self.log_dir, "notebook_plot_weights_" + str(time.time())) - if self.log_dir - else None - ) - self._plot2 = LivePlotNotebook( - log_dir=self._plot_dir2, - labels=all_assets, - title="weights", - ylabel="weight", - ) - ys = [df_info["weight_" + name] for name in all_assets] - self._plot2.update(x, ys) - - # plot portfolio costs - if not self._plot3: - self._plot_dir3 = ( - os.path.join(self.log_dir, "notebook_plot_cost_" + str(time.time())) - if self.log_dir - else None - ) - self._plot3 = LivePlotNotebook( - log_dir=self._plot_dir3, labels=["cost"], title="costs", ylabel="cost" - ) - ys = [df_info["cost"].cumsum()] - self._plot3.update(x, ys) - - if close: - self._plot = self._plot2 = self._plot3 = None diff --git a/src/dagobert/modelling/rl/env02.py b/src/dagobert/modelling/rl/env02.py deleted file mode 100644 index 7dfdbd55..00000000 --- a/src/dagobert/modelling/rl/env02.py +++ /dev/null @@ -1,473 +0,0 @@ -""" -Modified from https://github.com/vermouth1992/drl-portfolio-management -""" -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt - -import gym -import gym.spaces - -eps = np.finfo(float).eps - - -def random_shift(x, fraction): - """ Apply a random shift to a pandas series. """ - min_x, max_x = np.min(x), np.max(x) - m = np.random.uniform(-fraction, fraction, size=x.shape) + 1 - return np.clip(x * m, min_x, max_x) - - -def scale_to_start(x): - """ Scale pandas series so that it starts at one. """ - x = (x + eps) / (x[0] + eps) - return x - - -def sharpe(returns, freq=30, rfr=0): - """ Given a set of returns, calculates naive (rfr=0) sharpe (eq 28). """ - return (np.sqrt(freq) * np.mean(returns - rfr + eps)) / np.std(returns - rfr + eps) - - -def max_drawdown(returns): - """ Max drawdown. 
See https://www.investopedia.com/terms/m/maximum-drawdown-mdd.asp """ - peak = returns.max() - trough = returns[returns.argmax() :].min() - return (trough - peak) / (peak + eps) - - -class DataGenerator(object): - """Acts as data provider for each new episode.""" - - def __init__( - self, - history, - abbreviation, - steps=730, - window_length=50, - start_idx=0, - start_date=None, - ): - """ - - Args: - history: (num_stocks, timestamp, 5) open, high, low, close, volume - abbreviation: a list of length num_stocks with assets name - steps: the total number of steps to simulate, default is 2 years - window_length: observation window, must be less than 50 - start_date: the date to start. Default is None and random pick one. - It should be a string e.g. '2012-08-13' - """ - assert history.shape[0] == len( - abbreviation - ), "Number of stock is not consistent" - import copy - - self.steps = steps + 1 - self.window_length = window_length - self.step = start_idx - self.start_date = start_date - - # make immutable class - self._data = history.copy() # all data - self.asset_names = copy.copy(abbreviation) - - def _step(self): - # get observation matrix from history, exclude volume, maybe volume is useful as it - # indicates how market total investment changes. Normalize could be critical here - self.step += 1 - obs = self.data[:, self.step : self.step + self.window_length, :].copy() - # normalize obs with open price - - # used for compute optimal action and sanity check - ground_truth_obs = self.data[ - :, self.step + self.window_length : self.step + self.window_length + 1, : - ].copy() - - done = self.step >= self.steps - return obs, done, ground_truth_obs - - def reset(self): - self.step = 0 - - # get data for this episode, each episode might be different. 
- if self.start_date is None: - self.idx = np.random.randint( - low=self.window_length, high=self._data.shape[1] - self.steps - ) - else: - raise ValueError("start_date is not yet supported / implemented") - # compute index corresponding to start_date for repeatable sequence - # self.idx = date_to_index(self.start_date) - self.start_idx - # assert ( - # self.idx >= self.window_length - # and self.idx <= self._data.shape[1] - self.steps - # ), "Invalid start date, must be window_length day after start date and simulation steps day before end date" - data = self._data[ - :, self.idx - self.window_length : self.idx + self.steps + 1, :4 - ] - # apply augmentation? - self.data = data - return ( - self.data[:, self.step : self.step + self.window_length, :].copy(), - self.data[ - :, - self.step + self.window_length : self.step + self.window_length + 1, - :, - ].copy(), - ) - - -class PortfolioSim(object): - """ - Portfolio management sim. - Params: - - cost e.g. 0.0025 is max in Poliniex - Based of [Jiang 2017](https://arxiv.org/abs/1706.10059) - """ - - def __init__( - self, asset_names=list(), steps=730, trading_cost=0.0025, time_cost=0.0 - ): - self.asset_names = asset_names - self.cost = trading_cost - self.time_cost = time_cost - self.steps = steps - self.reset() - - def _step(self, w1, y1): - """ - Step. - w1 - new action of portfolio weights - e.g. [0.1,0.9,0.0] - y1 - price relative vector also called return - e.g. 
[1.0, 0.9, 1.1] - Numbered equations are from https://arxiv.org/abs/1706.10059 - """ - assert w1.shape == y1.shape, "w1 and y1 must have the same shape" - assert y1[0] == 1.0, "y1[0] must be 1" - - p0 = self.p0 - - dw1 = (y1 * w1) / (np.dot(y1, w1) + eps) # (eq7) weights evolve into - - mu1 = self.cost * (np.abs(dw1 - w1)).sum() # (eq16) cost to change portfolio - - assert mu1 < 1.0, "Cost is larger than current holding" - - p1 = p0 * (1 - mu1) * np.dot(y1, w1) # (eq11) final portfolio value - - p1 = p1 * (1 - self.time_cost) # we can add a cost to holding - - rho1 = p1 / p0 - 1 # rate of returns - r1 = np.log((p1 + eps) / (p0 + eps)) # log rate of return - reward = r1 / self.steps * 1000.0 # (22) average logarithmic accumulated return - # remember for next step - self.p0 = p1 - - # if we run out of money, we're done (losing all the money) - done = p1 == 0 - - info = { - "reward": reward, - "log_return": r1, - "portfolio_value": p1, - "return": y1.mean(), - "rate_of_return": rho1, - "weights_mean": w1.mean(), - "weights_std": w1.std(), - "cost": mu1, - } - self.infos.append(info) - return reward, info, done - - def reset(self): - self.infos = [] - self.p0 = 1.0 - - -class PortfolioEnv(gym.Env): - """ - An environment for financial portfolio management. - Financial portfolio management is the process of constant redistribution of a fund into different - financial products. - Based on [Jiang 2017](https://arxiv.org/abs/1706.10059) - """ - - metadata = {"render.modes": ["human", "ansi"]} - - def __init__( - self, - history, - abbreviation, - steps=730, # 2 years - trading_cost=0.0025, - time_cost=0.00, - window_length=50, - start_idx=0, - sample_start_date=None, - ): - """ - An environment for financial portfolio management. 
- Params: - steps - steps in episode - scale - scale data and each episode (except return) - augment - fraction to randomly shift data by - trading_cost - cost of trade as a fraction - time_cost - cost of holding as a fraction - window_length - how many past observations to return - start_idx - The number of days from '2012-08-13' of the dataset - sample_start_date - The start date sampling from the history - """ - self.window_length = window_length - self.num_stocks = history.shape[0] - self.start_idx = start_idx - - self.src = DataGenerator( - history, - abbreviation, - steps=steps, - window_length=window_length, - start_idx=start_idx, - start_date=sample_start_date, - ) - - self.sim = PortfolioSim( - asset_names=abbreviation, - trading_cost=trading_cost, - time_cost=time_cost, - steps=steps, - ) - - # openai gym attributes - # action will be the portfolio weights from 0 to 1 for each asset - self.action_space = gym.spaces.Box( - 0, 1, shape=(len(self.src.asset_names) + 1,), dtype=np.float32 - ) # include cash - - # get the observation space from the data min and max - self.observation_space = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=(len(abbreviation), window_length, history.shape[-1]), - dtype=np.float32, - ) - - def step(self, action): - return self._step(action) - - def _step(self, action): - """ - Step the env. - Actions should be portfolio [w0...] - - Where wn is a portfolio weight from 0 to 1. The first is cash_bias - - cn is the portfolio conversion weights see PortioSim._step for description - """ - np.testing.assert_almost_equal(action.shape, (len(self.sim.asset_names) + 1,)) - - # normalise just in case - action = np.clip(action, 0, 1) - - weights = action # np.array([cash_bias] + list(action)) # [w0, w1...] - weights /= weights.sum() + eps - weights[0] += np.clip( - 1 - weights.sum(), 0, 1 - ) # so if weights are all zeros we normalise to [1,0...] 
- - assert ((action >= 0) * (action <= 1)).all(), ( - "all action values should be between 0 and 1. Not %s" % action - ) - np.testing.assert_almost_equal( - np.sum(weights), - 1.0, - 3, - err_msg='weights should sum to 1. action="%s"' % weights, - ) - - observation, done1, ground_truth_obs = self.src._step() - - # concatenate observation with ones - cash_observation = np.ones((1, self.window_length, observation.shape[2])) - observation = np.concatenate((cash_observation, observation), axis=0) - - cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2])) - ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) - - # relative price vector of last observation day (close/open) - close_price_vector = observation[:, -1, 3] - open_price_vector = observation[:, -1, 0] - y1 = close_price_vector / open_price_vector - reward, info, done2 = self.sim._step(weights, y1) - - # calculate return for buy and hold a bit of each asset - info["market_value"] = np.cumprod( - [inf["return"] for inf in self.infos + [info]] - )[-1] - # add dates - info["date"] = self.start_idx + self.src.idx + self.src.step - info["steps"] = self.src.step - info["next_obs"] = ground_truth_obs - - self.infos.append(info) - - return observation, reward, done1 or done2, info - - def reset(self): - return self._reset() - - def _reset(self): - self.infos = [] - self.sim.reset() - observation, ground_truth_obs = self.src.reset() - cash_observation = np.ones((1, self.window_length, observation.shape[2])) - observation = np.concatenate((cash_observation, observation), axis=0) - cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2])) - ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) - info = {} - info["next_obs"] = ground_truth_obs - return observation, info - - def _render(self, mode="human", close=False): - if close: - return - if mode == "ansi": - print(self.infos[-1]) - elif mode == "human": - self.plot() - - def render(self, mode="human", 
close=False): - return self._render(mode="human", close=False) - - def plot(self): - # show a plot of portfolio vs mean market performance - df_info = pd.DataFrame(self.infos) - df_info["date"] = pd.to_datetime(df_info["date"], format="%Y-%m-%d") - df_info.set_index("date", inplace=True) - mdd = max_drawdown(df_info.rate_of_return + 1) - sharpe_ratio = sharpe(df_info.rate_of_return) - title = "max_drawdown={: 2.2%} sharpe_ratio={: 2.4f}".format(mdd, sharpe_ratio) - df_info[["portfolio_value", "market_value"]].plot( - title=title, fig=plt.gcf(), rot=30 - ) - - -class MultiActionPortfolioEnv(PortfolioEnv): - def __init__( - self, - history, - abbreviation, - model_names, - steps=730, # 2 years - trading_cost=0.0025, - time_cost=0.00, - window_length=50, - start_idx=0, - sample_start_date=None, - ): - super(MultiActionPortfolioEnv, self).__init__( - history, - abbreviation, - steps, - trading_cost, - time_cost, - window_length, - start_idx, - sample_start_date, - ) - self.model_names = model_names - # need to create each simulator for each model - self.sim = [ - PortfolioSim( - asset_names=abbreviation, - trading_cost=trading_cost, - time_cost=time_cost, - steps=steps, - ) - for _ in range(len(self.model_names)) - ] - - def _step(self, action): - """Step the environment by a vector of actions - - Args: - action: (num_models, num_stocks + 1) - - Returns: - - """ - assert ( - action.ndim == 2 - ), "Action must be a two dimensional array with shape (num_models, num_stocks + 1)" - assert action.shape[1] == len(self.sim[0].asset_names) + 1 - assert action.shape[0] == len(self.model_names) - # normalise just in case - action = np.clip(action, 0, 1) - weights = action # np.array([cash_bias] + list(action)) # [w0, w1...] - weights /= np.sum(weights, axis=1, keepdims=True) + eps - # so if weights are all zeros we normalise to [1,0...] 
- weights[:, 0] += np.clip(1 - np.sum(weights, axis=1), 0, 1) - assert ((action >= 0) * (action <= 1)).all(), ( - "all action values should be between 0 and 1. Not %s" % action - ) - np.testing.assert_almost_equal( - np.sum(weights, axis=1), - np.ones(shape=(weights.shape[0])), - 3, - err_msg='weights should sum to 1. action="%s"' % weights, - ) - observation, done1, ground_truth_obs = self.src._step() - - # concatenate observation with ones - cash_observation = np.ones((1, self.window_length, observation.shape[2])) - observation = np.concatenate((cash_observation, observation), axis=0) - - cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2])) - ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) - - # relative price vector of last observation day (close/open) - close_price_vector = observation[:, -1, 3] - open_price_vector = observation[:, -1, 0] - y1 = close_price_vector / open_price_vector - - rewards = np.empty(shape=(weights.shape[0])) - info = {} - dones = np.empty(shape=(weights.shape[0]), dtype=bool) - for i in range(weights.shape[0]): - reward, current_info, done2 = self.sim[i]._step(weights[i], y1) - rewards[i] = reward - info[self.model_names[i]] = current_info["portfolio_value"] - info["return"] = current_info["return"] - dones[i] = done2 - - # calculate return for buy and hold a bit of each asset - info["market_value"] = np.cumprod( - [inf["return"] for inf in self.infos + [info]] - )[-1] - # add dates - info["date"] = self.start_idx + self.src.idx + self.src.step - info["steps"] = self.src.step - info["next_obs"] = ground_truth_obs - - self.infos.append(info) - - return observation, rewards, np.all(dones) or done1, info - - def _reset(self): - self.infos = [] - for sim in self.sim: - sim.reset() - observation, ground_truth_obs = self.src.reset() - cash_observation = np.ones((1, self.window_length, observation.shape[2])) - observation = np.concatenate((cash_observation, observation), axis=0) - cash_ground_truth = 
np.ones((1, 1, ground_truth_obs.shape[2])) - ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0) - info = {} - info["next_obs"] = ground_truth_obs - return observation, info - - def plot(self): - df_info = pd.DataFrame(self.infos) - fig = plt.gcf() - title = "Trading Performance of Various Models" - df_info["date"] = pd.to_datetime(df_info["date"], format="%Y-%m-%d") - df_info.set_index("date", inplace=True) - df_info[self.model_names + ["market_value"]].plot(title=title, fig=fig, rot=30) From 15c2283f827032129a4cc4327aad5dd32ed094ff Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sat, 2 Jan 2021 17:02:21 +0000 Subject: [PATCH 16/62] blackening somehow didn't run...Marci used to have this.. --- src/dagobert/modelling/dl/data.py | 4 +++- src/dagobert/modelling/dl/preprocessing.py | 10 ++++++++-- src/dagobert/modelling/dl/tcn.py | 9 +++++++-- src/dagobert/modelling/dl/tcn_args.py | 18 ++++++++++++++---- src/dagobert/modelling/dl/tcn_net.py | 9 ++++++++- 5 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/dagobert/modelling/dl/data.py b/src/dagobert/modelling/dl/data.py index 46748dd3..91e96391 100644 --- a/src/dagobert/modelling/dl/data.py +++ b/src/dagobert/modelling/dl/data.py @@ -198,7 +198,9 @@ def __getitem__(self, idx): # FUNCTIONS FOR SETUP # ---------------------------------------------------------------------------------- - def _load_df_anchor(self,) -> pd.DatetimeIndex: + def _load_df_anchor( + self, + ) -> pd.DatetimeIndex: """ Loads the anchor DF, and returns it. We use the anchor df for plotting and to extract the master index which we measure everything else against in batching. 
diff --git a/src/dagobert/modelling/dl/preprocessing.py b/src/dagobert/modelling/dl/preprocessing.py index ebce640c..ffa6916a 100644 --- a/src/dagobert/modelling/dl/preprocessing.py +++ b/src/dagobert/modelling/dl/preprocessing.py @@ -85,7 +85,10 @@ def preprocess_augment_dfs(hparams: Namespace) -> Namespace: @staticmethod def _preprocess_augment_dfs( - hparams: Namespace, df_name: str, df_path: str, df_path_prev: str, + hparams: Namespace, + df_name: str, + df_path: str, + df_path_prev: str, ) -> Optional: """ Helper function that performs the preprocessing of simple augment DFs. @@ -550,7 +553,10 @@ def _quantile_filter( @staticmethod def _binarise( - df: pd.DataFrame, method: str, threshold: float, df_name: str = "", + df: pd.DataFrame, + method: str, + threshold: float, + df_name: str = "", ) -> pd.DataFrame: """ Binarises a DF with the provided method and threshold. diff --git a/src/dagobert/modelling/dl/tcn.py b/src/dagobert/modelling/dl/tcn.py index 35e858ad..79dd6457 100644 --- a/src/dagobert/modelling/dl/tcn.py +++ b/src/dagobert/modelling/dl/tcn.py @@ -130,7 +130,11 @@ def setup_and_run_tcn_lightning(args: Namespace, study: bool = False): args, logger=tcn_loggers, checkpoint_callback=checkpoint_callback, - callbacks=[early_stop_callback, metrics_callback, LearningRateMonitor(),], + callbacks=[ + early_stop_callback, + metrics_callback, + LearningRateMonitor(), + ], ) model = TCNLightning(args) @@ -208,7 +212,8 @@ def __init__(self, hparams: Namespace): if self.hparams.mix_density_net: self.linear_mu = nn.Linear(self.hparams.num_channels[-1], self.density_num) self.linear_sigmasq = nn.Linear( - self.hparams.num_channels[-1], self.density_num, + self.hparams.num_channels[-1], + self.density_num, ) self.linear_mix = nn.Linear(self.hparams.num_channels[-1], self.density_num) self = self.float() diff --git a/src/dagobert/modelling/dl/tcn_args.py b/src/dagobert/modelling/dl/tcn_args.py index cd15e7db..d08b0a0b 100644 --- a/src/dagobert/modelling/dl/tcn_args.py 
+++ b/src/dagobert/modelling/dl/tcn_args.py @@ -43,7 +43,10 @@ def add_run_specific_args(parent_parser): help="Number of cores to use to prepare the batches.", ) parser.add_argument( - "--exp_name", type=str, default="TCN", help="Name of experiment.", + "--exp_name", + type=str, + default="TCN", + help="Name of experiment.", ) parser.add_argument( "--tags", @@ -233,7 +236,9 @@ def add_data_specific_args(parent_parser): # this is just a place-holder so it's easier to read the million params in the cmd parser.add_argument("--DATA_PARAMS", help="====================================") parser.add_argument( - "--data_dir", type=str, help="Path to folder holding the data files to use.", + "--data_dir", + type=str, + help="Path to folder holding the data files to use.", ) parser.add_argument( "--lookback", type=float, default=6, help="Lookback length in hours." @@ -267,7 +272,9 @@ def add_data_specific_args(parent_parser): ), ) parser.add_argument( - "--to_label", action="store_true", help="Label datasets before preprocessing.", + "--to_label", + action="store_true", + help="Label datasets before preprocessing.", ) parser.add_argument( "--label_sl", type=int, default=1, help="Stop-loss barrier size." 
@@ -279,7 +286,10 @@ def add_data_specific_args(parent_parser): "--label_first_or_max", type=str, default=NBarriers.first, - choices=[NBarriers.first, NBarriers.max,], + choices=[ + NBarriers.first, + NBarriers.max, + ], help="Weather to use the first or maximum barrier-touch.", ) parser.add_argument( diff --git a/src/dagobert/modelling/dl/tcn_net.py b/src/dagobert/modelling/dl/tcn_net.py index 0bf2bba7..b49b139a 100644 --- a/src/dagobert/modelling/dl/tcn_net.py +++ b/src/dagobert/modelling/dl/tcn_net.py @@ -40,7 +40,14 @@ class TemporalBlock(nn.Module): """ def __init__( - self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2, + self, + n_inputs, + n_outputs, + kernel_size, + stride, + dilation, + padding, + dropout=0.2, ): super(TemporalBlock, self).__init__() self.conv1 = weight_norm( From 06aec4e418d48a38a7eb048da7bad68ce7d6ead0 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Mon, 4 Jan 2021 18:15:39 +0000 Subject: [PATCH 17/62] ppo now trains and loss doesn't explode (nearly as often) --- config/rl_config.yaml | 13 ++-- src/dagobert/modelling/rl/environment.py | 72 +++++++++++---------- src/dagobert/modelling/rl/networks.py | 26 ++++++-- src/dagobert/modelling/rl/ppo.py | 79 +++++++++--------------- 4 files changed, 99 insertions(+), 91 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 3c0e573d..5e9315ec 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -23,7 +23,7 @@ tags: - RL_test no_comet_logger: True seed: 42 -batch_size: 128 +batch_size: 256 # -------------------------------------------------------------------------------------- @@ -35,14 +35,15 @@ asset_names: - ETH trading_cost: 0.002 reward_type: return -max_episode_length: 1000 -steps_per_epoch: 2000 +max_episode_length: 500 +steps_per_epoch: 5000 n_optim_iters: 4 gamma: 0.99 -lamb: 0.95 -lr_actor: 0.0003 +lam: 0.95 +lr_actor: 0.001 lr_critic: 0.001 -clip_ratio: 0.2 +clip_ratio: 0.25 +target_kl: 0.01 # don't change these, or 
preprocessing won't work target_col: rl_return diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index ded6c16c..a7074f91 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -36,7 +36,16 @@ def __init__( Args: hparams: Hyparams parsed by the rl_runner. Similar to how `TCNLightning` is - initialized. + initialized with the following fields: + - max_episode_length + - cols_to_model + - target_col + - mini_series_length + - data_dir + - augment_dfs + - augment_prob + - augment_method + - augment_dfs_mix train_val_test: Whether we are training, validating or testing, it must be either train, val or test. """ @@ -68,10 +77,8 @@ def step(self): y1 = np.concatenate([[1.0], ys]) # turn Xs into a batch of 1, ready to be fed into the actor/critic Xs = [torch.Tensor(x).unsqueeze(0) for x in Xs] - episode_full = self.idx == self.hparams.max_episode_length - 1 - done = True if episode_full else False self.idx += 1 - return Xs, y1, done + return Xs, y1 def reset(self): self.idx = np.random.randint(self.dataset_len - self.hparams.max_episode_length) @@ -92,27 +99,21 @@ class RLPortfolio(object): https://github.com/ZhengyaoJiang/PGPortfolio/issues/99 """ - def __init__( - self, - asset_names: List[str], - max_episode_length: int = 1000, - trading_cost: float = 0.002, - reward_type: str = "return", - ): + def __init__(self, hparams: Namespace): """ Class constructor. Args: - asset_names: Names of assets in the portfolio. - trading_cost: Commission rate, currently set to Binance's VIP0 taker level - plus doubled it to account for slippage. TODO: model slippage. - reward_type: Whether to use the log return as reward or the sharpe ratio, - which was found to be more stable. + hparams: Hyparams parsed by the rl_runner. 
Similar to how `TCNLightning` is + initialized with the following fields: + - asset_names + - trading_cost + - reward_type """ - self.asset_names = asset_names - self.asset_n = len(asset_names) - self.trading_cost = trading_cost - self.reward_type = reward_type + self.asset_names = hparams.asset_names + self.asset_n = len(self.asset_names) + self.trading_cost = hparams.trading_cost + self.reward_type = hparams.reward_type self.reset() def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: @@ -198,24 +199,33 @@ def __init__(self, hparams: Namespace): An environment for financial portfolio management. Args: - hparams: - + hparams: Hyparams parsed by the rl_runner. Similar to how `TCNLightning` is + initialized with the following fields: + - max_episode_length + - cols_to_model + - target_col + - mini_series_length + - data_dir + - augment_dfs + - augment_prob + - augment_method + - augment_dfs_mix + - asset_names + - trading_cost + - reward_type """ self.infos = [] self.hparams = hparams self.asset_n = len(self.hparams.asset_names) self.feat_n = len(self.hparams.cols_to_model[npa.anchor]) self.data = RLData(self.hparams, train_val_test="train") - self.portfolio = RLPortfolio( - self.hparams.asset_names, self.hparams.max_episode_length - ) + self.portfolio = RLPortfolio(self.hparams) # setup openai gym env - include cash in the portfolio action space self.action_space = gym.spaces.Box( 0.0, 1.0, shape=(self.asset_n + 1,), dtype=np.float32 ) - - # get the observation space from the data min and max + # observation space isn't used anywhere, but we define it for documnetation self.observation_space = gym.spaces.Dict( { "state": gym.spaces.Box( @@ -241,16 +251,16 @@ def step(self, action: np.array): weights = action weights /= weights.sum() + eps - next_state, y1, done1 = self.data.step() - reward, info, done2 = self.portfolio.step(weights, y1) + next_state, y1 = self.data.step() + reward, info, done = self.portfolio.step(weights, y1) 
self.infos.append(info) - return next_state, reward, done1 or done2, info + return next_state, reward, done, info def reset(self): self.infos = [] self.portfolio.reset() - next_state, _, _ = self.data.reset() + next_state, _ = self.data.reset() return next_state def render(self): diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 664e82d0..1d651a6f 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -32,6 +32,7 @@ def __init__( """ super().__init__() self.hparams = hparams + self.n_actions = n_actions num_inputs = [len(cols) for dataset, cols in hparams.cols_to_model.items()] num_channels = ( hparams.actor_num_channels if actor else hparams.critic_num_channels @@ -88,14 +89,16 @@ def __init__(self, actor_net): any papers or implementations actually doing this. Args: - input_shape: observation shape of the environment - n_actions: number of discrete actions available in the environment + actor_net: Initialized actor net. 
""" super().__init__() self.actor_net = actor_net + self.inv_lin = InverseLinear() def forward(self, states): - concentrations = nn.functional.softmax(self.actor_net(states), dim=1).squeeze(0) + # get params for Dirichlet, and drop batch dim if batch_size=1 + logits = self.actor_net(states) + concentrations = self.inv_lin(logits).squeeze(0) pi = Dirichlet(concentrations) actions = pi.sample() return pi, actions @@ -111,7 +114,22 @@ def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor): Returns: log probability of the acition under pi """ - return pi.log_prob(actions).sum(axis=-1) + return pi.log_prob(actions) + + +class InverseLinear(nn.Module): + """ + Implements a layer specifically designed for Dirichlet distribution as final + layer, see here: https://openreview.net/pdf?id=BJeRg205Fm + """ + + def __init__(self): + super().__init__() + + def forward(self, x): + x[x < 0] = 1 / (1 - x[x < 0]) + x[x >= 0] = x[x >= 0] + 1 + return x class ActorCriticAgent(object): diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index 0c70dab8..6b7b7372 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -130,22 +130,6 @@ def __init__(self, hparams: Namespace): self.avg_ep_len = 0 self.avg_reward = 0 - # def forward( - # self, states: List[torch.Tensor] - # ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - # """ - # Passes state x through the network and returns the policy and a sampled action. - - # Args: - # x: environment state - - # Returns: - # Tuple of policy and action - # """ - # pi, action = self.actor(states) - # value = self.critic(states) - # return pi, action, value - def discount_rewards(self, rewards: List[float], discount: float) -> List[float]: """ Calculate the discounted rewards of all rewards in list. 
@@ -157,7 +141,6 @@ def discount_rewards(self, rewards: List[float], discount: float) -> List[float] list of discounted rewards/advantages """ assert isinstance(rewards[0], float) - cumul_reward = [] sum_r = 0.0 for r in reversed(rewards): @@ -165,29 +148,28 @@ def discount_rewards(self, rewards: List[float], discount: float) -> List[float] cumul_reward.append(sum_r) return list(reversed(cumul_reward)) - def calc_advantage( - self, rewards: List[float], values: List[float], last_value: float - ) -> List[float]: + def calc_advantage(self, rewards: List[float], values: List[float]) -> List[float]: """ Calculate the advantage given rewards, state values, and last value of episode. Args: rewards: list of episode rewards values: list of state values from critic - last_value: value of last state of episode Returns: list of advantages """ - rews = rewards + [last_value] - vals = values + [last_value] # GAE delta = [ - rews[i] + self.hparams.gamma * vals[i + 1] - vals[i] - for i in range(len(rews) - 1) + rewards[i] + self.hparams.gamma * values[i + 1] - values[i] + for i in range(len(rewards) - 1) ] adv = self.discount_rewards(delta, self.hparams.gamma * self.hparams.lam) - return adv + + # normalise advantage + adv = np.array(adv) + adv = (adv - adv.mean()) / (adv.std() + np.finfo(float).eps) + return list(adv) def train_batch( self, @@ -220,24 +202,13 @@ def train_batch( epoch_end = step == (self.hparams.steps_per_epoch - 1) terminal = len(self.ep_rewards) == self.hparams.max_episode_length - if epoch_end or done or terminal: - # if trajectory ends abtruptly, boostrap value of next state - if (terminal or epoch_end) and not done: - with torch.no_grad(): - _, _, _, value = self.agent(self.state, self.device) - last_value = value.item() - else: - last_value = 0 - # discounted cumulative reward self.batch_qvals += self.discount_rewards( - self.ep_rewards + [last_value], self.hparams.gamma + self.ep_rewards, self.hparams.gamma )[:-1] # advantage - self.batch_adv += 
self.calc_advantage( - self.ep_rewards, self.ep_values, last_value - ) + self.batch_adv += self.calc_advantage(self.ep_rewards, self.ep_values) # logs self.done_episodes += 1 self.epoch_rewards += np.sum(self.ep_rewards) @@ -274,12 +245,15 @@ def train_batch( def actor_loss(self, state, action, logp_old, qval, adv) -> torch.Tensor: pi, _ = self.actor(state) logp = self.actor.get_log_prob(pi, action) - ratio = torch.exp(logp - logp_old.sum(-1)) + old_new_diff = logp - logp_old + ratio = torch.exp(old_new_diff) + # idea taken from spinningup PPO implemenetation to prevent exploding loss + approx_kl = old_new_diff.mean().item() clip_ratio = torch.clamp( ratio, 1 - self.hparams.clip_ratio, 1 + self.hparams.clip_ratio ) loss_actor = -(torch.min(ratio * adv, clip_ratio * adv)).mean() - return loss_actor + return loss_actor, approx_kl def critic_loss(self, state, action, logp_old, qval, adv) -> torch.Tensor: value = self.critic(state) @@ -314,9 +288,8 @@ def training_step( self.log( "avg_reward", self.avg_reward, prog_bar=True, on_step=False, on_epoch=True ) - - if optimizer_idx % 2 == 0: - loss_actor = self.actor_loss(state, action, old_logp, qval, adv) + if optimizer_idx == 0: + loss_actor, approx_kl = self.actor_loss(state, action, old_logp, qval, adv) self.log( "loss_actor", loss_actor, @@ -325,9 +298,17 @@ def training_step( prog_bar=True, logger=True, ) + self.log( + "approx_kl", + approx_kl, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) return loss_actor - else: + elif optimizer_idx == 1: loss_critic = self.critic_loss(state, action, old_logp, qval, adv) self.log( "loss_critic", @@ -345,13 +326,11 @@ def configure_optimizers(self) -> List[optim.Optimizer]: optimizer_critic = optim.Adam( self.critic.parameters(), lr=self.hparams.lr_critic ) + return optimizer_actor, optimizer_critic - # workaround to run multple steps of gradient descent within LightningModule - optimizers = [] + def optimizer_step(self, *args, **kwargs): for _ in 
range(self.hparams.n_optim_iters): - optimizers.append(optimizer_actor) - optimizers.append(optimizer_critic) - return optimizers + super().optimizer_step(*args, **kwargs) def train_dataloader(self) -> DataLoader: """Initialize the Replay Buffer dataset used for retrieving experiences""" From 9a7a57a7e606c709af8563458e89b3c62526b206 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Thu, 7 Jan 2021 00:12:50 +0000 Subject: [PATCH 18/62] started --- notebooks/modelling/rl_env.ipynb | 4 +- src/dagobert/data/lambda/orderbook_data.py | 28 ++++++++++ src/dagobert/modelling/augmentation/tgan.py | 58 +++++++++++++++++++++ 3 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 src/dagobert/data/lambda/orderbook_data.py create mode 100644 src/dagobert/modelling/augmentation/tgan.py diff --git a/notebooks/modelling/rl_env.ipynb b/notebooks/modelling/rl_env.ipynb index ea5164b2..a3333841 100644 --- a/notebooks/modelling/rl_env.ipynb +++ b/notebooks/modelling/rl_env.ipynb @@ -346,9 +346,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.7.6 64-bit ('dagobert': conda)", + "display_name": "Python 3", "language": "python", - "name": "python37664bitdagobertconda90fcdb25face404d8cd237e8f8473045" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/src/dagobert/data/lambda/orderbook_data.py b/src/dagobert/data/lambda/orderbook_data.py new file mode 100644 index 00000000..0ddbfb33 --- /dev/null +++ b/src/dagobert/data/lambda/orderbook_data.py @@ -0,0 +1,28 @@ +import os +import boto3 +from binance.client import Client +import pandas as pd +import time + + +def fetch_orderbook_data(): + s3 = boto3.resource("s3") + client = Client("", "") + + pairs = ["BTCUSDT", "ETHUSDT", "XRPUSDT", "BCHUSDT", "LTCUSDT"] + bucket_name = "dagobert-orderbook" + + for pair in pairs: + response = client.get_order_book(symbol=pair, limit=1000) + + df = pd.DataFrame(response) + df = df[["bids", "asks"]] + + name = f"{pair}_{int(time.time())}.csv" + + 
df.to_csv(name, compression="gzip") + + file_object = s3.Object(bucket_name, name) + file_object.upload_file(name) + + os.remove(name) diff --git a/src/dagobert/modelling/augmentation/tgan.py b/src/dagobert/modelling/augmentation/tgan.py new file mode 100644 index 00000000..5706658d --- /dev/null +++ b/src/dagobert/modelling/augmentation/tgan.py @@ -0,0 +1,58 @@ +""" +TimeGAN network, following the original implementation: +https://bitbucket.org/mvdschaar/mlforhealthlabpub/src/master/alg/timegan/tgan.py. +""" +from typing import List + +import torch +import torch.nn as nn +import torch.nn.functional as f +from torch.nn.utils import weight_norm + + +class Generator(nn.Module): + def __init__( + self, + input_size, + hidden_size, + num_layers, + dropout, + batch_first=True, + ): + super(Generator, self).__init__() + + # input/output: (batch, seq, feature) + # TODO: hparams? + self.lstm = nn.LSTM( + input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + dropout=dropout, + batch_first=batch_first, + ) + # TODO: size of output is ? 
+ self.linear = nn.Linear(hidden_size, hidden_size) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + # lstm_out = (batch_size, seq_len, hidden_size) + lstm_out, _ = self.lstm(x) + y_pred = self.linear(lstm_out[:, -1]) + return y_pred + + +def rnn_cell(module_name): + # GRU + if module_name == "gru": + rnn_cell = tf.nn.rnn_cell.GRUCell(num_units=hidden_dim, activation=tf.nn.tanh) + # LSTM + elif module_name == "lstm": + rnn_cell = tf.contrib.rnn.BasicLSTMCell( + num_units=hidden_dim, activation=tf.nn.tanh + ) + # LSTM Layer Normalization + elif module_name == "lstmLN": + rnn_cell = tf.contrib.rnn.LayerNormBasicLSTMCell( + num_units=hidden_dim, activation=tf.nn.tanh + ) + return rnn_cell From 0e7cc2556f6f68362a7e6f98f3ae06b6a53de246 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Thu, 7 Jan 2021 09:34:46 +0000 Subject: [PATCH 19/62] small changes --- config/rl_config.yaml | 1 - notebooks/modelling/rl_env.ipynb | 41 ++-------- src/dagobert/modelling/dl/data.py | 4 +- src/dagobert/modelling/rl/networks.py | 76 +++++++++++-------- src/dagobert/modelling/rl/ppo.py | 67 ++++++++++++---- src/dagobert/modelling/utils.py | 2 +- .../feature_creation/time_features.py | 4 +- 7 files changed, 108 insertions(+), 87 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 5e9315ec..e6488e7a 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -25,7 +25,6 @@ no_comet_logger: True seed: 42 batch_size: 256 - # -------------------------------------------------------------------------------------- # RL # -------------------------------------------------------------------------------------- diff --git a/notebooks/modelling/rl_env.ipynb b/notebooks/modelling/rl_env.ipynb index ea5164b2..e4e1b0f2 100644 --- a/notebooks/modelling/rl_env.ipynb +++ b/notebooks/modelling/rl_env.ipynb @@ -4,16 +4,7 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 
autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -37,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -46,7 +37,7 @@ "array([0.48192771, 0.26506024, 0.25301205])" ] }, - "execution_count": 43, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -70,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -79,7 +70,7 @@ "0.00020481927710843396" ] }, - "execution_count": 44, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -114,26 +105,6 @@ "p0 * (1 - mu)" ] }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.0375" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.dot(y1, w0)" - ] - }, { "cell_type": "code", "execution_count": 45, @@ -189,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 10, "metadata": {}, "outputs": [ { diff --git a/src/dagobert/modelling/dl/data.py b/src/dagobert/modelling/dl/data.py index 91e96391..969a2a56 100644 --- a/src/dagobert/modelling/dl/data.py +++ b/src/dagobert/modelling/dl/data.py @@ -325,7 +325,7 @@ def _get_target(self, df: pd.DataFrame) -> np.array: """Returns the target values (y) to use for batching for a given DF.""" if self.simple_lookahead_y: # calculate simple moving average on the close original to smooth it - mean_bar_length = pd.Series(self.idx).diff().dt.seconds.mean() / 60 + mean_bar_length = pd.Series(self.idx).diff().dt.total_seconds().mean() / 60 window_size = int(np.round(self.simple_lookahead_y / mean_bar_length)) return ( df[npa.close_original] @@ -541,7 +541,7 @@ def plot(self) -> Tuple[Figure]: # plot the data columns, add 
date_diff_seconds to data coumns for sanity check df_data = df[self.cols_to_model[npa.anchor]].copy(deep=True) - date_diff_secs = pd.Series(df.index).diff().dt.seconds.values + date_diff_secs = pd.Series(df.index).diff().dt.total_seconds().values df_data.insert(0, "date_diff_secs", date_diff_secs) subplot_cols_n = 5 cols_n_to_plot = len(df_data.columns) diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 1d651a6f..3f05e70c 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -16,16 +16,19 @@ class ActorCriticTCN(nn.Module): """ def __init__( - self, hparams: Namespace, n_actions: int, actor: bool = True + self, hparams: Namespace, n_actions: int, output_size: int, actor: bool = True ) -> nn.Module: """ Init a TCN like we do in `dagobert.modelling.dl.tcn`. Args: hparams: Hparam parsed and updated by PPO module in dagobert.modelling.rl. - n_actions: Number of units at the end of the network: different for actor/critic - actor: If True, we are using the network params in hparams for the actor net, - else we take the params for the critic. + n_actions: Dimension of actions which is one of the inputs to the networks + along with the state (history price tensor). + output_size: Number of units at the end of the network. This is + different for actor/critic. + actor: If True, we are using the network params in hparams for the actor + net, else we take the params for the critic. Returns: Initiated TCN with the appropriate size for actor or critic. 
@@ -47,16 +50,21 @@ def __init__( time_feat_n=hparams.time_feat_n, time_embed_dim=hparams.time_embed_dim, ) + self.linear_a = nn.Linear(n_actions + 1, num_channels[-1]) self.linear1 = nn.Linear(hparams.mini_series_length, 1) - self.linear2 = nn.Linear(num_channels[-1], n_actions) + # self.linear2 = nn.Linear(num_channels[-1] * 2, output_size) + self.linear2 = nn.Linear(num_channels[-1], output_size) - def forward(self, x): - y1 = self.tcn(*x) + def forward(self, state, past_pw): + s1 = self.tcn(*state) + a1 = torch.tanh(self.linear_a(past_pw)) if self.hparams.use_last_timepoint: - return self.linear2(y1[:, :, -1]) + s2 = s1[:, :, -1] else: - y2 = nn.functional.relu(self.linear1(y1).squeeze(-1)) - return self.linear2(y2) + s2 = torch.tanh(self.linear1(s1).squeeze(-1)) + # bring together the state and past_pw representations + # return self.linear2(torch.cat([s2, a1], dim=1)) + return self.linear2(s2) class ActorContinous(nn.Module): @@ -95,9 +103,9 @@ def __init__(self, actor_net): self.actor_net = actor_net self.inv_lin = InverseLinear() - def forward(self, states): + def forward(self, states, past_pw): # get params for Dirichlet, and drop batch dim if batch_size=1 - logits = self.actor_net(states) + logits = self.actor_net(states, past_pw) concentrations = self.inv_lin(logits).squeeze(0) pi = Dirichlet(concentrations) actions = pi.sample() @@ -117,21 +125,6 @@ def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor): return pi.log_prob(actions) -class InverseLinear(nn.Module): - """ - Implements a layer specifically designed for Dirichlet distribution as final - layer, see here: https://openreview.net/pdf?id=BJeRg205Fm - """ - - def __init__(self): - super().__init__() - - def forward(self, x): - x[x < 0] = 1 / (1 - x[x < 0]) - x[x >= 0] = x[x >= 0] + 1 - return x - - class ActorCriticAgent(object): """ Actor Critic Agent used during trajectory collection. 
It returns a @@ -145,21 +138,27 @@ def __init__(self, actor_net: nn.Module, critic_net: nn.Module): self.critic_net = critic_net @torch.no_grad() - def __call__(self, state: torch.Tensor, device: str) -> Tuple: + def __call__( + self, state: torch.Tensor, past_pw: torch.Tensor, device: str + ) -> Tuple: """ Takes in the current state and returns the agents policy, sampled action, log probability of the action, and value of the given state Args: - states: current state of the environment + state: current state of the environment + past_pw: the previous portfolio value and weights device: the device used for the current batch Returns: torch dsitribution and randomly sampled action """ state = [s.to(device=device) for s in state] - pi, actions = self.actor_net(state) + past_pw = past_pw.to(device=device) + + pi, actions = self.actor_net(state, past_pw) log_p = self.get_log_prob(pi, actions) - value = self.critic_net(state) + + value = self.critic_net(state, past_pw) return pi, actions, log_p, value def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor) -> torch.Tensor: @@ -173,3 +172,18 @@ def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor) -> torch.Tensor: log probability of the acition under pi """ return self.actor_net.get_log_prob(pi, actions) + + +class InverseLinear(nn.Module): + """ + Implements a layer specifically designed for Dirichlet distribution as final + layer, see here: https://openreview.net/pdf?id=BJeRg205Fm + """ + + def __init__(self): + super().__init__() + + def forward(self, x): + x[x < 0] = 1 / (1 - x[x < 0]) + x[x >= 0] = x[x >= 0] + 1 + return x diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index 6b7b7372..c64780ee 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -109,13 +109,18 @@ def __init__(self, hparams: Namespace): # create env, init starting state and policy/value networks self.env = RLEnv(self.hparams) self.state = self.env.reset() - self.critic = 
ActorCriticTCN(self.hparams, 1, actor=False) - actor_tcn = ActorCriticTCN(self.hparams, self.env.action_space.shape[0]) - self.actor = ActorContinous(actor_tcn) + n_actions = self.env.action_space.shape[0] + self.critic = ActorCriticTCN( + self.hparams, n_actions=n_actions, output_size=1, actor=False + ) + self.actor = ActorContinous( + ActorCriticTCN(self.hparams, n_actions=n_actions, output_size=n_actions) + ) self.agent = ActorCriticAgent(self.actor, self.critic) # init batching and progress tracking vars self.batch_states = [] + self.batch_past_pw = [] self.batch_actions = [] self.batch_adv = [] self.batch_qvals = [] @@ -165,9 +170,16 @@ def calc_advantage(self, rewards: List[float], values: List[float]) -> List[floa for i in range(len(rewards) - 1) ] adv = self.discount_rewards(delta, self.hparams.gamma * self.hparams.lam) + return self.normalise_advantage(adv) + @staticmethod + def normalise_advantage(batch_adv: List[float]) -> List[float]: + """ + Normalise across all episodes within the epoch. Apparently this helps with + covergence. + """ # normalise advantage - adv = np.array(adv) + adv = np.array(batch_adv) adv = (adv - adv.mean()) / (adv.std() + np.finfo(float).eps) return list(adv) @@ -187,19 +199,21 @@ def train_batch( Tuple of Lists containing tensors for states, actions, log probs, qvals and advantage. 
""" - + past_pw = self._init_past_pw() for step in range(self.hparams.steps_per_epoch): - pi, action, log_prob, value = self.agent(self.state, self.device) + pi, action, log_prob, value = self.agent(self.state, past_pw, self.device) next_state, reward, done, info = self.env.step(action.cpu().numpy()) # drop first batch dim so dataloader later can resample them for backprop self.batch_states.append([s.squeeze(0) for s in self.state]) + self.batch_past_pw.append(past_pw.squeeze(0)) self.batch_actions.append(action) self.batch_logp.append(log_prob) self.ep_rewards.append(reward) self.ep_values.append(value.item()) self.state = next_state + past_pw = self._update_past_pw(info["portfolio_value"], action) epoch_end = step == (self.hparams.steps_per_epoch - 1) terminal = len(self.ep_rewards) == self.hparams.max_episode_length if epoch_end or done or terminal: @@ -220,16 +234,18 @@ def train_batch( if epoch_end: train_data = zip( self.batch_states, + self.batch_past_pw, self.batch_actions, self.batch_logp, self.batch_qvals, self.batch_adv, ) - for state, action, logp_old, qval, adv in train_data: - yield state, action, logp_old, qval, adv + for state, past_pw, action, logp_old, qval, adv in train_data: + yield state, past_pw, action, logp_old, qval, adv self.batch_states.clear() + self.batch_past_pw.clear() self.batch_actions.clear() self.batch_adv.clear() self.batch_logp.clear() @@ -242,8 +258,8 @@ def train_batch( self.epoch_rewards = 0 self.done_episodes = 0 - def actor_loss(self, state, action, logp_old, qval, adv) -> torch.Tensor: - pi, _ = self.actor(state) + def actor_loss(self, state, past_pw, action, logp_old, adv) -> torch.Tensor: + pi, _ = self.actor(state, past_pw) logp = self.actor.get_log_prob(pi, action) old_new_diff = logp - logp_old ratio = torch.exp(old_new_diff) @@ -255,8 +271,8 @@ def actor_loss(self, state, action, logp_old, qval, adv) -> torch.Tensor: loss_actor = -(torch.min(ratio * adv, clip_ratio * adv)).mean() return loss_actor, approx_kl - def 
critic_loss(self, state, action, logp_old, qval, adv) -> torch.Tensor: - value = self.critic(state) + def critic_loss(self, state, past_pw, qval) -> torch.Tensor: + value = self.critic(state, past_pw) loss_critic = (qval - value).pow(2).mean() return loss_critic @@ -274,7 +290,7 @@ def training_step( Returns: loss """ - state, action, old_logp, qval, adv = batch + state, past_pw, action, old_logp, qval, adv = batch self.log( "avg_ep_len", self.avg_ep_len, prog_bar=True, on_step=False, on_epoch=True ) @@ -289,7 +305,9 @@ def training_step( "avg_reward", self.avg_reward, prog_bar=True, on_step=False, on_epoch=True ) if optimizer_idx == 0: - loss_actor, approx_kl = self.actor_loss(state, action, old_logp, qval, adv) + loss_actor, approx_kl = self.actor_loss( + state, past_pw, action, old_logp, adv + ) self.log( "loss_actor", loss_actor, @@ -309,7 +327,7 @@ def training_step( return loss_actor elif optimizer_idx == 1: - loss_critic = self.critic_loss(state, action, old_logp, qval, adv) + loss_critic = self.critic_loss(state, past_pw, qval) self.log( "loss_critic", loss_critic, @@ -338,6 +356,25 @@ def train_dataloader(self) -> DataLoader: dataloader = DataLoader(dataset=dataset, batch_size=self.hparams.batch_size) return dataloader + def _init_past_pw(self) -> torch.Tensor: + """ + Init past portfolio value and weights to [1, 1, 0, ..., 0], since after the + portfolio is reset for each trajector p0=1, w0[0]=1 (USD relative price is + always 1). + """ + past_pw = torch.zeros(len(self.hparams.asset_names) + 2).to(self.device) + past_pw[:2] = 1 + return past_pw.unsqueeze(0) + + def _update_past_pw(self, p1: float, action: torch.Tensor) -> torch.Tensor: + """ + After each interaction, update the past weight / portfolio value vector as for + the next interaction the actor and critic networks take that in along with the + new state to form their outputs. 
+ """ + p1 = torch.Tensor([p1]).to(self.device) + return torch.cat([p1, action]).unsqueeze(0) + @staticmethod def _pre_sanity_check(hparams: Namespace): # ensure we have the rl specific target column in the config diff --git a/src/dagobert/modelling/utils.py b/src/dagobert/modelling/utils.py index ac34c93c..e23befd4 100644 --- a/src/dagobert/modelling/utils.py +++ b/src/dagobert/modelling/utils.py @@ -409,7 +409,7 @@ def update_lookback( for _ in range(num_samples): s = np.random.randint(anchor_len) diffs.append(idx[s + mini_series_length] - idx[s]) - lookback = (pd.Series(diffs).dt.seconds / 3600).quantile(quantile) + lookback = (pd.Series(diffs).dt.total_seconds() / 3600).quantile(quantile) return lookback diff --git a/src/dagobert/preprocessing/feature_creation/time_features.py b/src/dagobert/preprocessing/feature_creation/time_features.py index 3cda9c46..479e82b1 100644 --- a/src/dagobert/preprocessing/feature_creation/time_features.py +++ b/src/dagobert/preprocessing/feature_creation/time_features.py @@ -105,11 +105,11 @@ def add_time_features(self) -> Optional: if self.add_time_to_label: btt_col = pd.to_datetime(self.df_bars[self.barrier_touch_time_col]) - time_to_label = (btt_col - date_col).dt.seconds.fillna(0) + time_to_label = (btt_col - date_col).dt.total_seconds().fillna(0) self.df_bars.insert(0, NTimeFeatures.time_to_label, time_to_label) if self.add_date_diff: - date_diff = date_col.diff().dt.seconds.fillna(0) + date_diff = date_col.diff().dt.total_seconds().fillna(0) self.df_bars.insert(0, NTimeFeatures.date_diff, date_diff) logger.info("Added time features.") From 530f1218c6f1aad8f7bb48e8864834a97865cdc1 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Fri, 8 Jan 2021 21:05:44 +0000 Subject: [PATCH 20/62] init --- config/timegan_config.yaml | 49 +++ src/dagobert/modelling/augmentation/tgan.py | 58 ---- .../modelling/augmentation/timegan.py | 302 ++++++++++++++++++ 3 files changed, 351 insertions(+), 58 deletions(-) create mode 100644 
config/timegan_config.yaml delete mode 100644 src/dagobert/modelling/augmentation/tgan.py create mode 100644 src/dagobert/modelling/augmentation/timegan.py diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml new file mode 100644 index 00000000..e1f8d377 --- /dev/null +++ b/config/timegan_config.yaml @@ -0,0 +1,49 @@ + +# -------------------------------------------------------------------------------------- +# LIGHTNING +# -------------------------------------------------------------------------------------- + +gpus: 0 + + +# -------------------------------------------------------------------------------------- +# RUN +# -------------------------------------------------------------------------------------- + +log_dir: logs +num_workers: 4 +exp_name: Time-GAN +tags: + - time_gan_test +no_comet_logger: True +seed: 42 +batch_size: 256 + + +# -------------------------------------------------------------------------------------- +# GAN +# -------------------------------------------------------------------------------------- + +# gru or lstm +rnn: lstm + +# -------------------------------------------------------------------------------------- +# MODEL +# -------------------------------------------------------------------------------------- + +dropout: 0.2 +num_layers: 2 +hidden_size: 50 +z_dim: 50 + +# -------------------------------------------------------------------------------------- +# DATA +# -------------------------------------------------------------------------------------- + +#data_dir: "C:/Work/dagobert/data/modelling" +#data_dir: "/home/daniel/dagobert_data/modelling" +data_dir: "C:/Users/u164428/Desktop/Dagobert/data/modelling" + +# -------------------------------------------------------------------------------------- +# PREPROCESSING +# -------------------------------------------------------------------------------------- diff --git a/src/dagobert/modelling/augmentation/tgan.py b/src/dagobert/modelling/augmentation/tgan.py deleted file mode 
100644 index 5706658d..00000000 --- a/src/dagobert/modelling/augmentation/tgan.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -TimeGAN network, following the original implementation: -https://bitbucket.org/mvdschaar/mlforhealthlabpub/src/master/alg/timegan/tgan.py. -""" -from typing import List - -import torch -import torch.nn as nn -import torch.nn.functional as f -from torch.nn.utils import weight_norm - - -class Generator(nn.Module): - def __init__( - self, - input_size, - hidden_size, - num_layers, - dropout, - batch_first=True, - ): - super(Generator, self).__init__() - - # input/output: (batch, seq, feature) - # TODO: hparams? - self.lstm = nn.LSTM( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - batch_first=batch_first, - ) - # TODO: size of output is ? - self.linear = nn.Linear(hidden_size, hidden_size) - self.sigmoid = nn.Sigmoid() - - def forward(self, x): - # lstm_out = (batch_size, seq_len, hidden_size) - lstm_out, _ = self.lstm(x) - y_pred = self.linear(lstm_out[:, -1]) - return y_pred - - -def rnn_cell(module_name): - # GRU - if module_name == "gru": - rnn_cell = tf.nn.rnn_cell.GRUCell(num_units=hidden_dim, activation=tf.nn.tanh) - # LSTM - elif module_name == "lstm": - rnn_cell = tf.contrib.rnn.BasicLSTMCell( - num_units=hidden_dim, activation=tf.nn.tanh - ) - # LSTM Layer Normalization - elif module_name == "lstmLN": - rnn_cell = tf.contrib.rnn.LayerNormBasicLSTMCell( - num_units=hidden_dim, activation=tf.nn.tanh - ) - return rnn_cell diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py new file mode 100644 index 00000000..94a2b076 --- /dev/null +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -0,0 +1,302 @@ +""" +TimeGAN network, following the original implementation: +https://bitbucket.org/mvdschaar/mlforhealthlabpub/src/master/alg/timegan/tgan.py. 
+""" +from typing import List, Optional +from argparse import Namespace +import logging +from pathlib import Path + +import numpy as np +import pandas as pd +import matplotlib +from scipy.stats import spearmanr +from matplotlib.figure import Figure + +import torch +import torch.nn as nn +import torch.nn.functional as f +from torch.nn.utils import weight_norm + +from pytorch_lightning import LightningModule + + +class RnnBlock(nn.Module): + """ + Generate time-series data in latent space. + """ + + def __init__( + self, + input_size: int, + hidden_size: int, + num_layers: int, + dropout: float = 0.2, + batch_first: bool = True, + rnn: str = "lstm", + ): + super(RnnBlock, self).__init__() + + # input/output: (batch, seq, feature) + if rnn == "lstm": + self.rnn = nn.LSTM( + input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + dropout=dropout, + batch_first=batch_first, + ) + elif rnn == "gru": + self.rnn = nn.GRU( + input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + dropout=dropout, + batch_first=batch_first, + ) + self.tanh = nn.Tanh() + # TODO: whats' size of output in latent space + self.linear = nn.Linear(hidden_size, hidden_size) + self.sigmoid = nn.Sigmoid() + + def forward(self, z): + # lstm_out = (batch_size, seq_len, hidden_size) + rnn_out, _hidden = self.rnn(z) + output = self.tanh(rnn_out) + return output + + +class Supervisor(nn.Module): + """ + Generate next sequence using the previous sequence. + """ + + def __init__( + self, + input_size, + hidden_size, + num_layers, + dropout, + batch_first=True, + ): + super(Supervisor, self).__init__() + + # input/output: (batch, seq, feature) + # TODO: hparams? 
+ self.lstm = nn.LSTM( + input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + dropout=dropout, + batch_first=batch_first, + ) + self.tanh = nn.Tanh() + # TODO: whats' size of output in latent space + self.linear = nn.Linear(hidden_size, hidden_size) + self.sigmoid = nn.Sigmoid() + + def forward(self, z): + # lstm_out = (batch_size, seq_len, hidden_size) + lstm_out, _hidden = self.lstm(z) + lstm_out = self.tanh(lstm_out) + synthetic_series = self.sigmoid(self.linear(lstm_out)) + return synthetic_series + + +class Discriminator(nn.Module): + """ + Discriminate the original and synthetic time-series data + """ + + def __init__( + self, + input_size, + hidden_size, + num_layers, + dropout, + batch_first=True, + ): + super(Discriminator, self).__init__() + + # input/output: (batch, seq, feature) + # TODO: hparams? + self.lstm = nn.LSTM( + input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + dropout=dropout, + batch_first=batch_first, + ) + self.tanh = nn.Tanh() + # TODO: whats' size of output in latent space + self.linear = nn.Linear(hidden_size, 1) + + def forward(self, z): + # lstm_out = (batch_size, seq_len, hidden_size) + lstm_out, _hidden = self.lstm(z) + lstm_out = self.tanh(lstm_out) + synthetic_series = self.linear(lstm_out) + return synthetic_series + + +class Embedder(nn.Module): + """ + Embedding network between original feature space to latent space. + """ + + def __init__( + self, + input_size, + hidden_size, + num_layers, + dropout, + batch_first=True, + ): + super(Embedder, self).__init__() + + # input/output: (batch, seq, feature) + # TODO: hparams? 
+ self.lstm = nn.LSTM( + input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + dropout=dropout, + batch_first=batch_first, + ) + self.tanh = nn.Tanh() + # TODO: whats' size of output in latent space + self.linear = nn.Linear(hidden_size, hidden_size) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + # lstm_out = (batch_size, seq_len, hidden_size) + lstm_out, _hidden = self.lstm(x) + lstm_out = self.tanh(lstm_out) + embedded_real = self.sigmoid(self.linear(lstm_out)) + return embedded_real + + +class Recovery(nn.Module): + """ + Recovery network from latent space to original space. + """ + + def __init__( + self, + input_size, + hidden_size, + num_layers, + dropout, + batch_first=True, + ): + super(Recovery, self).__init__() + + # input/output: (batch, seq, feature) + # TODO: hparams? + self.lstm = nn.LSTM( + input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + dropout=dropout, + batch_first=batch_first, + ) + self.tanh = nn.Tanh() + # TODO: output size is same as original number of features + self.linear = nn.Linear(hidden_size, input_size) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + # lstm_out = (batch_size, seq_len, hidden_size) + lstm_out, _hidden = self.lstm(x) + lstm_out = self.tanh(lstm_out) + X_tilde = self.sigmoid(self.linear(lstm_out)) + return X_tilde + + +class TimeGANLightning(LightningModule): + """ + Lightning model made of RNN nets working together. + """ + + # ---------------------------------------------------------------------------------- + # INIT, FORWARD, OPTIMIZER SETUP + # ---------------------------------------------------------------------------------- + + def __init__(self, hparams: Namespace): + """ + Class constructor. + + Args: + hparams: Hyper-params passed in to the module. See the docs for more details + https://pytorch-lightning.readthedocs.io/en/latest/hyperparameters.html + and dagobert.modelling.dl.tcn_args for more information on the params. 
+ """ + + # define main vars (other than model) + super().__init__() + # TODO: sanity check, define hparams + # lightning sets this to cuda too late for some of our setup to work + self.tgan_device = "cuda" if hparams.gpus > 0 else "cpu" + # TODO: check if real data is the right one, get data in + # TODO: any sanity checks on data, hypermparams + # TODO set up losses + self.real_logging = None + self.comet_logging = not self.hparams.no_comet_logger + + # get feature number of instruments + num_inputs = [len(cols) for dataset, cols in self.hparams.cols_to_model.items()] + all_inputs = sum(num_inputs) + + # components of network + self.generator = RnnBlock( + input_size=self.hparams.z_dim, + hidden_size=self.hparams.hidden_size, + num_layers=self.hparams.num_layers, + dropout=self.hparams.dropout, + batch_first=True, + rnn=self.hparams.rnn, + ) + self.embedder = RnnBlock( + input_size=all_inputs, + hidden_size=self.hparams.hidden_size, + num_layers=self.hparams.num_layers, + dropout=self.hparams.dropout, + batch_first=True, + rnn=self.hparams.rnn, + ) + self.supervisor = RnnBlock( + input_size=self.hparams.hidden_size, + hidden_size=self.hparams.hidden_size, + num_layers=self.hparams.num_layers, + dropout=self.hparams.dropout, + batch_first=True, + rnn=self.hparams.rnn, + ) + self.recovery = RnnBlock( + input_size=self.hparams.hidden_size, + hidden_size=self.hparams.hidden_size, + num_layers=self.hparams.num_layers, + dropout=self.hparams.dropout, + batch_first=True, + rnn=self.hparams.rnn, + ) + self.discriminator = RnnBlock( + input_size=self.hparams.hidden_size, + hidden_size=self.hparams.hidden_size, + num_layers=self.hparams.num_layers, + dropout=self.hparams.dropout, + batch_first=True, + rnn=self.hparams.rnn, + ) + # final linear layers + self.generator_linear = nn.Linear( + self.hparams.hidden_size, self.hparams.hidden_size + ) + self.embedder_linear = nn.Linear( + self.hparams.hidden_size, self.hparams.hidden_size + ) + self.supervisor_linear = nn.Linear( + 
self.hparams.hidden_size, self.hparams.hidden_size + ) + self.recovery_linear = nn.Linear(self.hparams.hidden_size, all_inputs) + self.discriminator_linear = nn.Linear(self.hparams.hidden_size, 1) From bf55dc4fcc67a5bfe14c3ba9f95b0b671c7466bb Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sat, 9 Jan 2021 15:34:41 +0000 Subject: [PATCH 21/62] fixing inverse linear implementation, changing past weights to past dirichlet concentrations in what we pass to the network along with the state --- notebooks/notes/rl_episodes.xlsx | Bin 0 -> 10100 bytes src/dagobert/modelling/rl/networks.py | 20 +++++++++++--------- src/dagobert/modelling/rl/ppo.py | 16 +++++++++------- 3 files changed, 20 insertions(+), 16 deletions(-) create mode 100644 notebooks/notes/rl_episodes.xlsx diff --git a/notebooks/notes/rl_episodes.xlsx b/notebooks/notes/rl_episodes.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..db08bae49194fc64e5a634d6427db76c96b6e647 GIT binary patch literal 10100 zcmeHtgf)@BMZ+ zd%wTn-kxW=o|&%qRCU*R>#aVgRODe`aRBfDAOHZM1ehLWTN^?F0CBJY05$*!tuNu^ z=x*WYZlvMuY~l8X&C9`_A|Do-Ar}A*ssF#@zxWAMCXFcdabQSY%RESIvdS&iiUMCA z27bX}QWfd$PaG^a)6cQ7dCrNr#}LoMwdMVc^=;9I^K{g@*3qFcFnq904J8T$?$$IU z=4BuFazNXKPnh7Ur+b`>LoE7&(8wspG82&I($uHMEx9HpS4m`zizgDWkv;!K3v*?# z1*KfClpnm%v#O-EjLA1#HifYu=OL)3-JT`Lc+ zwGpd+jwpySaPkL0#V)EIuZq+0+xz}kxCII$C%qI)_q1!+B>gMI=B+ugapp<{R1Pew zMPG6xmHBy@0{u$BL(P2M%75TsnuMyFW8>)@|H+b-J0K~#x%!OfXds*+`ju-P zjdjEMP5KLM?xnw8J@@HIws$`-VqXRLWGFC-ymT3&Q2!)Jsu;1p3k(3@4UsZ*$dmE1 zXZLh+wKH{cvinW9DmC9YWy)c^|1|v^;6-0U!10quyj!`)SW1iwtFE#_mKp&=iHQZ&$za3$x?VsKu6pp{vt3N| z46A6RjntMKdb!>yp%Naj|Tb=hr2%q#Kw3HMr1*4a%^H? 
zY9Wo#?NNJe-9V0?$u#Xf36hoZxqTf*{uM1`f=q-A25q#j1cwoE@OF@WL-4Gi45Nry zKNF{&iv6pc_k_iCk_ zgDMQ+@CSudtWHKqE{BMXMx2vDH=47=V{sPWlHlfsm?`hc+@(Qeu9dgb8T_Hi3I=_r zR*h?dqU^4wf-WVpSu6(0WS1tur`Y|^ss0>(E;u7tZY`U9KEcP+tc-X%0deqQ0mpW= z$g@T|*tt7}K+_0IJnJDh6ka9zBb))%^<%vFYoP38)xJA zf1a$9L%8=(c7jBbQqLh*S|Jif1OP%o*!hQw{gtEtRx~Kc0u#dV|L&twO;Ns|1ET}^ zDU8E2(*qZC&Yhj=Kyx1*I8@KLKu5{tcdgdx=ogRcmuTxS*q1%b zrIA>$@38HU1>u}V&qrZ^riWEzfzoi;SO44rPm^LY}PrrLb?PJ4F~+n%ti;!EGx)y-o9Y z&0YL{n6Yc&jAi8F*m85|%ghR7r2pwM7N#^UIgs^`Ed>BT0IBh>3Ej=w!ouB+{rBR< zZ!>$Ao}Jq|56*|uogwOh>^1jkhnPsf)S+X?J+TH(faX%G1`SrFE><@K|O*X}`2-eut)%?EIx*HFE zUp;s5^!B;+qh*PV<+EVOX@OtJikkq4Q{&{B=Ju3Piy|oGnM=nN>2+8IQTtX!{Qb50 zyTqGi-*@qg%Xjz_n|F8XOr2+J33HkR--?H}%o83qK7>_dedRbzR$sdWcbpJG#~U*R&1ID&m>}L zs&xr6_(n4xNLSWL^Pbleb1y$CAU`SuUnt8>f0Nd9)4EkouIt^uaF}gRB1g`0YDAS) zyr}6Vtn^cG$&Qz!EAtUkD6ri65KQsx8JWc7iaYP%cO13LIk)wpliG z(;nMl@|^9?j~%^09aLWODqY+d9>foeXNhO@%@x7l6&-{C6cRVKu|&LJFE6a{H{3Pbwt9Qbs*EE5HX1xo#cszSDP#&?12;DFL{e4chaRk-hxXPtSU&?a1cJIrpk<)Ai&C}Z81*$XcH~Oh-!Nsn zh%#kM02l-4qH`Ps?pIMQDtg)8ns6<%g^cp&vZ#dQ?KU?~ip`aOV1Dx@t10SkAG8(a zRk5omGKatdA2gwfJKKsx17wXT74#r~+jz(nK~-~0A!aRWZq+1mvH+}HDxzRGXT+FN z@f94gaBqk;o=gd(Q1^d{QAnlt_SU0wWT%|KxLsaMWk9s#Jtv=xl)Jabk^DC$+gs^w zztljA*)?sDOph}BnLP#MT93!rKFoehz$F{37^(8kpfddSoTwoREMj4i$RW!dLJkR( zvLP_=pf?*NgeW;SQrF}C0@BhQ%a8OX^hH7FqYez%2ITRsW7M%aQS%Z*6f`x=4rGtc zW0;NDfKztZyvt1&dLoPGBh<|vQ9~ulSZSX^*ZSpg1jNiK)}jg?7@jymJg3d!ybk>E zgWe#J5D`#qV<+&CCqLlhp7Qo7fxYB)AO+kjpd^wXH>BZKp^*}}n)d_7_wRswV#7W- z;k9{X;cSDB63E>Wk|;d&(qgMcdzS&UR1@|`yPze*!5z8y0b_QFKw|-#_pOa(mNm=+ z#@ga6%V3Ay_$5cIfF&B9(4$30p_+5w*VV0rQ21*b@Q{KSHGlMQI05aZZpsQ0}EL{o!e z*;x!11dtB1KoiOQa=BbU@saU4ehFX}C1{KSGR6m4l(1uQqE;BeDg3o=p%_{4*gGhJ zRbWYN8@ftqMWMZD<@4Bga@xv3 zYJcwJ5uIq^Rs3wgl@0yEZvi;qs_2gFe$?U1V%MopvBgjoqsn5}s-Np7MLLpygeT(- zQ^1^{xx_l&GCTU2W#18$oy~lpgXhsCI(*(<4X&--a7$J^qK`#K&Bl4r z+4%h?nX0Rj*}1_aHMSu!+36xOPJ}I6#gA3s6k(GsMkpJ`cMTnUm>R+U2-Wf_KQ)!} z!OKgA_hzAu`2X%XHn(6Z`e#q?|~C2!0ZCww3pNKK#h`q>L@9>&hdM9?tVJik@ikH4Y@ZV#i&!|OgJn0M;gsk 
zioYEn1GlfTOj0za1)}3EGRuSf!bd+>Oot)hmme^$VOFqyF zUGx{3Uh0o{Ur0!UmecTtbA)}KPIk}3x`h2Nc5eHOYFiqZjQD&~y=Hby7i9uM zyFZ0`mEF%F-k%_fx#LlKGy;Y;)Th|%eOq(byFj8d2-hyuWjEJKe%eCwA^p?q&v1)o z(H;bz0;}a2BPS=$ISNaO=PeO!5##Z8M{IJHfa`6pTo5ygBxlj>cc zC89|y+9blaW`#M|6>1_bjxat2g-_8+xEj-{vDHhMhL7}GPz2APjV?QU>MOOI5%x8w zHi7hm!!NvSYjDJxP4z&HU-HFE(#|OJO-@(;9%;I2b{ffdk&NeeW-P(-KLx@riNv~) zP&Ux+$=>wyB*?q&OtwT3aK#II{D@Hza+_C30QdXRp;1c3dj`qk`^L@*N(xJmsMNk# zY(i}8eYB)%hTF8wNTLtFvlt`&+#a3YL5}YpSQBM2FVy&>VKjnEP~f~QdA^N6-937c zz^6&XO=sMMYh=4+LYvAsne;%ek{LXvd6{JlWsy*B_npx-707IHeVXJ@W98%$=!M4+ zfw@w6OC-4@_l9pPOXFSk2QW# zPTY(c_!d!1|KK@H<_YBn4;QO{dY>ea$zZW9393W9CTFpGXPvXy?VM2Q_T5|4&2Z-C z`M%t8%24E!#h6|Cx!&OA)uotidb=-ZnC|#&Cr7#c^A;Cn($NUqK%2Pdd)W4gXatVZ z1=Uvi7F(z>s+)|LX)KFL)T|pWbg$?+=VQlF^du~ny0an`E;YQk=yJ^p!`?FEejKm5 zS338KjhRxiYN>3^C_{!zxb=K@E8fu)K3=Yx-uK(l@YGF(XPBEJFj251R((sL zYJ?_^V`vp@IG3(7;jS~GvX`2t_u6F(iNgUG*K6x)RjOkvOl)2()RH7=+YGm)KD|A< z8fh&3^}9r_vUDgFQ+6(jSaIS+QiZJ4(e8CI9FrV@_-ibJqbc{ggRSXd z)11mJrY*#-TzfI-n08WB^vxhTI`#2>LC4z$TVN9tHMiX@_6|YctQUL>^|U^IzIw9< zp=^%hji*TpA%1O`WRieEqS7$@i}*C3q<&T-R{vyF?g^572WIU63=zGJ3{htCOcxAy zL9iER54BC`5`k)t5;?P&u=Q+vR-8#p*D@73J}R-gR@6ZEUfQhG0fTDHwglZ&uBiqTP7^O&-JtFP*fJWGL#OFp9Zd-U)(-| z=C2ye?^i%d$fdTK*uR)@a;b=oUOYscKr6&>2)xqcW&UOtCA`+$k${SMQj`#BhN`aV z<4^!()5q>l$G>}8+(<*3w$eFW=a=QvB1ezWmsvJe^h{EHU*8hf7d5k>Cllg{lIzif zg0F=CRddJPmc_JcWqPe%r8YUaGq63Ul1VSlX^pL;C@jj24*XE7AhvnCrfqjTjm$MF zBt=%5hhF4D8{(KPP8O^*juMc;s`DP_j6FV6x%*@_dGL^a8;Yw27A(IZ&RIr+I^8p- z8`Yh4K}KllUQ+5(!BKMXBU8ex#va`-t9O~Wz{>f;n(R}--kZQP5;W1(d%AUc9DPfX z>3f@j{LE)N3lCoXA4?%@)c|83p2`)^cDs;RF#_RN zxyjKf@KV`xbzIIFqfOq84K{S(ujyWBk;t;=LT$HDvQ7II9++itC<~p5<-t#U<-@>I z-l7-O$)SA})*ZahdORm>iFy*~vKtBOdPrZRDF(w$05-_IKeBi8{I#$Lie0q%*Sp=S zS5goI9qkLb(M%A+Og?`4DEcHX2Gx%)M(J8ChUQ4*(Jp*Ks@mc((U{RteHi)eoHzP= zAoi3xAnxsLZXsPrV`?bw2`#LOhotkP9fs%}CX{pr+(U>e`||He6?>(LG2ob0efJ%}Cmj3~tR*MBk}ED8o3`@FwZJgJOp~ z1m3rPG70A30947q?yw;r2e{i}NpzTbsCAn5(A$M*CiKJpeBB|z1W-6{u&Ti~xPOcXJ 
z%FzEWfC_nZ(FsQKeH=IgD{x&BLq3_snG)g)pbu+7Fg+3L`Z7n2#^+%)t%nPljddPh z5h&SPwik`YItsJMH4=S6=aV}~@hsAa){eo`3rHQ6sk^LlzQ%ergVv?vm_iw|v)Z&x zNyG@sV>-@afyz{}KEn6*Rw4`i$|(F^YAA+#b0Zd`_r^k!B29 z!%Le7g)7PQAQV2jD)p@^Ro`iS6*OlD&Q`z460^OF(QmeIZKwAz=Nrk*``N3AVXw0Gu>zrKEh4 zmRi0;9#^w5f6Zk7BH=pEv4uU6)f&%yDjM`qMQwYEGX?PFbm}e+Lvg>RGC68h0qM<& zi>CrgXoyuaz+!}ag-KkRl=k^j4azwgBOH}JPI z6~c+X?92ES_-l6hPiQ+N#qmq7`d9GZlV^WI0RTOu-@*T%RNAk0e$A--X(zrz1M1^yLIPW2b~za~T#c{s@E0su&mPXI)(muY_c^?xtK!fpTn literal 0 HcmV?d00001 diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 3f05e70c..e52328ad 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -52,8 +52,8 @@ def __init__( ) self.linear_a = nn.Linear(n_actions + 1, num_channels[-1]) self.linear1 = nn.Linear(hparams.mini_series_length, 1) - # self.linear2 = nn.Linear(num_channels[-1] * 2, output_size) - self.linear2 = nn.Linear(num_channels[-1], output_size) + self.linear2 = nn.Linear(num_channels[-1] * 2, output_size) + # self.linear2 = nn.Linear(num_channels[-1], output_size) def forward(self, state, past_pw): s1 = self.tcn(*state) @@ -63,8 +63,8 @@ def forward(self, state, past_pw): else: s2 = torch.tanh(self.linear1(s1).squeeze(-1)) # bring together the state and past_pw representations - # return self.linear2(torch.cat([s2, a1], dim=1)) - return self.linear2(s2) + return self.linear2(torch.cat([s2, a1], dim=1)) + # return self.linear2(s2) class ActorContinous(nn.Module): @@ -109,7 +109,7 @@ def forward(self, states, past_pw): concentrations = self.inv_lin(logits).squeeze(0) pi = Dirichlet(concentrations) actions = pi.sample() - return pi, actions + return pi, actions, logits def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor): """ @@ -149,17 +149,19 @@ def __call__( past_pw: the previous portfolio value and weights device: the device used for the current batch Returns: - torch dsitribution and randomly sampled action + torch distribution and 
randomly sampled action, the logits that went into + the Dirichlet dist, the probability of the sample, the estimated reward + for this action by the critic """ state = [s.to(device=device) for s in state] past_pw = past_pw.to(device=device) - pi, actions = self.actor_net(state, past_pw) + pi, actions, actor_logits = self.actor_net(state, past_pw) log_p = self.get_log_prob(pi, actions) value = self.critic_net(state, past_pw) - return pi, actions, log_p, value + return pi, actions, actor_logits, log_p, value def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor) -> torch.Tensor: """ @@ -184,6 +186,6 @@ def __init__(self): super().__init__() def forward(self, x): - x[x < 0] = 1 / (1 - x[x < 0]) x[x >= 0] = x[x >= 0] + 1 + x[x < 0] = 1 / (1 - x[x < 0]) return x diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index c64780ee..8bf2f078 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -201,7 +201,9 @@ def train_batch( """ past_pw = self._init_past_pw() for step in range(self.hparams.steps_per_epoch): - pi, action, log_prob, value = self.agent(self.state, past_pw, self.device) + pi, action, actor_logits, log_prob, value = self.agent( + self.state, past_pw, self.device + ) next_state, reward, done, info = self.env.step(action.cpu().numpy()) # drop first batch dim so dataloader later can resample them for backprop @@ -213,7 +215,7 @@ def train_batch( self.ep_values.append(value.item()) self.state = next_state - past_pw = self._update_past_pw(info["portfolio_value"], action) + past_pw = self._update_past_pw(info["portfolio_value"], actor_logits) epoch_end = step == (self.hparams.steps_per_epoch - 1) terminal = len(self.ep_rewards) == self.hparams.max_episode_length if epoch_end or done or terminal: @@ -238,7 +240,7 @@ def train_batch( self.batch_actions, self.batch_logp, self.batch_qvals, - self.batch_adv, + PPO.normalise_advantage(self.batch_adv), ) for state, past_pw, action, logp_old, qval, adv 
in train_data: @@ -259,7 +261,7 @@ def train_batch( self.done_episodes = 0 def actor_loss(self, state, past_pw, action, logp_old, adv) -> torch.Tensor: - pi, _ = self.actor(state, past_pw) + pi, _, _ = self.actor(state, past_pw) logp = self.actor.get_log_prob(pi, action) old_new_diff = logp - logp_old ratio = torch.exp(old_new_diff) @@ -362,8 +364,8 @@ def _init_past_pw(self) -> torch.Tensor: portfolio is reset for each trajector p0=1, w0[0]=1 (USD relative price is always 1). """ - past_pw = torch.zeros(len(self.hparams.asset_names) + 2).to(self.device) - past_pw[:2] = 1 + past_pw = torch.ones(len(self.hparams.asset_names) + 2).to(self.device) + # past_pw[:2] = 1 return past_pw.unsqueeze(0) def _update_past_pw(self, p1: float, action: torch.Tensor) -> torch.Tensor: @@ -373,7 +375,7 @@ def _update_past_pw(self, p1: float, action: torch.Tensor) -> torch.Tensor: new state to form their outputs. """ p1 = torch.Tensor([p1]).to(self.device) - return torch.cat([p1, action]).unsqueeze(0) + return torch.cat([p1.unsqueeze(0), action], -1) @staticmethod def _pre_sanity_check(hparams: Namespace): From e8d9e8f97c53c76b5f53c293f2a7f84971a9eba0 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Mon, 11 Jan 2021 11:30:21 +0000 Subject: [PATCH 22/62] major rewrite of the experience collection to make it parallel - doesn't work yet --- src/dagobert/modelling/rl/__init__.py | 1 + src/dagobert/modelling/rl/networks.py | 4 +- src/dagobert/modelling/rl/ppo.py | 259 +++++++++++++------------- src/dagobert/modelling/rl/utils.py | 203 +++++++++++++++++++- 4 files changed, 330 insertions(+), 137 deletions(-) diff --git a/src/dagobert/modelling/rl/__init__.py b/src/dagobert/modelling/rl/__init__.py index d4900664..72758a44 100644 --- a/src/dagobert/modelling/rl/__init__.py +++ b/src/dagobert/modelling/rl/__init__.py @@ -1,3 +1,4 @@ from .environment import RLData, RLPortfolio, RLEnv from .networks import ActorCriticTCN, ActorCriticAgent, ActorContinous from .ppo import PPO +from .utils 
import ExperienceBuffer, ParallelExperiences diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index e52328ad..4082a7cf 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -62,8 +62,8 @@ def forward(self, state, past_pw): s2 = s1[:, :, -1] else: s2 = torch.tanh(self.linear1(s1).squeeze(-1)) - # bring together the state and past_pw representations - return self.linear2(torch.cat([s2, a1], dim=1)) + # bring together the state and past_pw representations make residual connection + return past_pw[:, 1:] + self.linear2(torch.cat([s2, a1], dim=1)) # return self.linear2(s2) diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index 8bf2f078..e90241e9 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -9,6 +9,7 @@ from typing import List, Tuple from argparse import Namespace + import gym import torch import numpy as np @@ -27,6 +28,8 @@ ActorCriticTCN, ActorContinous, ActorCriticAgent, + ParallelExperiences, + ExperienceBuffer, ) from dagobert.modelling.dl import ( ExperienceSourceDataset, @@ -70,6 +73,7 @@ def run_rl(args): # define trainer and and lightning module args.multiprocessing = True if args.gpus != 1 else False + args.num_workers = 1 if args.num_workers == 0 else args.num_workers trainer = Trainer.from_argparse_args( args, logger=tcn_loggers, @@ -106,10 +110,9 @@ def __init__(self, hparams: Namespace): hparams = Preprocessing().preprocess_train_dfs(hparams) self.hparams = TCNLightning._check_mini_series_lookback(hparams) - # create env, init starting state and policy/value networks + # create env, policy/value networks and experience buffer + tracking vars self.env = RLEnv(self.hparams) - self.state = self.env.reset() - n_actions = self.env.action_space.shape[0] + n_actions = self.envs.action_space.shape[0] self.critic = ActorCriticTCN( self.hparams, n_actions=n_actions, output_size=1, actor=False ) @@ -117,27 
+120,73 @@ def __init__(self, hparams: Namespace): ActorCriticTCN(self.hparams, n_actions=n_actions, output_size=n_actions) ) self.agent = ActorCriticAgent(self.actor, self.critic) - - # init batching and progress tracking vars - self.batch_states = [] - self.batch_past_pw = [] - self.batch_actions = [] - self.batch_adv = [] - self.batch_qvals = [] - self.batch_logp = [] - - self.ep_rewards = [] - self.ep_values = [] - - self.done_episodes = 0 - self.epoch_rewards = 0 + self.buffer = ExperienceBuffer() self.avg_ep_reward = 0 self.avg_ep_len = 0 self.avg_reward = 0 - def discount_rewards(self, rewards: List[float], discount: float) -> List[float]: + # ---------------------------------------------------------------------------------- + # EXPERIENCE COLLECTION FOR TRAIN DATALOADER + # ---------------------------------------------------------------------------------- + + def train_dataloader(self) -> DataLoader: + """Initialize the Experience Buffer dataset used for retrieving experiences""" + dataset = ExperienceSourceDataset(self.generate_experience_buffer) + dataloader = DataLoader(dataset=dataset, batch_size=self.hparams.batch_size) + return dataloader + + def generate_experience_buffer( + self, + ) -> Tuple[ + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + ]: + """ + Logic for generating trajectory data to train policy and value networks. This + is done leveraging the `Process` and `Queue` classes of the `multiprocessing` + module of Python. We'll launch hparams.num_workers number of new processes, + each replicating the environment in memory, so this can get expensive where + `num_assets` in the portfolio is large. + + Yield: + Tuple of Lists containing tensors for states, actions, log probs, qvals and + advantage. 
+ """ + # setup workers and pass them the env, agent, vars to work with + max_worker_steps = int(self.hparams.steps_per_epoch / self.hparams.num_workers) + parallel_experiences = ParallelExperiences() + for i in range(self.hparams.num_workers): + args = ( + deepcopy(self.env), + self.agent, + self.device, + max_worker_steps, + self.hparams.max_episode_length, + len(self.hparams.asset_names), + self.hparams.gamma, + self.hparams.lam, + ) + parallel_experiences.create_worker(args) + + # collect experiences in parallel, then merge them and create dataset + self.buffer.merge_buffers(parallel_experiences.collect_experiences()) + self.buffer.yield_dataset() # this will yield a dataset for dataloader + self.buffer.clear_buffer() + + # finally update metrics we log + self.avg_ep_reward = self.buffer.epoch_rewards / self.buffer.done_episodes + self.avg_reward = self.buffer.epoch_rewards / self.hparams.steps_per_epoch + self.avg_ep_len = self.hparams.steps_per_epoch / self.buffer.done_episodes + + @staticmethod + def discount_rewards(rewards: List[float], discount: float) -> List[float]: """ - Calculate the discounted rewards of all rewards in list. + Calculate the discounted rewards of all rewards in list. This is used as + Q-values for training the critic network so it becomes better approximating + the real reward we can expect from a given state. Args: rewards: list of rewards/advantages @@ -153,24 +202,39 @@ def discount_rewards(self, rewards: List[float], discount: float) -> List[float] cumul_reward.append(sum_r) return list(reversed(cumul_reward)) - def calc_advantage(self, rewards: List[float], values: List[float]) -> List[float]: + @staticmethod + def calc_advantage( + rewards: List[float], + values: List[float], + gamma: float = 0.99, + lam: float = 0.95, + norm: bool = True, + ) -> List[float]: """ Calculate the advantage given rewards, state values, and last value of episode. 
+ The advantage compares how much better the actor did compared to what the + critic thought the given state is worth in reward. Args: rewards: list of episode rewards values: list of state values from critic + gamma: Gamma for discounting the long-term rewards. + lam: Lambda for the GAE advantage calculation. + norm: If True, the advantages are normalised to mean=0, std=1. Returns: - list of advantages + List of advantages. """ # GAE delta = [ - rewards[i] + self.hparams.gamma * values[i + 1] - values[i] + rewards[i] + gamma * values[i + 1] - values[i] for i in range(len(rewards) - 1) ] - adv = self.discount_rewards(delta, self.hparams.gamma * self.hparams.lam) - return self.normalise_advantage(adv) + adv = PPO.discount_rewards(delta, gamma * lam) + if norm: + return PPO.normalise_advantage(adv) + else: + return adv @staticmethod def normalise_advantage(batch_adv: List[float]) -> List[float]: @@ -183,82 +247,30 @@ def normalise_advantage(batch_adv: List[float]) -> List[float]: adv = (adv - adv.mean()) / (adv.std() + np.finfo(float).eps) return list(adv) - def train_batch( - self, - ) -> Tuple[ - List[torch.Tensor], - List[torch.Tensor], - List[torch.Tensor], - List[torch.Tensor], - List[torch.Tensor], - ]: + @staticmethod + def _init_past_pw(asset_num, device) -> torch.Tensor: """ - Logic for generating trajectory data to train policy and value network + Init past portfolio value and weights to [1, 1, 0, ..., 0], since after the + portfolio is reset for each trajector p0=1, w0[0]=1 (USD relative price is + always 1). + """ + past_pw = torch.ones(asset_num + 2).to(device) + # past_pw[:2] = 1 + return past_pw.unsqueeze(0) - Yield: - Tuple of Lists containing tensors for states, actions, log probs, qvals and - advantage. 
+ @staticmethod + def _update_past_pw(p1: float, action: torch.Tensor, device) -> torch.Tensor: """ - past_pw = self._init_past_pw() - for step in range(self.hparams.steps_per_epoch): - pi, action, actor_logits, log_prob, value = self.agent( - self.state, past_pw, self.device - ) - next_state, reward, done, info = self.env.step(action.cpu().numpy()) - - # drop first batch dim so dataloader later can resample them for backprop - self.batch_states.append([s.squeeze(0) for s in self.state]) - self.batch_past_pw.append(past_pw.squeeze(0)) - self.batch_actions.append(action) - self.batch_logp.append(log_prob) - self.ep_rewards.append(reward) - self.ep_values.append(value.item()) - self.state = next_state - - past_pw = self._update_past_pw(info["portfolio_value"], actor_logits) - epoch_end = step == (self.hparams.steps_per_epoch - 1) - terminal = len(self.ep_rewards) == self.hparams.max_episode_length - if epoch_end or done or terminal: - # discounted cumulative reward - self.batch_qvals += self.discount_rewards( - self.ep_rewards, self.hparams.gamma - )[:-1] - # advantage - self.batch_adv += self.calc_advantage(self.ep_rewards, self.ep_values) - # logs - self.done_episodes += 1 - self.epoch_rewards += np.sum(self.ep_rewards) - # reset params - self.ep_rewards = [] - self.ep_values = [] - self.state = self.env.reset() - - if epoch_end: - train_data = zip( - self.batch_states, - self.batch_past_pw, - self.batch_actions, - self.batch_logp, - self.batch_qvals, - PPO.normalise_advantage(self.batch_adv), - ) - - for state, past_pw, action, logp_old, qval, adv in train_data: - yield state, past_pw, action, logp_old, qval, adv - - self.batch_states.clear() - self.batch_past_pw.clear() - self.batch_actions.clear() - self.batch_adv.clear() - self.batch_logp.clear() - self.batch_qvals.clear() - - self.avg_ep_reward = self.epoch_rewards / self.done_episodes - self.avg_reward = self.epoch_rewards / self.hparams.steps_per_epoch - self.avg_ep_len = self.hparams.steps_per_epoch / 
self.done_episodes - - self.epoch_rewards = 0 - self.done_episodes = 0 + After each interaction, update the past weight / portfolio value vector as for + the next interaction the actor and critic networks take that in along with the + new state to form their outputs. + """ + p1 = torch.Tensor([p1]).to(device) + return torch.cat([p1.unsqueeze(0), action], -1) + + # ---------------------------------------------------------------------------------- + # LOSSES AND OPTIMIZERS + # ---------------------------------------------------------------------------------- def actor_loss(self, state, past_pw, action, logp_old, adv) -> torch.Tensor: pi, _, _ = self.actor(state, past_pw) @@ -278,6 +290,22 @@ def critic_loss(self, state, past_pw, qval) -> torch.Tensor: loss_critic = (qval - value).pow(2).mean() return loss_critic + def configure_optimizers(self) -> List[optim.Optimizer]: + """ Initialize Adam optimizer""" + optimizer_actor = optim.Adam(self.actor.parameters(), lr=self.hparams.lr_actor) + optimizer_critic = optim.Adam( + self.critic.parameters(), lr=self.hparams.lr_critic + ) + return optimizer_actor, optimizer_critic + + def optimizer_step(self, *args, **kwargs): + for _ in range(self.hparams.n_optim_iters): + super().optimizer_step(*args, **kwargs) + + # ---------------------------------------------------------------------------------- + # TRAINING PHASE + # ---------------------------------------------------------------------------------- + def training_step( self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx, optimizer_idx ): @@ -340,43 +368,6 @@ def training_step( ) return loss_critic - def configure_optimizers(self) -> List[optim.Optimizer]: - """ Initialize Adam optimizer""" - optimizer_actor = optim.Adam(self.actor.parameters(), lr=self.hparams.lr_actor) - optimizer_critic = optim.Adam( - self.critic.parameters(), lr=self.hparams.lr_critic - ) - return optimizer_actor, optimizer_critic - - def optimizer_step(self, *args, **kwargs): - for _ in 
range(self.hparams.n_optim_iters): - super().optimizer_step(*args, **kwargs) - - def train_dataloader(self) -> DataLoader: - """Initialize the Replay Buffer dataset used for retrieving experiences""" - dataset = ExperienceSourceDataset(self.train_batch) - dataloader = DataLoader(dataset=dataset, batch_size=self.hparams.batch_size) - return dataloader - - def _init_past_pw(self) -> torch.Tensor: - """ - Init past portfolio value and weights to [1, 1, 0, ..., 0], since after the - portfolio is reset for each trajector p0=1, w0[0]=1 (USD relative price is - always 1). - """ - past_pw = torch.ones(len(self.hparams.asset_names) + 2).to(self.device) - # past_pw[:2] = 1 - return past_pw.unsqueeze(0) - - def _update_past_pw(self, p1: float, action: torch.Tensor) -> torch.Tensor: - """ - After each interaction, update the past weight / portfolio value vector as for - the next interaction the actor and critic networks take that in along with the - new state to form their outputs. - """ - p1 = torch.Tensor([p1]).to(self.device) - return torch.cat([p1.unsqueeze(0), action], -1) - @staticmethod def _pre_sanity_check(hparams: Namespace): # ensure we have the rl specific target column in the config diff --git a/src/dagobert/modelling/rl/utils.py b/src/dagobert/modelling/rl/utils.py index f2651087..25520783 100644 --- a/src/dagobert/modelling/rl/utils.py +++ b/src/dagobert/modelling/rl/utils.py @@ -1,10 +1,211 @@ -"""Util functions for portfolio optimization and other RL related tasks""" +""" +Util functions for portfolio optimization and other RL related tasks, including the +classes for gathering experience in parallel. +""" +# pylint: disable=no-member +from typing import List +from multiprocessing import Process, Queue +import gym +import torch import numpy as np +from dagobert.modelling.rl import PPO, ActorCriticAgent + eps = np.finfo(float).eps +class ExperienceBuffer: + """ + Object holding all states, rewards, actions, logp vals, etc of a rollout session, + i.e. 
the phase of the training when we're collecting experience to train on later + using the current policy of the actor. + + This is designed to work both with a single worker (single process) or with + multiple workers collecting experience in parallel. + """ + + def __init__(self): + """Class constructor""" + # step vars + self.states = [] + self.past_pws = [] + self.actions = [] + self.advs = [] + self.qvals = [] + self.logps = [] + self.infos = [] + + # episode vars + self.ep_rewards = [] + self.ep_values = [] + self.done_episodes = 0 + self.epoch_rewards = 0 + + def append( + self, + state: torch.Tensor, + past_pw: torch.Tensor, + action: torch.Tensor, + logp: torch.Tensor, + reward: float, + value: torch.Tensor, + info: dict, + ): + """ + Appends the state (including portfolio value and weights), actions, logp, + reward to the buffer after a single step taken in the environment. + + Args: + state: State that went into the agent (i.e. both actor and critic). + past_pw: Past portfolio value and weights that went into the agent. + action: Agent's action to the state and past_pw. + logp: Log-probability of the action sampled from the actor's distribution. + reward: Reward obtained by the action. + value: Estimated (by critic) reward we should have got with this action. + info: Portfolio related information returned by the env after the step. + """ + # drop first batch dim so dataloader later can resample them for backprop + self.states.append([s.squeeze(0) for s in state]) + self.past_pws.append(past_pw.squeeze(0)) + self.actions.append(action) + self.logps.append(logp) + self.infos.append(info) + self.ep_rewards.append(reward) + self.ep_values.append(value.item()) + + def merge_buffers(self, buffers: List[ExperienceBuffer]): + """ + Merges the passed in ExperienceBuffers and overwrites the current state with it. + + Args: + buffers: List of smaller ExpereinceBuffers to merge together from parallel + processes. 
+ """ + pass + + def yield_dataset( + self, + ) -> Tuple[ + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + ]: + """ + Yields an iterable dataset for Pytorch Lightning from the contents of the + ExperienceBuffer. + + Yield: + Tuple of Lists containing tensors for states, actions, log probs, qvals and + advantage. + """ + data = zip( + self.batch_states, + self.batch_past_pw, + self.batch_actions, + self.batch_logp, + self.batch_qvals, + PPO.normalise_advantage(self.batch_adv), + ) + for state, past_pw, action, logp_old, qval, adv in data: + yield state, past_pw, action, logp_old, qval, adv + + def clear_buffer(self): + """Resets the ExperienceBuffer.""" + self.batch_states.clear() + self.batch_past_pw.clear() + self.batch_actions.clear() + self.batch_adv.clear() + self.batch_logp.clear() + self.batch_qvals.clear() + + +class ParallelExperiences: + def __init__(self): + self.exp_queue = Queue() + self.processes = [] + + def collect_experiences(self) -> List[ExperienceBuffer]: + """Returns the experiences from parallel workers. You need to wait for these.""" + exp_buffers = [] + # gather results from workers using the queue and merge them into one + for process in self.processes: + exp_buffers.append(self.exp_queue.get()) # will block + for process in self.processes: + process.join() + return exp_buffers + + def create_worker(self, *args): + """Creates a new worker, with the args passed in for `_gather_experience`.""" + process = Process(target=self.gather_experience, args=args) + self.processes.append(process) + process.start() + + def gather_experience( + self, + env: gym.Env, + agent: ActorCriticAgent, + device: torch.device, + max_steps: int, + max_episode_length: int, + asset_num: int, + gamma: float, + lam: float, + ): + """ + Workhorse function of the parallel experience gathering. 
This function can be + called as many times as many CPUs are available on the system, to collect the + desired number of steps and store them into an `ExperienceBuffer` that is then + passed back (via a `multiprocessing.Queue` object) to the main process that + spawned the parallel processes. + + Args: + env: An instance of the environment to act on. + agent: An instance of the PPO's `ActorCriticAgent`. + device: Device where the agent lives (GPU or CPU). + max_steps: Total number of steps (over multiple episodes) a worker can take. + max_episode_length: Maximum length of a trajectory / episode. + asset_num: Number of assets we are modelling (not including USD). + gamma: See docs of :func:`PPO.calc_advantage` + lam: See docs of :func:`PPO.calc_advantage` + + Returns: + Adds the results to `exp_queue` so it can be processed in the main process. + """ + buffer = ExperienceBuffer() + state = env.reset() + past_pw = PPO._init_past_pw(asset_num, device) + for step in range(max_steps): + # get action, make step, get reward and info from env + pi, action, actor_logits, logp, value = agent(state, past_pw, device) + next_state, reward, done, info = env.step(action.cpu().numpy()) + + # store everything and update state, past_pw + buffer.append(state, past_pw, action, logp, reward, value, info) + state = next_state + past_pw = PPO._update_past_pw(info["portfolio_value"], actor_logits, device) + + terminal = len(buffer.ep_rewards) == max_episode_length + if done or terminal: + buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma)[:-1] + buffer.advs += PPO.calc_advantage( + buffer.ep_rewards, buffer.ep_values, gamma, lam + ) + buffer.done_episodes += 1 + buffer.epoch_rewards += np.sum(buffer.ep_rewards) + + # episode over, reset the env and the buffer + buffer.ep_rewards = [] + buffer.ep_values = [] + state = env.reset() + past_pw = PPO._init_past_pw(asset_num, device) + + # add collected experience to the queue so it can be returned to master process + 
self.exp_queue.put(buffer) + + def sharpe_ratio(returns, freq: int = 30, rfr: int = 0): """ Given a set of returns, calculates naive (rfr=0) sharpe (eq 28). From d90a86b2683320fe38868dd7c8fe809a52bba74e Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Tue, 12 Jan 2021 14:37:20 +0000 Subject: [PATCH 23/62] optimizers --- .../modelling/augmentation/timegan.py | 107 +++++++++++++++++- 1 file changed, 101 insertions(+), 6 deletions(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 94a2b076..76ad3c94 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -20,11 +20,11 @@ from pytorch_lightning import LightningModule +from dagobert.modelling.dl import AdaBelief + class RnnBlock(nn.Module): - """ - Generate time-series data in latent space. - """ + """""" def __init__( self, @@ -67,9 +67,7 @@ def forward(self, z): class Supervisor(nn.Module): - """ - Generate next sequence using the previous sequence. - """ + """""" def __init__( self, @@ -248,6 +246,7 @@ def __init__(self, hparams: Namespace): all_inputs = sum(num_inputs) # components of network + # Generate time-series data in latent space. self.generator = RnnBlock( input_size=self.hparams.z_dim, hidden_size=self.hparams.hidden_size, @@ -264,6 +263,7 @@ def __init__(self, hparams: Namespace): batch_first=True, rnn=self.hparams.rnn, ) + # Generate next sequence using the previous sequence. self.supervisor = RnnBlock( input_size=self.hparams.hidden_size, hidden_size=self.hparams.hidden_size, @@ -300,3 +300,98 @@ def __init__(self, hparams: Namespace): ) self.recovery_linear = nn.Linear(self.hparams.hidden_size, all_inputs) self.discriminator_linear = nn.Linear(self.hparams.hidden_size, 1) + + def forward(self, *x): + pass + + def configure_optimizers(self): + """ + Optimizer setup. list of optimizers accessed by idx in training step. 
+ """ + optimizers = [] + param_pairs = [ + list(self.embedder.parameters()) + list(self.recovery.parameters()), + list(self.generator.parameters()) + list(self.supervisor.parameters()), + list(self.generator.parameters()) + list(self.supervisor.parameters()), + list(self.embedder.parameters()) + list(self.recovery.parameters()), + list(self.discriminator.parameters()), + ] + if "adam" in self.hparams.optimizer.lower(): + for param_pair in param_pairs: + optimizer = torch.optim.AdamW(param_pair, lr=self.hparams.lr) + optimizers.append(optimizer) + elif "adabelief" in self.hparams.optimizer.lower(): + for param_pair in param_pairs: + optimizer = AdaBelief(param_pair, lr=self.hparams.lr) + optimizers.append(optimizer) + return optimizers + + # ---------------------------------------------------------------------------------- + # SETUP FUNCTIONS + # ---------------------------------------------------------------------------------- + + def _setup_loss(self, loss_name): + """ + Different losses are used to direct updates in the components of TimeGAN. + """ + if loss_name == "BCEWithLogits": + return nn.BCEWithLogitsLoss() + elif self.hparams.regression: + return nn.MSELoss() + + else: + if self.hparams.no_class_weights: + if self.hparams.output_size > 1: + return nn.CrossEntropyLoss() + else: + return nn.BCEWithLogitsLoss() + else: + if self.hparams.output_size > 1: + return nn.CrossEntropyLoss(self._get_class_weights()) + else: + pos_weight = self._get_class_weights()[1] + return nn.BCEWithLogitsLoss(pos_weight=pos_weight) + + # ---------------------------------------------------------------------------------- + # CALCULATION + # ---------------------------------------------------------------------------------- + + def _calculate_loss(self, x, y_true): + """ + Calculates the appropriate loss, given the `classification` flag. + + Args: + x: A batch of X. + y_true: A batch of target. + + Returns: + Tuple of loss, y_true and y_pred. 
+ """ + y_pred = self(*[xi.float() for xi in x]) + if self.hparams.output_size == 1 and self.hparams.last_y: + y_true = y_true.reshape(-1, 1).float() + elif self.hparams.output_size == 1 and not self.hparams.last_y: + y_true = y_true.float() + elif self.hparams.output_size == 3 and not self.hparams.regression: + # convert triple barrier method's -1/0/1 into 0, 1, 2 torch's cross-entropy + y_true = y_true.long() + 1 + if not self.hparams.last_y: + # we only keep the latest fraction of labels of the mini-series + keep = int(self.hparams.mini_series_length * self.hparams.non_last_y_frac) + keep_ix = self.hparams.mini_series_length - keep + y_true = y_true[:, keep_ix:] + if self.hparams.output_size == 1: + y_pred = y_pred[:, keep_ix:] + else: + y_pred = y_pred.transpose(1, 2)[:, :, keep_ix:] + loss = self.loss_f(y_pred, y_true) + + if self.hparams.mix_density_net: + # for mix density nets we need to estimate y_preds as a mixture of mus + y_pred = self.loss_f.get_mu_preds(y_pred) + elif not self.hparams.last_y: + # we only use the last timepoint's pred for plotting and metric calculation + # otherwise we often run out of memory at the end of the epoch + y_pred = y_pred[:, -1] + y_true = y_true[:, -1] + return loss, y_true, y_pred From 7630ab2376c9498e4e7a2f9de49dc44517bd1338 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Tue, 12 Jan 2021 17:59:39 +0000 Subject: [PATCH 24/62] clean up some of mess, adding flow of training step --- .../modelling/augmentation/timegan.py | 214 ++++-------------- 1 file changed, 46 insertions(+), 168 deletions(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 76ad3c94..1790bf9b 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -15,6 +15,7 @@ import torch import torch.nn as nn +import torch.optim as optim import torch.nn.functional as f from torch.nn.utils import weight_norm @@ -24,16 +25,21 @@ class 
RnnBlock(nn.Module): - """""" + """ + Class for creating 5 components of TimeGAN. + """ def __init__( self, input_size: int, hidden_size: int, num_layers: int, + linear_input_size: int, + linear_output_size: int, dropout: float = 0.2, batch_first: bool = True, rnn: str = "lstm", + linear_activation: bool = True, ): super(RnnBlock, self).__init__() @@ -55,165 +61,27 @@ def __init__( batch_first=batch_first, ) self.tanh = nn.Tanh() - # TODO: whats' size of output in latent space - self.linear = nn.Linear(hidden_size, hidden_size) - self.sigmoid = nn.Sigmoid() - - def forward(self, z): - # lstm_out = (batch_size, seq_len, hidden_size) - rnn_out, _hidden = self.rnn(z) - output = self.tanh(rnn_out) - return output - - -class Supervisor(nn.Module): - """""" - - def __init__( - self, - input_size, - hidden_size, - num_layers, - dropout, - batch_first=True, - ): - super(Supervisor, self).__init__() - - # input/output: (batch, seq, feature) - # TODO: hparams? - self.lstm = nn.LSTM( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - batch_first=batch_first, - ) - self.tanh = nn.Tanh() - # TODO: whats' size of output in latent space - self.linear = nn.Linear(hidden_size, hidden_size) - self.sigmoid = nn.Sigmoid() - - def forward(self, z): - # lstm_out = (batch_size, seq_len, hidden_size) - lstm_out, _hidden = self.lstm(z) - lstm_out = self.tanh(lstm_out) - synthetic_series = self.sigmoid(self.linear(lstm_out)) - return synthetic_series - - -class Discriminator(nn.Module): - """ - Discriminate the original and synthetic time-series data - """ - - def __init__( - self, - input_size, - hidden_size, - num_layers, - dropout, - batch_first=True, - ): - super(Discriminator, self).__init__() - - # input/output: (batch, seq, feature) - # TODO: hparams? 
- self.lstm = nn.LSTM( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - batch_first=batch_first, - ) - self.tanh = nn.Tanh() - # TODO: whats' size of output in latent space - self.linear = nn.Linear(hidden_size, 1) - - def forward(self, z): - # lstm_out = (batch_size, seq_len, hidden_size) - lstm_out, _hidden = self.lstm(z) - lstm_out = self.tanh(lstm_out) - synthetic_series = self.linear(lstm_out) - return synthetic_series - - -class Embedder(nn.Module): - """ - Embedding network between original feature space to latent space. - """ - - def __init__( - self, - input_size, - hidden_size, - num_layers, - dropout, - batch_first=True, - ): - super(Embedder, self).__init__() - - # input/output: (batch, seq, feature) - # TODO: hparams? - self.lstm = nn.LSTM( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - batch_first=batch_first, - ) - self.tanh = nn.Tanh() - # TODO: whats' size of output in latent space - self.linear = nn.Linear(hidden_size, hidden_size) + self.linear = nn.Linear(linear_input_size, linear_output_size) self.sigmoid = nn.Sigmoid() + self.linear_activation = linear_activation def forward(self, x): - # lstm_out = (batch_size, seq_len, hidden_size) - lstm_out, _hidden = self.lstm(x) - lstm_out = self.tanh(lstm_out) - embedded_real = self.sigmoid(self.linear(lstm_out)) - return embedded_real - - -class Recovery(nn.Module): - """ - Recovery network from latent space to original space. - """ - - def __init__( - self, - input_size, - hidden_size, - num_layers, - dropout, - batch_first=True, - ): - super(Recovery, self).__init__() - - # input/output: (batch, seq, feature) - # TODO: hparams? 
- self.lstm = nn.LSTM( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - batch_first=batch_first, - ) - self.tanh = nn.Tanh() - # TODO: output size is same as original number of features - self.linear = nn.Linear(hidden_size, input_size) - self.sigmoid = nn.Sigmoid() - - def forward(self, x): - # lstm_out = (batch_size, seq_len, hidden_size) - lstm_out, _hidden = self.lstm(x) - lstm_out = self.tanh(lstm_out) - X_tilde = self.sigmoid(self.linear(lstm_out)) - return X_tilde + rnn_out, _hidden = self.rnn(x) + rnn_out = self.tanh(rnn_out) + output = self.linear(rnn_out) + if self.linear_activation: + output = self.sigmoid(output) + return output class TimeGANLightning(LightningModule): """ - Lightning model made of RNN nets working together. + Lightning model made of 5 RNN nets working together: + - Embedding network between original feature space to latent space. + - Recovery network from latent space to original space. + - Generator function: generate time-series data in latent space. + - Discriminate the original and synthetic time-series data + - Supervisor generating next sequence using the previous sequence. """ # ---------------------------------------------------------------------------------- @@ -246,65 +114,75 @@ def __init__(self, hparams: Namespace): all_inputs = sum(num_inputs) # components of network - # Generate time-series data in latent space. 
self.generator = RnnBlock( input_size=self.hparams.z_dim, hidden_size=self.hparams.hidden_size, num_layers=self.hparams.num_layers, + linear_input_size=self.hparams.hidden_size, + linear_output_size=self.hparams.hidden_size, dropout=self.hparams.dropout, batch_first=True, rnn=self.hparams.rnn, + linear_activation=True, ) self.embedder = RnnBlock( input_size=all_inputs, hidden_size=self.hparams.hidden_size, num_layers=self.hparams.num_layers, + linear_input_size=self.hparams.hidden_size, + linear_output_size=self.hparams.hidden_size, dropout=self.hparams.dropout, batch_first=True, rnn=self.hparams.rnn, + linear_activation=True, ) # Generate next sequence using the previous sequence. self.supervisor = RnnBlock( input_size=self.hparams.hidden_size, hidden_size=self.hparams.hidden_size, num_layers=self.hparams.num_layers, + linear_input_size=self.hparams.hidden_size, + linear_output_size=self.hparams.hidden_size, dropout=self.hparams.dropout, batch_first=True, rnn=self.hparams.rnn, + linear_activation=True, ) self.recovery = RnnBlock( input_size=self.hparams.hidden_size, hidden_size=self.hparams.hidden_size, num_layers=self.hparams.num_layers, + linear_input_size=self.hparams.hidden_size, + linear_output_size=all_inputs, dropout=self.hparams.dropout, batch_first=True, rnn=self.hparams.rnn, + linear_activation=True, ) self.discriminator = RnnBlock( input_size=self.hparams.hidden_size, hidden_size=self.hparams.hidden_size, num_layers=self.hparams.num_layers, + linear_input_size=self.hparams.hidden_size, + linear_output_size=1, dropout=self.hparams.dropout, batch_first=True, rnn=self.hparams.rnn, + linear_activation=False, ) - # final linear layers - self.generator_linear = nn.Linear( - self.hparams.hidden_size, self.hparams.hidden_size - ) - self.embedder_linear = nn.Linear( - self.hparams.hidden_size, self.hparams.hidden_size - ) - self.supervisor_linear = nn.Linear( - self.hparams.hidden_size, self.hparams.hidden_size - ) - self.recovery_linear = 
nn.Linear(self.hparams.hidden_size, all_inputs) - self.discriminator_linear = nn.Linear(self.hparams.hidden_size, 1) - def forward(self, *x): + def training_step(self, batch, batch_idx, optimizer_idx): + real, _ = batch + # embedding + h = torch.tanh(self.embedder(real)) + h = torch.sigmoid(self.embedder_linear(h)) + # recover embedding + x_tilde = torch.tanh(self.recovery(h)) + x_tilde = torch.sigmoid(self.recovery_linear(x_tilde)) + pass - def configure_optimizers(self): + def configure_optimizers(self) -> List[optim.Optimizer]: """ Optimizer setup. list of optimizers accessed by idx in training step. """ From 1cb5a8e6219fd2b692fccabcbada232c9d01a767 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Wed, 13 Jan 2021 07:43:35 +0000 Subject: [PATCH 25/62] gave up on parallel ppo on windows, let's try it on aws/linux --- config/rl_config.yaml | 2 +- src/dagobert/modelling/rl/__init__.py | 1 - src/dagobert/modelling/rl/environment.py | 1 - src/dagobert/modelling/rl/ppo.py | 281 ++++++++++++++++++----- src/dagobert/modelling/rl/utils.py | 196 ---------------- 5 files changed, 229 insertions(+), 252 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index e6488e7a..5f273987 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -17,7 +17,7 @@ auto_scale_batch_size: # -------------------------------------------------------------------------------------- log_dir: logs -num_workers: 4 +num_workers: 1 exp_name: RL-PPO-TCN tags: - RL_test diff --git a/src/dagobert/modelling/rl/__init__.py b/src/dagobert/modelling/rl/__init__.py index 72758a44..d4900664 100644 --- a/src/dagobert/modelling/rl/__init__.py +++ b/src/dagobert/modelling/rl/__init__.py @@ -1,4 +1,3 @@ from .environment import RLData, RLPortfolio, RLEnv from .networks import ActorCriticTCN, ActorCriticAgent, ActorContinous from .ppo import PPO -from .utils import ExperienceBuffer, ParallelExperiences diff --git a/src/dagobert/modelling/rl/environment.py 
b/src/dagobert/modelling/rl/environment.py index a7074f91..11efe152 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -13,7 +13,6 @@ from dagobert.naming import NPreprocessingArgs as npa from dagobert.modelling.dl import PortfolioCryptoDataset -from dagobert.modelling.rl.utils import sharpe_ratio, max_drawdown logger = logging.getLogger(__name__) diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index e90241e9..6b5c187d 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -9,7 +9,6 @@ from typing import List, Tuple from argparse import Namespace - import gym import torch import numpy as np @@ -21,15 +20,12 @@ from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.trainer import seed_everything - from dagobert.naming import NRL, NStudy, NPreprocessingArgs as npa from dagobert.modelling.rl import ( RLEnv, ActorCriticTCN, ActorContinous, ActorCriticAgent, - ParallelExperiences, - ExperienceBuffer, ) from dagobert.modelling.dl import ( ExperienceSourceDataset, @@ -39,6 +35,7 @@ logger = logging.getLogger(__name__) +mp = torch.multiprocessing.get_context("spawn") def run_rl(args): @@ -111,14 +108,16 @@ def __init__(self, hparams: Namespace): self.hparams = TCNLightning._check_mini_series_lookback(hparams) # create env, policy/value networks and experience buffer + tracking vars - self.env = RLEnv(self.hparams) - n_actions = self.envs.action_space.shape[0] + self.envs = [RLEnv(self.hparams) for _ in range(self.hparams.num_workers)] + n_actions = self.envs[0].action_space.shape[0] self.critic = ActorCriticTCN( self.hparams, n_actions=n_actions, output_size=1, actor=False ) + self.critic.share_memory() self.actor = ActorContinous( ActorCriticTCN(self.hparams, n_actions=n_actions, output_size=n_actions) ) + self.actor.actor_net.share_memory() self.agent = ActorCriticAgent(self.actor, self.critic) self.buffer = ExperienceBuffer() 
self.avg_ep_reward = 0 @@ -152,15 +151,18 @@ def generate_experience_buffer( `num_assets` in the portfolio is large. Yield: - Tuple of Lists containing tensors for states, actions, log probs, qvals and - advantage. + Tuple of Lists containing tensors for states, actions, log probs, qvals + and advantage. """ # setup workers and pass them the env, agent, vars to work with max_worker_steps = int(self.hparams.steps_per_epoch / self.hparams.num_workers) parallel_experiences = ParallelExperiences() + from IPython import embed + + embed() for i in range(self.hparams.num_workers): args = ( - deepcopy(self.env), + self.envs[i], self.agent, self.device, max_worker_steps, @@ -169,7 +171,7 @@ def generate_experience_buffer( self.hparams.gamma, self.hparams.lam, ) - parallel_experiences.create_worker(args) + parallel_experiences.create_worker(*args) # collect experiences in parallel, then merge them and create dataset self.buffer.merge_buffers(parallel_experiences.collect_experiences()) @@ -208,7 +210,6 @@ def calc_advantage( values: List[float], gamma: float = 0.99, lam: float = 0.95, - norm: bool = True, ) -> List[float]: """ Calculate the advantage given rewards, state values, and last value of episode. @@ -220,7 +221,6 @@ def calc_advantage( values: list of state values from critic gamma: Gamma for discounting the long-term rewards. lam: Lambda for the GAE advantage calculation. - norm: If True, the advantages are normalised to mean=0, std=1. Returns: List of advantages. 
@@ -231,10 +231,7 @@ def calc_advantage( for i in range(len(rewards) - 1) ] adv = PPO.discount_rewards(delta, gamma * lam) - if norm: - return PPO.normalise_advantage(adv) - else: - return adv + return adv @staticmethod def normalise_advantage(batch_adv: List[float]) -> List[float]: @@ -321,51 +318,21 @@ def training_step( loss """ state, past_pw, action, old_logp, qval, adv = batch - self.log( - "avg_ep_len", self.avg_ep_len, prog_bar=True, on_step=False, on_epoch=True - ) - self.log( - "avg_ep_reward", - self.avg_ep_reward, - prog_bar=True, - on_step=False, - on_epoch=True, - ) - self.log( - "avg_reward", self.avg_reward, prog_bar=True, on_step=False, on_epoch=True - ) + adv = PPO.normalise_advantage(adv) + self.log("avg_ep_len", self.avg_ep_len, on_step=False, on_epoch=True) + self.log("avg_ep_reward", self.avg_ep_reward, on_step=False, on_epoch=True) + self.log("avg_reward", self.avg_reward, on_step=False, on_epoch=True) if optimizer_idx == 0: loss_actor, approx_kl = self.actor_loss( state, past_pw, action, old_logp, adv ) - self.log( - "loss_actor", - loss_actor, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, - ) - self.log( - "approx_kl", - approx_kl, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, - ) + self.log("loss_actor", loss_actor, on_epoch=True, on_step=False) + self.log("approx_kl", approx_kl, on_epoch=True, on_step=False) return loss_actor elif optimizer_idx == 1: loss_critic = self.critic_loss(state, past_pw, qval) - self.log( - "loss_critic", - loss_critic, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, - ) + self.log("loss_critic", loss_critic, on_epoch=True, on_step=False) return loss_critic @staticmethod @@ -382,3 +349,211 @@ def _pre_sanity_check(hparams: Namespace): hparams.cols_to_model[npa.anchor] ) return hparams + + +# -------------------------------------------------------------------------------------- +# HELPER CLASSES FOR PARALLEL EXPERIENCE COLLECTION +# +# Moving this to 
another module would result in circular dependencies. Been there, +# done that, it was painful, so let's just leave these here. +# -------------------------------------------------------------------------------------- + + +class ExperienceBuffer: + """ + Object holding all states, rewards, actions, logp vals, etc of a rollout session, + i.e. the phase of the training when we're collecting experience to train on later + using the current policy of the actor. + + This is designed to work both with a single worker (single process) or with + multiple workers collecting experience in parallel. + """ + + def __init__(self): + """Class constructor""" + # step vars + self.states = [] + self.past_pws = [] + self.actions = [] + self.advs = [] + self.qvals = [] + self.logps = [] + self.infos = [] + + # episode vars + self.ep_rewards = [] + self.ep_values = [] + self.done_episodes = 0 + self.epoch_rewards = 0 + + def append( + self, + state: torch.Tensor, + past_pw: torch.Tensor, + action: torch.Tensor, + logp: torch.Tensor, + reward: float, + value: torch.Tensor, + info: dict, + ): + """ + Appends the state (including portfolio value and weights), actions, logp, + reward to the buffer after a single step taken in the environment. + + Args: + state: State that went into the agent (i.e. both actor and critic). + past_pw: Past portfolio value and weights that went into the agent. + action: Agent's action to the state and past_pw. + logp: Log-probability of the action sampled from the actor's distribution. + reward: Reward obtained by the action. + value: Estimated (by critic) reward we should have got with this action. + info: Portfolio related information returned by the env after the step. 
+ """ + # drop first batch dim so dataloader later can resample them for backprop + self.states.append([s.squeeze(0) for s in state]) + self.past_pws.append(past_pw.squeeze(0)) + self.actions.append(action) + self.logps.append(logp) + self.infos.append(info) + self.ep_rewards.append(reward) + self.ep_values.append(value.item()) + + def merge_buffers(self, buffers): + """ + Merges the passed in ExperienceBuffers and overwrites the current state with it. + + Args: + buffers: List of smaller ExpereinceBuffers to merge together from parallel + processes. + """ + pass + + def yield_dataset( + self, + ) -> Tuple[ + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + List[torch.Tensor], + ]: + """ + Yields an iterable dataset for Pytorch Lightning from the contents of the + ExperienceBuffer. + + Yield: + Tuple of Lists containing tensors for states, actions, log probs, qvals and + advantage. + """ + data = zip( + self.states, + self.past_pws, + self.actions, + self.logps, + self.qvals, + self.advs, + ) + for state, past_pw, action, logp_old, qval, adv in data: + yield state, past_pw, action, logp_old, qval, adv + + def clear_buffer(self): + """Resets the ExperienceBuffer.""" + self.states.clear() + self.past_pws.clear() + self.actions.clear() + self.advs.clear() + self.logps.clear() + self.qvals.clear() + self.ep_rewards.clear() + self.ep_values.clear() + self.done_episodes = 0 + self.epoch_rewards = 0 + + +class ParallelExperiences: + """ + Parallelised experience gathering, idea from https://stackoverflow.com/a/45829852 + """ + + def __init__(self): + """Class constructor.""" + self.exp_queue = mp.Queue() + self.processes = [] + + def collect_experiences(self) -> List[ExperienceBuffer]: + """Returns the experiences from parallel workers. 
You need to wait for these.""" + buffers = [] + # gather results from workers using the queue and merge them into one + for process in self.processes: + buffers.append(self.exp_queue.get()) # will block + for process in self.processes: + process.join() + return buffers + + def create_worker(self, *args): + """Creates a new worker, with the args passed in for `_gather_experience`.""" + process = mp.Process(target=self.gather_experience, args=args) + self.processes.append(process) + process.start() + + def gather_experience( + self, + env: gym.Env, + agent: ActorCriticAgent, + device: torch.device, + max_steps: int, + max_episode_length: int, + asset_num: int, + gamma: float, + lam: float, + ): + """ + Workhorse function of the parallel experience gathering. This function can be + called as many times as many CPUs are available on the system, to collect the + desired number of steps and store them into an `ExperienceBuffer` that is then + passed back (via a `multiprocessing.Queue` object) to the main process that + spawned the parallel processes. + + Args: + env: An instance of the environment to act on. + agent: An instance of the PPO's `ActorCriticAgent`. + device: Device where the agent lives (GPU or CPU). + max_steps: Total number of steps (over multiple episodes) a worker can take. + max_episode_length: Maximum length of a trajectory / episode. + asset_num: Number of assets we are modelling (not including USD). + gamma: See docs of :func:`PPO.calc_advantage` + lam: See docs of :func:`PPO.calc_advantage` + + Returns: + Adds the results to `exp_queue` so it can be processed in the main process. 
+ """ + buffer = ExperienceBuffer() + state = env.reset() + past_pw = PPO._init_past_pw(asset_num, device) + for step in range(max_steps): + # get action, make step, get reward and info from env + pi, action, actor_logits, logp, value = agent(state, past_pw, device) + next_state, reward, done, info = env.step(action.cpu().numpy()) + + # store everything and update state, past_pw + buffer.append(state, past_pw, action, logp, reward, value, info) + state = next_state + past_pw = PPO._update_past_pw(info["portfolio_value"], actor_logits, device) + + terminal = len(buffer.ep_rewards) == max_episode_length + if done or terminal: + buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma)[:-1] + buffer.advs += PPO.calc_advantage( + buffer.ep_rewards, buffer.ep_values, gamma, lam + ) + buffer.done_episodes += 1 + buffer.epoch_rewards += np.sum(buffer.ep_rewards) + + # episode over, reset the env and the buffer + buffer.ep_rewards = [] + buffer.ep_values = [] + state = env.reset() + past_pw = PPO._init_past_pw(asset_num, device) + + # add collected experience to the queue so it can be returned to master process + self.exp_queue.put(buffer) diff --git a/src/dagobert/modelling/rl/utils.py b/src/dagobert/modelling/rl/utils.py index 25520783..fad0f6c7 100644 --- a/src/dagobert/modelling/rl/utils.py +++ b/src/dagobert/modelling/rl/utils.py @@ -3,209 +3,13 @@ classes for gathering experience in parallel. """ # pylint: disable=no-member -from typing import List -from multiprocessing import Process, Queue -import gym -import torch import numpy as np -from dagobert.modelling.rl import PPO, ActorCriticAgent eps = np.finfo(float).eps -class ExperienceBuffer: - """ - Object holding all states, rewards, actions, logp vals, etc of a rollout session, - i.e. the phase of the training when we're collecting experience to train on later - using the current policy of the actor. 
- - This is designed to work both with a single worker (single process) or with - multiple workers collecting experience in parallel. - """ - - def __init__(self): - """Class constructor""" - # step vars - self.states = [] - self.past_pws = [] - self.actions = [] - self.advs = [] - self.qvals = [] - self.logps = [] - self.infos = [] - - # episode vars - self.ep_rewards = [] - self.ep_values = [] - self.done_episodes = 0 - self.epoch_rewards = 0 - - def append( - self, - state: torch.Tensor, - past_pw: torch.Tensor, - action: torch.Tensor, - logp: torch.Tensor, - reward: float, - value: torch.Tensor, - info: dict, - ): - """ - Appends the state (including portfolio value and weights), actions, logp, - reward to the buffer after a single step taken in the environment. - - Args: - state: State that went into the agent (i.e. both actor and critic). - past_pw: Past portfolio value and weights that went into the agent. - action: Agent's action to the state and past_pw. - logp: Log-probability of the action sampled from the actor's distribution. - reward: Reward obtained by the action. - value: Estimated (by critic) reward we should have got with this action. - info: Portfolio related information returned by the env after the step. - """ - # drop first batch dim so dataloader later can resample them for backprop - self.states.append([s.squeeze(0) for s in state]) - self.past_pws.append(past_pw.squeeze(0)) - self.actions.append(action) - self.logps.append(logp) - self.infos.append(info) - self.ep_rewards.append(reward) - self.ep_values.append(value.item()) - - def merge_buffers(self, buffers: List[ExperienceBuffer]): - """ - Merges the passed in ExperienceBuffers and overwrites the current state with it. - - Args: - buffers: List of smaller ExpereinceBuffers to merge together from parallel - processes. 
- """ - pass - - def yield_dataset( - self, - ) -> Tuple[ - List[torch.Tensor], - List[torch.Tensor], - List[torch.Tensor], - List[torch.Tensor], - List[torch.Tensor], - ]: - """ - Yields an iterable dataset for Pytorch Lightning from the contents of the - ExperienceBuffer. - - Yield: - Tuple of Lists containing tensors for states, actions, log probs, qvals and - advantage. - """ - data = zip( - self.batch_states, - self.batch_past_pw, - self.batch_actions, - self.batch_logp, - self.batch_qvals, - PPO.normalise_advantage(self.batch_adv), - ) - for state, past_pw, action, logp_old, qval, adv in data: - yield state, past_pw, action, logp_old, qval, adv - - def clear_buffer(self): - """Resets the ExperienceBuffer.""" - self.batch_states.clear() - self.batch_past_pw.clear() - self.batch_actions.clear() - self.batch_adv.clear() - self.batch_logp.clear() - self.batch_qvals.clear() - - -class ParallelExperiences: - def __init__(self): - self.exp_queue = Queue() - self.processes = [] - - def collect_experiences(self) -> List[ExperienceBuffer]: - """Returns the experiences from parallel workers. You need to wait for these.""" - exp_buffers = [] - # gather results from workers using the queue and merge them into one - for process in self.processes: - exp_buffers.append(self.exp_queue.get()) # will block - for process in self.processes: - process.join() - return exp_buffers - - def create_worker(self, *args): - """Creates a new worker, with the args passed in for `_gather_experience`.""" - process = Process(target=self.gather_experience, args=args) - self.processes.append(process) - process.start() - - def gather_experience( - self, - env: gym.Env, - agent: ActorCriticAgent, - device: torch.device, - max_steps: int, - max_episode_length: int, - asset_num: int, - gamma: float, - lam: float, - ): - """ - Workhorse function of the parallel experience gathering. 
This function can be - called as many times as many CPUs are available on the system, to collect the - desired number of steps and store them into an `ExperienceBuffer` that is then - passed back (via a `multiprocessing.Queue` object) to the main process that - spawned the parallel processes. - - Args: - env: An instance of the environment to act on. - agent: An instance of the PPO's `ActorCriticAgent`. - device: Device where the agent lives (GPU or CPU). - max_steps: Total number of steps (over multiple episodes) a worker can take. - max_episode_length: Maximum length of a trajectory / episode. - asset_num: Number of assets we are modelling (not including USD). - gamma: See docs of :func:`PPO.calc_advantage` - lam: See docs of :func:`PPO.calc_advantage` - - Returns: - Adds the results to `exp_queue` so it can be processed in the main process. - """ - buffer = ExperienceBuffer() - state = env.reset() - past_pw = PPO._init_past_pw(asset_num, device) - for step in range(max_steps): - # get action, make step, get reward and info from env - pi, action, actor_logits, logp, value = agent(state, past_pw, device) - next_state, reward, done, info = env.step(action.cpu().numpy()) - - # store everything and update state, past_pw - buffer.append(state, past_pw, action, logp, reward, value, info) - state = next_state - past_pw = PPO._update_past_pw(info["portfolio_value"], actor_logits, device) - - terminal = len(buffer.ep_rewards) == max_episode_length - if done or terminal: - buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma)[:-1] - buffer.advs += PPO.calc_advantage( - buffer.ep_rewards, buffer.ep_values, gamma, lam - ) - buffer.done_episodes += 1 - buffer.epoch_rewards += np.sum(buffer.ep_rewards) - - # episode over, reset the env and the buffer - buffer.ep_rewards = [] - buffer.ep_values = [] - state = env.reset() - past_pw = PPO._init_past_pw(asset_num, device) - - # add collected experience to the queue so it can be returned to master process - 
self.exp_queue.put(buffer) - - def sharpe_ratio(returns, freq: int = 30, rfr: int = 0): """ Given a set of returns, calculates naive (rfr=0) sharpe (eq 28). From 0dc2b310a4393c1ef00319a34cd8310a86275489 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Wed, 13 Jan 2021 20:37:44 +0000 Subject: [PATCH 26/62] gotta commit --- config/timegan_config.yaml | 3 + .../modelling/augmentation/__init__.py | 1 + .../modelling/augmentation/timegan.py | 156 +++++++++++++++++- src/dagobert/modelling/augmentation/utils.py | 21 +++ 4 files changed, 173 insertions(+), 8 deletions(-) create mode 100644 src/dagobert/modelling/augmentation/utils.py diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index e1f8d377..cd65fefd 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -26,6 +26,8 @@ batch_size: 256 # gru or lstm rnn: lstm +# embedding weight in cost of generator loss +emb_weight: 1 # -------------------------------------------------------------------------------------- # MODEL @@ -35,6 +37,7 @@ dropout: 0.2 num_layers: 2 hidden_size: 50 z_dim: 50 +mini_series_length: 240 # -------------------------------------------------------------------------------------- # DATA diff --git a/src/dagobert/modelling/augmentation/__init__.py b/src/dagobert/modelling/augmentation/__init__.py index a4400910..dbab2837 100644 --- a/src/dagobert/modelling/augmentation/__init__.py +++ b/src/dagobert/modelling/augmentation/__init__.py @@ -1 +1,2 @@ from .augmentation import augment +from .timegan import RnnBlock, TimeGANLightning diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 1790bf9b..31d0f532 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -22,6 +22,7 @@ from pytorch_lightning import LightningModule from dagobert.modelling.dl import AdaBelief +from dagobert.modelling.augmentation.utils import get_noise class RnnBlock(nn.Module): 
@@ -172,15 +173,84 @@ def __init__(self, hparams: Namespace): ) def training_step(self, batch, batch_idx, optimizer_idx): - real, _ = batch + """ + Carries out updates to networks from a batch of real samples. + Args: + batch: batch of + batch_idx: + optimizer_idx: idx that controls optimizing the 5 networks + + Returns: + Loss + """ + x, label = batch + batch_len = len(x) + z = get_noise( + batch_len, + self.hparams.mini_series_length, + self.hparams.z_dim, + device=self.tgan_device, + ) + # embedding - h = torch.tanh(self.embedder(real)) - h = torch.sigmoid(self.embedder_linear(h)) - # recover embedding - x_tilde = torch.tanh(self.recovery(h)) - x_tilde = torch.sigmoid(self.recovery_linear(x_tilde)) + h = self.embedder(x) + x_tilde = self.recovery(h) + if optimizer_idx == 0: + # recover embedding + embed_loss0 = TimeGANLightning.embed_loss0( + x_tilde, + x, + optimizer_idx, + ) + self.log( + "embed_loss0", + embed_loss0, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return embed_loss0 - pass + # generator + e_hat = self.generator(z) + h_hat = self.supervisor(e_hat) + h_hat_supervise = self.supervisor(h) + + if optimizer_idx == 1: + # supervisor and generator + gen_sup_loss = TimeGANLightning.gen_loss_sup( + h_hat_supervise, + h, + optimizer_idx, + ) + self.log( + "gen_sup_loss", + gen_sup_loss, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return gen_sup_loss + + # synthetic data + x_hat = self.recovery(h_hat) + + if optimizer_idx in [2, 3]: + with torch.no_grad(): + y_fake = self.discriminator(h_hat) + y_fake_e = self.discriminator(e_hat) + gen_loss = TimeGANLightning.generator_loss( + y_fake, + y_fake_e, + h, + h_hat_supervise, + x, + x_hat, + self.hparams.emb_weight, + ) + return gen_loss def configure_optimizers(self) -> List[optim.Optimizer]: """ @@ -234,7 +304,7 @@ def _setup_loss(self, loss_name): # CALCULATION # ---------------------------------------------------------------------------------- - def 
_calculate_loss(self, x, y_true): + def _calculate_loss2(self, x, y_true): """ Calculates the appropriate loss, given the `classification` flag. @@ -273,3 +343,73 @@ def _calculate_loss(self, x, y_true): y_pred = y_pred[:, -1] y_true = y_true[:, -1] return loss, y_true, y_pred + + @staticmethod + def embed_loss0(x_tilde, x, optimizer_idx): + """ + + Args: + x_tilde: + x: + optimizer_idx: + + Returns: + + """ + if optimizer_idx == 0: + e_loss_t0 = nn.MSELoss()(x_tilde, x) + e_loss0 = 10 * torch.sqrt(e_loss_t0) + return e_loss0 + + @staticmethod + def gen_loss_sup(h_hat_supervise, h, optimizer_idx): + """ + + Args: + h_hat_supervise: + h: + optimizer_idx: + + Returns: + + """ + if optimizer_idx == 1: + gen_sup_loss = nn.MSELoss()(h_hat_supervise[:, 1:, :], h[:, 1:, :]) + return gen_sup_loss + + @staticmethod + def generator_loss( + y_fake, + y_fake_e, + h, + h_hat_supervise, + x, + x_hat, + emb_weight, + ): + """ + + Args: + y_fake: + y_fake_e: + h: + h_hat_supervise: + x: + x_hat: + emb_weight: + optimizer_idx: + + Returns: + + """ + + # adversarial + g_loss_u = nn.BCELoss()(y_fake, torch.ones_like(y_fake)) + g_loss_u_e = nn.BCELoss()(y_fake_e, torch.ones_like(y_fake_e)) + # supervisor + g_loss_s = nn.MSELoss()(h_hat_supervise[:, 1:, :], h[:, 1:, :]) + # 2 moments + d = torch.sqrt(torch.var(x_hat, 0) + 1e-6) - torch.sqrt(torch.var(x, 0) + 1e-6) + g_loss_v1 = torch.mean(torch.abs(d)) + g_loss_v2 = torch.mean(torch.abs(torch.mean(x_hat, 0) - torch.mean(x, 0))) + g_loss_v = g_loss_v1 + g_loss_v2 diff --git a/src/dagobert/modelling/augmentation/utils.py b/src/dagobert/modelling/augmentation/utils.py new file mode 100644 index 00000000..72e162eb --- /dev/null +++ b/src/dagobert/modelling/augmentation/utils.py @@ -0,0 +1,21 @@ +"""Util functions for TimeGAN and other augmentation related tasks""" + +import torch + + +def get_noise(n_samples: int, mini_series_length: int, z_dim: int, device: str = "cpu"): + """ + Function for creating noise vectors given the dimensions 
(n_samples, + mini_series_length, z_dim). Research shows that it is not hyperimportant which + distribution is the noise from, here we'll use uniform + + Args: + n_samples: the number of samples to generate + mini_series_length: length of series + z_dim: dimension for generator input at given time point + device: the device type + + Returns: + Tensor of filled with random numbers from uniform distribution. + """ + return torch.rand(n_samples, mini_series_length, z_dim, device=device) From 4e7e0a89fa9cf035ed1cd3fe036fd388975d13b4 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Thu, 14 Jan 2021 17:11:56 +0000 Subject: [PATCH 27/62] on track --- .../modelling/augmentation/timegan.py | 170 ++++++++++-------- 1 file changed, 92 insertions(+), 78 deletions(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 31d0f532..71d9e8d9 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -200,7 +200,6 @@ def training_step(self, batch, batch_idx, optimizer_idx): embed_loss0 = TimeGANLightning.embed_loss0( x_tilde, x, - optimizer_idx, ) self.log( "embed_loss0", @@ -222,7 +221,6 @@ def training_step(self, batch, batch_idx, optimizer_idx): gen_sup_loss = TimeGANLightning.gen_loss_sup( h_hat_supervise, h, - optimizer_idx, ) self.log( "gen_sup_loss", @@ -237,7 +235,9 @@ def training_step(self, batch, batch_idx, optimizer_idx): # synthetic data x_hat = self.recovery(h_hat) - if optimizer_idx in [2, 3]: + # TODO: If you need to control how often those optimizers step or override + # the default .step() schedule, override the optimizer_step() hook. 
+ if optimizer_idx == 2: with torch.no_grad(): y_fake = self.discriminator(h_hat) y_fake_e = self.discriminator(e_hat) @@ -250,8 +250,50 @@ def training_step(self, batch, batch_idx, optimizer_idx): x_hat, self.hparams.emb_weight, ) + self.log( + "gen_loss", + gen_loss, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) return gen_loss + if optimizer_idx == 3: + embed_loss = TimeGANLightning.embed_loss( + x_tilde, + x, + h_hat_supervise, + h, + ) + self.log( + "embed_loss", + embed_loss, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return embed_loss + + if optimizer_idx == 4: + # y_fake = + # y_fake_e = + # y_real = + disc_loss = TimeGANLightning.discriminator_loss( + y_fake, y_fake_e, y_real, self.hparams.emb_weight + ) + self.log( + "disc_loss", + disc_loss, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return disc_loss + def configure_optimizers(self) -> List[optim.Optimizer]: """ Optimizer setup. list of optimizers accessed by idx in training step. @@ -278,104 +320,38 @@ def configure_optimizers(self) -> List[optim.Optimizer]: # SETUP FUNCTIONS # ---------------------------------------------------------------------------------- - def _setup_loss(self, loss_name): - """ - Different losses are used to direct updates in the components of TimeGAN. 
- """ - if loss_name == "BCEWithLogits": - return nn.BCEWithLogitsLoss() - elif self.hparams.regression: - return nn.MSELoss() - - else: - if self.hparams.no_class_weights: - if self.hparams.output_size > 1: - return nn.CrossEntropyLoss() - else: - return nn.BCEWithLogitsLoss() - else: - if self.hparams.output_size > 1: - return nn.CrossEntropyLoss(self._get_class_weights()) - else: - pos_weight = self._get_class_weights()[1] - return nn.BCEWithLogitsLoss(pos_weight=pos_weight) - # ---------------------------------------------------------------------------------- # CALCULATION # ---------------------------------------------------------------------------------- - def _calculate_loss2(self, x, y_true): - """ - Calculates the appropriate loss, given the `classification` flag. - - Args: - x: A batch of X. - y_true: A batch of target. - - Returns: - Tuple of loss, y_true and y_pred. - """ - y_pred = self(*[xi.float() for xi in x]) - if self.hparams.output_size == 1 and self.hparams.last_y: - y_true = y_true.reshape(-1, 1).float() - elif self.hparams.output_size == 1 and not self.hparams.last_y: - y_true = y_true.float() - elif self.hparams.output_size == 3 and not self.hparams.regression: - # convert triple barrier method's -1/0/1 into 0, 1, 2 torch's cross-entropy - y_true = y_true.long() + 1 - if not self.hparams.last_y: - # we only keep the latest fraction of labels of the mini-series - keep = int(self.hparams.mini_series_length * self.hparams.non_last_y_frac) - keep_ix = self.hparams.mini_series_length - keep - y_true = y_true[:, keep_ix:] - if self.hparams.output_size == 1: - y_pred = y_pred[:, keep_ix:] - else: - y_pred = y_pred.transpose(1, 2)[:, :, keep_ix:] - loss = self.loss_f(y_pred, y_true) - - if self.hparams.mix_density_net: - # for mix density nets we need to estimate y_preds as a mixture of mus - y_pred = self.loss_f.get_mu_preds(y_pred) - elif not self.hparams.last_y: - # we only use the last timepoint's pred for plotting and metric calculation - # 
otherwise we often run out of memory at the end of the epoch - y_pred = y_pred[:, -1] - y_true = y_true[:, -1] - return loss, y_true, y_pred - @staticmethod - def embed_loss0(x_tilde, x, optimizer_idx): + def embed_loss0(x_tilde, x): """ Args: x_tilde: x: - optimizer_idx: Returns: """ - if optimizer_idx == 0: - e_loss_t0 = nn.MSELoss()(x_tilde, x) - e_loss0 = 10 * torch.sqrt(e_loss_t0) - return e_loss0 + e_loss_t0 = nn.MSELoss()(x_tilde, x) + e_loss0 = 10 * torch.sqrt(e_loss_t0) + return e_loss0 @staticmethod - def gen_loss_sup(h_hat_supervise, h, optimizer_idx): + def gen_loss_sup(h_hat_supervise, h): """ Args: h_hat_supervise: h: - optimizer_idx: Returns: """ - if optimizer_idx == 1: - gen_sup_loss = nn.MSELoss()(h_hat_supervise[:, 1:, :], h[:, 1:, :]) - return gen_sup_loss + gen_sup_loss = nn.MSELoss()(h_hat_supervise[:, 1:, :], h[:, 1:, :]) + return gen_sup_loss @staticmethod def generator_loss( @@ -397,15 +373,14 @@ def generator_loss( x: x_hat: emb_weight: - optimizer_idx: Returns: """ - # adversarial g_loss_u = nn.BCELoss()(y_fake, torch.ones_like(y_fake)) g_loss_u_e = nn.BCELoss()(y_fake_e, torch.ones_like(y_fake_e)) + w_g_loss_u_e = emb_weight * g_loss_u_e # supervisor g_loss_s = nn.MSELoss()(h_hat_supervise[:, 1:, :], h[:, 1:, :]) # 2 moments @@ -413,3 +388,42 @@ def generator_loss( g_loss_v1 = torch.mean(torch.abs(d)) g_loss_v2 = torch.mean(torch.abs(torch.mean(x_hat, 0) - torch.mean(x, 0))) g_loss_v = g_loss_v1 + g_loss_v2 + # sum + g_loss = g_loss_u + w_g_loss_u_e + 100 * torch.sqrt(g_loss_s) + 100 * g_loss_v + return g_loss + + @staticmethod + def embed_loss(x_tilde, x, h_hat_supervise, h): + """ + + Args: + x_tilde: + x: + h_hat_supervise: + h: + + Returns: + + """ + e_loss_t0 = nn.MSELoss()(x_tilde, x) + e_loss0 = 10 * torch.sqrt(e_loss_t0) + e_loss = e_loss0 + 0.1 * nn.MSELoss()(h_hat_supervise[:, 1:, :], h[:, 1:, :]) + return e_loss + + @staticmethod + def discriminator_loss(y_fake, y_fake_e, y_real, emb_weight): + """ + + Args: + y_fake: + 
y_fake_e: + y_real: + emb_weight: + + Returns: + + """ + d_loss_fake_e = nn.BCELoss()(y_fake_e, torch.zeros_like(y_fake_e)) + d_loss_fake = nn.BCELoss()(y_fake, torch.zeros_like(y_fake)) + d_loss_real = nn.BCELoss()(y_real, torch.ones_like(y_real)) + return emb_weight * d_loss_fake_e + d_loss_fake + d_loss_real From 2ee9d54fc82a81956330920f36db94c1a19feb71 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Thu, 14 Jan 2021 17:23:52 +0000 Subject: [PATCH 28/62] disc --- src/dagobert/modelling/augmentation/timegan.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 71d9e8d9..7ee32983 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -278,9 +278,13 @@ def training_step(self, batch, batch_idx, optimizer_idx): return embed_loss if optimizer_idx == 4: - # y_fake = - # y_fake_e = - # y_real = + e_hat = self.generator() + h_hat = self.supervisor(e_hat) + + y_fake = self.discriminator(h_hat.detach()) + y_fake_e = self.discriminator(e_hat.detach()) + y_real = self.discriminator(h.detach()) + disc_loss = TimeGANLightning.discriminator_loss( y_fake, y_fake_e, y_real, self.hparams.emb_weight ) From f528469a0c13d64f76a3e84d1b6a10bb79699149 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Fri, 15 Jan 2021 09:50:01 +0000 Subject: [PATCH 29/62] got parallel experience gathering to a point, but now I simpy can't make it work unless I reengineer everything, see https://github.com/danielhomola/dagobert/issues/65 --- config/rl_config.yaml | 5 ++-- src/dagobert/modelling/dl/tcn_args.py | 13 +++++++++ src/dagobert/modelling/dl/tcn_net.py | 42 +++++++++++++++------------ src/dagobert/modelling/rl/networks.py | 3 +- src/dagobert/modelling/rl/ppo.py | 8 +++-- 5 files changed, 46 insertions(+), 25 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 5f273987..cce7b3d7 100644 
--- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -4,7 +4,7 @@ # LIGHTNING # -------------------------------------------------------------------------------------- -gpus: 1 +gpus: 0 pin_memory: True profiler: True #val_check_interval: 0.5 @@ -17,7 +17,7 @@ auto_scale_batch_size: # -------------------------------------------------------------------------------------- log_dir: logs -num_workers: 1 +num_workers: 4 exp_name: RL-PPO-TCN tags: - RL_test @@ -49,6 +49,7 @@ target_col: rl_return to_label: False no_sample_weights: True binariser_method: +no_weight_norm: True # -------------------------------------------------------------------------------------- # MODEL diff --git a/src/dagobert/modelling/dl/tcn_args.py b/src/dagobert/modelling/dl/tcn_args.py index d08b0a0b..21df5792 100644 --- a/src/dagobert/modelling/dl/tcn_args.py +++ b/src/dagobert/modelling/dl/tcn_args.py @@ -188,6 +188,19 @@ def add_model_specific_args(parent_parser): "multi-class (3) classification with CrossEntropyLoss." ), ) + parser.add_argument( + "--no_weight_norm", + action="store_true", + help=( + " Weight norm is registered as a pre_forward_hook on the 1D convolutional " + "layers of the TemporalBlock, and these cannot be serialised when training " + "with parallel processes interacting with the model concurrently. If True, " + "we add weight normalisation around these layers, and TCN cannot be used " + "in a multiprocessing setting. If False, then it can be used, even staying " + "on GPU in linux (CPU only on Windows)." 
+ ), + ) + parser.add_argument( "--no_class_weights", action="store_true", diff --git a/src/dagobert/modelling/dl/tcn_net.py b/src/dagobert/modelling/dl/tcn_net.py index b49b139a..344ad328 100644 --- a/src/dagobert/modelling/dl/tcn_net.py +++ b/src/dagobert/modelling/dl/tcn_net.py @@ -48,33 +48,34 @@ def __init__( dilation, padding, dropout=0.2, + no_weight_norm=False, ): super(TemporalBlock, self).__init__() - self.conv1 = weight_norm( - nn.Conv1d( - n_inputs, - n_outputs, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - ) + self.conv1 = nn.Conv1d( + n_inputs, + n_outputs, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, ) + if not no_weight_norm: + self.conv1 = weight_norm(self.conv1) self.chomp1 = Chomp1d(padding) self.relu1 = nn.ReLU() self.dropout1 = nn.Dropout(dropout) self.batch_norm1 = nn.BatchNorm1d(n_outputs) - self.conv2 = weight_norm( - nn.Conv1d( - n_outputs, - n_outputs, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - ) + self.conv2 = nn.Conv1d( + n_outputs, + n_outputs, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, ) + if not no_weight_norm: + self.conv2 = weight_norm(self.conv2) self.chomp2 = Chomp1d(padding) self.relu2 = nn.ReLU() self.dropout2 = nn.Dropout(dropout) @@ -112,6 +113,7 @@ def __init__( dropout: float = 0.2, time_feat_n: int = 1, time_embed_dim: int = 12, + no_weight_norm: bool = False, ): """ Class constructor. @@ -126,6 +128,8 @@ def __init__( time_feat_n: Number of time features per input DF. Note this has to be consistent across all input DFs, you can't mix and match. time_embed_dim: Dimensionality of time2vec vectors. + no_weight_norm: If True, we don't add weight_norm to 1dconv layers. See + no_weight_norm param help in `tcn_args.py` for more info. 
""" super(TemporalConvNet, self).__init__() @@ -147,6 +151,7 @@ def __init__( dilation=1, padding=(kernel_size - 1), dropout=dropout, + no_weight_norm=no_weight_norm, ) ) @@ -167,6 +172,7 @@ def __init__( dilation=dilation_size, padding=(kernel_size - 1) * dilation_size, dropout=dropout, + no_weight_norm=no_weight_norm, ) ] self.later_layers = nn.Sequential(*layers) diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 4082a7cf..061e7366 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -49,11 +49,11 @@ def __init__( dropout=dropout, time_feat_n=hparams.time_feat_n, time_embed_dim=hparams.time_embed_dim, + no_weight_norm=hparams.no_weight_norm, ) self.linear_a = nn.Linear(n_actions + 1, num_channels[-1]) self.linear1 = nn.Linear(hparams.mini_series_length, 1) self.linear2 = nn.Linear(num_channels[-1] * 2, output_size) - # self.linear2 = nn.Linear(num_channels[-1], output_size) def forward(self, state, past_pw): s1 = self.tcn(*state) @@ -64,7 +64,6 @@ def forward(self, state, past_pw): s2 = torch.tanh(self.linear1(s1).squeeze(-1)) # bring together the state and past_pw representations make residual connection return past_pw[:, 1:] + self.linear2(torch.cat([s2, a1], dim=1)) - # return self.linear2(s2) class ActorContinous(nn.Module): diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index 6b5c187d..df900659 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -157,14 +157,16 @@ def generate_experience_buffer( # setup workers and pass them the env, agent, vars to work with max_worker_steps = int(self.hparams.steps_per_epoch / self.hparams.num_workers) parallel_experiences = ParallelExperiences() - from IPython import embed - embed() + self.agent.critic_net.cpu() + self.agent.critic_net.eval() + self.agent.actor_net.cpu() + self.agent.actor_net.eval() for i in range(self.hparams.num_workers): args = ( self.envs[i], 
self.agent, - self.device, + "cpu", max_worker_steps, self.hparams.max_episode_length, len(self.hparams.asset_names), From 7c144d4efd089d9ce7f2a634b6f2e0112353eef7 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Fri, 15 Jan 2021 15:16:38 +0000 Subject: [PATCH 30/62] training_step is done for now --- .../modelling/augmentation/timegan.py | 210 +++++++++--------- 1 file changed, 106 insertions(+), 104 deletions(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 7ee32983..7b853d1a 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -106,7 +106,6 @@ def __init__(self, hparams: Namespace): self.tgan_device = "cuda" if hparams.gpus > 0 else "cpu" # TODO: check if real data is the right one, get data in # TODO: any sanity checks on data, hypermparams - # TODO set up losses self.real_logging = None self.comet_logging = not self.hparams.no_comet_logger @@ -183,124 +182,127 @@ def training_step(self, batch, batch_idx, optimizer_idx): Returns: Loss """ + # TODO: is there any label to give back? 
x, label = batch batch_len = len(x) - z = get_noise( - batch_len, - self.hparams.mini_series_length, - self.hparams.z_dim, - device=self.tgan_device, - ) - # embedding h = self.embedder(x) - x_tilde = self.recovery(h) - if optimizer_idx == 0: - # recover embedding - embed_loss0 = TimeGANLightning.embed_loss0( - x_tilde, - x, - ) - self.log( - "embed_loss0", - embed_loss0, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, - ) - return embed_loss0 - - # generator - e_hat = self.generator(z) - h_hat = self.supervisor(e_hat) - h_hat_supervise = self.supervisor(h) - - if optimizer_idx == 1: - # supervisor and generator - gen_sup_loss = TimeGANLightning.gen_loss_sup( - h_hat_supervise, - h, - ) + + # optimizers #0 & #3 update embedder nets + if optimizer_idx in [0, 3]: + x_tilde = self.recovery(h) + # optimize embedding via embedder and recovery nets + if optimizer_idx == 0: + e_loss = TimeGANLightning.embed_loss0(x_tilde, x) + self.log( + "e_loss", + e_loss, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return e_loss + + elif optimizer_idx == 3: + h_hat_supervise = self.supervisor(h) + embed_loss = TimeGANLightning.embedder_loss( + x_tilde, + x, + h_hat_supervise, + h, + ) + self.log( + "embed_loss", + embed_loss, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return embed_loss + + # optimize supervisor + elif optimizer_idx == 1: + h_hat_supervise = self.supervisor(h) + supervise_loss = TimeGANLightning.supervisor_loss(h_hat_supervise, h) self.log( - "gen_sup_loss", - gen_sup_loss, + "supervise_loss", + supervise_loss, on_step=False, on_epoch=True, prog_bar=True, logger=True, ) - return gen_sup_loss - - # synthetic data - x_hat = self.recovery(h_hat) + return supervise_loss # TODO: If you need to control how often those optimizers step or override # the default .step() schedule, override the optimizer_step() hook. 
- if optimizer_idx == 2: - with torch.no_grad(): - y_fake = self.discriminator(h_hat) - y_fake_e = self.discriminator(e_hat) - gen_loss = TimeGANLightning.generator_loss( - y_fake, - y_fake_e, - h, - h_hat_supervise, - x, - x_hat, - self.hparams.emb_weight, - ) - self.log( - "gen_loss", - gen_loss, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, - ) - return gen_loss - - if optimizer_idx == 3: - embed_loss = TimeGANLightning.embed_loss( - x_tilde, - x, - h_hat_supervise, - h, - ) - self.log( - "embed_loss", - embed_loss, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, - ) - return embed_loss - - if optimizer_idx == 4: - e_hat = self.generator() - h_hat = self.supervisor(e_hat) - - y_fake = self.discriminator(h_hat.detach()) - y_fake_e = self.discriminator(e_hat.detach()) - y_real = self.discriminator(h.detach()) - - disc_loss = TimeGANLightning.discriminator_loss( - y_fake, y_fake_e, y_real, self.hparams.emb_weight - ) - self.log( - "disc_loss", - disc_loss, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, + elif optimizer_idx in [2, 4]: + # random input to generator + z = get_noise( + batch_len, + self.hparams.mini_series_length, + self.hparams.z_dim, + device=self.tgan_device, ) - return disc_loss + # update generator + if optimizer_idx == 2: + + e_hat = self.generator(z) + h_hat = self.supervisor(e_hat) + h_hat_supervise = self.supervisor(h) + + # synthetic data + x_hat = self.recovery(h_hat) + # no_grad to leave discriminator unchanged + with torch.no_grad(): + y_fake = self.discriminator(h_hat) + y_fake_e = self.discriminator(e_hat) + gen_loss = TimeGANLightning.generator_loss( + y_fake, + y_fake_e, + h, + h_hat_supervise, + x, + x_hat, + self.hparams.emb_weight, + ) + self.log( + "gen_loss", + gen_loss, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return gen_loss + + # update discriminator + elif optimizer_idx == 4: + e_hat = self.generator(z) + h_hat = self.supervisor(e_hat) + # 
detach to update only discriminator + y_fake = self.discriminator(h_hat.detach()) + y_fake_e = self.discriminator(e_hat.detach()) + y_real = self.discriminator(h.detach()) + + disc_loss = TimeGANLightning.discriminator_loss( + y_fake, y_fake_e, y_real, self.hparams.emb_weight + ) + self.log( + "disc_loss", + disc_loss, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return disc_loss def configure_optimizers(self) -> List[optim.Optimizer]: """ - Optimizer setup. list of optimizers accessed by idx in training step. + Optimizer setup. List of optimizers accessed by idx in training step. """ optimizers = [] param_pairs = [ @@ -344,7 +346,7 @@ def embed_loss0(x_tilde, x): return e_loss0 @staticmethod - def gen_loss_sup(h_hat_supervise, h): + def supervisor_loss(h_hat_supervise, h): """ Args: @@ -397,7 +399,7 @@ def generator_loss( return g_loss @staticmethod - def embed_loss(x_tilde, x, h_hat_supervise, h): + def embedder_loss(x_tilde, x, h_hat_supervise, h): """ Args: From 2c9edd4a13f68bd2d49a4f6d2090969cf13fb37d Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Fri, 15 Jan 2021 17:29:48 +0000 Subject: [PATCH 31/62] start data --- .../modelling/augmentation/timegan.py | 141 ++++++++++-------- 1 file changed, 82 insertions(+), 59 deletions(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 7b853d1a..44268be1 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -1,6 +1,8 @@ """ TimeGAN network, following the original implementation: https://bitbucket.org/mvdschaar/mlforhealthlabpub/src/master/alg/timegan/tgan.py. 
+& +https://papers.nips.cc/paper/2019/file/c9efe5f26cd17ba6216bbe2a7d26d490-Paper.pdf """ from typing import List, Optional from argparse import Namespace @@ -18,6 +20,7 @@ import torch.optim as optim import torch.nn.functional as f from torch.nn.utils import weight_norm +from torch.utils.data import Dataset, WeightedRandomSampler, RandomSampler, DataLoader from pytorch_lightning import LightningModule @@ -86,7 +89,7 @@ class TimeGANLightning(LightningModule): """ # ---------------------------------------------------------------------------------- - # INIT, FORWARD, OPTIMIZER SETUP + # INIT, (FORWARD) # ---------------------------------------------------------------------------------- def __init__(self, hparams: Namespace): @@ -101,7 +104,7 @@ def __init__(self, hparams: Namespace): # define main vars (other than model) super().__init__() - # TODO: sanity check, define hparams + # TODO: pre sanity check, define hparams # lightning sets this to cuda too late for some of our setup to work self.tgan_device = "cuda" if hparams.gpus > 0 else "cpu" # TODO: check if real data is the right one, get data in @@ -170,6 +173,11 @@ def __init__(self, hparams: Namespace): rnn=self.hparams.rnn, linear_activation=False, ) + self = self.float() + + # ---------------------------------------------------------------------------------- + # OPTIMIZER SETUP & TRAIN + # ---------------------------------------------------------------------------------- def training_step(self, batch, batch_idx, optimizer_idx): """ @@ -193,48 +201,48 @@ def training_step(self, batch, batch_idx, optimizer_idx): x_tilde = self.recovery(h) # optimize embedding via embedder and recovery nets if optimizer_idx == 0: - e_loss = TimeGANLightning.embed_loss0(x_tilde, x) + loss_e = TimeGANLightning.embed_loss0(x_tilde, x) self.log( - "e_loss", - e_loss, + "loss_e", + loss_e, on_step=False, on_epoch=True, prog_bar=True, logger=True, ) - return e_loss + return loss_e elif optimizer_idx == 3: h_hat_supervise = 
self.supervisor(h) - embed_loss = TimeGANLightning.embedder_loss( + loss_embed = TimeGANLightning.embedder_loss( x_tilde, x, h_hat_supervise, h, ) self.log( - "embed_loss", - embed_loss, + "loss_embed", + loss_embed, on_step=False, on_epoch=True, prog_bar=True, logger=True, ) - return embed_loss + return loss_embed # optimize supervisor elif optimizer_idx == 1: h_hat_supervise = self.supervisor(h) - supervise_loss = TimeGANLightning.supervisor_loss(h_hat_supervise, h) + loss_supervisor = TimeGANLightning.supervisor_loss(h_hat_supervise, h) self.log( - "supervise_loss", - supervise_loss, + "loss_supervisor", + loss_supervisor, on_step=False, on_epoch=True, prog_bar=True, logger=True, ) - return supervise_loss + return loss_supervisor # TODO: If you need to control how often those optimizers step or override # the default .step() schedule, override the optimizer_step() hook. @@ -259,7 +267,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): with torch.no_grad(): y_fake = self.discriminator(h_hat) y_fake_e = self.discriminator(e_hat) - gen_loss = TimeGANLightning.generator_loss( + loss_gen = TimeGANLightning.generator_loss( y_fake, y_fake_e, h, @@ -269,14 +277,14 @@ def training_step(self, batch, batch_idx, optimizer_idx): self.hparams.emb_weight, ) self.log( - "gen_loss", - gen_loss, + "loss_gen", + loss_gen, on_step=False, on_epoch=True, prog_bar=True, logger=True, ) - return gen_loss + return loss_gen # update discriminator elif optimizer_idx == 4: @@ -287,18 +295,18 @@ def training_step(self, batch, batch_idx, optimizer_idx): y_fake_e = self.discriminator(e_hat.detach()) y_real = self.discriminator(h.detach()) - disc_loss = TimeGANLightning.discriminator_loss( + loss_disc = TimeGANLightning.discriminator_loss( y_fake, y_fake_e, y_real, self.hparams.emb_weight ) self.log( - "disc_loss", - disc_loss, + "loss_disc", + loss_disc, on_step=False, on_epoch=True, prog_bar=True, logger=True, ) - return disc_loss + return loss_disc def configure_optimizers(self) 
-> List[optim.Optimizer]: """ @@ -325,21 +333,30 @@ def configure_optimizers(self) -> List[optim.Optimizer]: # ---------------------------------------------------------------------------------- # SETUP FUNCTIONS # ---------------------------------------------------------------------------------- + def train_dataloader( + self, + ) -> DataLoader: + """ + + Returns: + + """ + return Dataloader(dataset=dataset, batch_size=self.hparams.batch_size) # ---------------------------------------------------------------------------------- # CALCULATION # ---------------------------------------------------------------------------------- - @staticmethod def embed_loss0(x_tilde, x): """ - + Loss guiding reversible mapping between feature and latent spaces to enable + embedding and recovery nets to reconstruct original data. Args: - x_tilde: - x: + x_tilde: decoded real samples + x: real samples Returns: - + Loss """ e_loss_t0 = nn.MSELoss()(x_tilde, x) e_loss0 = 10 * torch.sqrt(e_loss_t0) @@ -348,13 +365,14 @@ def embed_loss0(x_tilde, x): @staticmethod def supervisor_loss(h_hat_supervise, h): """ - + This loss further ensures that generator produces similar stepwise transitions + (evaluated by ground-truth targets). Args: - h_hat_supervise: - h: + h_hat_supervise: supervisors output from feeding h (real embedding) through + h: real embedding defined by embedder net Returns: - + Loss """ gen_sup_loss = nn.MSELoss()(h_hat_supervise[:, 1:, :], h[:, 1:, :]) return gen_sup_loss @@ -370,25 +388,26 @@ def generator_loss( emb_weight, ): """ - + Loss of generator combining adversarial & supervisor losses together with + looking at difference between final synthetic output and original data. 
Args: - y_fake: - y_fake_e: - h: - h_hat_supervise: - x: - x_hat: - emb_weight: + y_fake: logits for classification of fakes (from h_hat) + y_fake_e: logits for classification of fake embeddings (from e_hat) + h: real embedding defined by embedder net + h_hat_supervise: supervisors output from feeding h (real embedding) through + x: real samples + x_hat: decoded samples of embedding created by generator + emb_weight: weight defining how much embedded fake contributes to loss Returns: - + Loss """ # adversarial g_loss_u = nn.BCELoss()(y_fake, torch.ones_like(y_fake)) g_loss_u_e = nn.BCELoss()(y_fake_e, torch.ones_like(y_fake_e)) w_g_loss_u_e = emb_weight * g_loss_u_e # supervisor - g_loss_s = nn.MSELoss()(h_hat_supervise[:, 1:, :], h[:, 1:, :]) + g_loss_s = TimeGANLightning.supervisor_loss(h_hat_supervise, h) # 2 moments d = torch.sqrt(torch.var(x_hat, 0) + 1e-6) - torch.sqrt(torch.var(x, 0) + 1e-6) g_loss_v1 = torch.mean(torch.abs(d)) @@ -401,35 +420,39 @@ def generator_loss( @staticmethod def embedder_loss(x_tilde, x, h_hat_supervise, h): """ - + Loss to further improve reversible mapping between feature and latent space, + combined with Args: - x_tilde: - x: - h_hat_supervise: - h: + x_tilde: decoded real samples + x: real samples + h_hat_supervise: supervisors output from feeding h (real embedding) through + h: real embedding defined by embedder net Returns: - + Loss """ - e_loss_t0 = nn.MSELoss()(x_tilde, x) - e_loss0 = 10 * torch.sqrt(e_loss_t0) - e_loss = e_loss0 + 0.1 * nn.MSELoss()(h_hat_supervise[:, 1:, :], h[:, 1:, :]) + e_loss0 = TimeGANLightning.embed_loss0(x_tilde, x) + e_loss = e_loss0 + 0.1 * TimeGANLightning.supervisor_loss(h_hat_supervise, h) return e_loss @staticmethod def discriminator_loss(y_fake, y_fake_e, y_real, emb_weight): """ - + Discriminator’s binary adversarial feedback, both on fake and real data. 
Args: - y_fake: - y_fake_e: - y_real: - emb_weight: + y_fake: logits for classification of fakes (from h_hat) + y_fake_e: logits for classification of fake embeddings (from e_hat) + y_real: logits for classification of real embeddings (from h) + emb_weight: weight defining how much embedded fake contributes to loss Returns: - + Loss """ - d_loss_fake_e = nn.BCELoss()(y_fake_e, torch.zeros_like(y_fake_e)) - d_loss_fake = nn.BCELoss()(y_fake, torch.zeros_like(y_fake)) - d_loss_real = nn.BCELoss()(y_real, torch.ones_like(y_real)) + # TODO: is this the correct loss? discriminator returns logits w/out activation. + # changed compared to original TF implementation + criterion = nn.BCEWithLogitsLoss() + d_loss_fake_e = criterion(y_fake_e, torch.zeros_like(y_fake_e)) + d_loss_fake = criterion(y_fake, torch.zeros_like(y_fake)) + d_loss_real = criterion(y_real, torch.ones_like(y_real)) + # TODO: any use of dividing loss by (2 + emb_weight)? return emb_weight * d_loss_fake_e + d_loss_fake + d_loss_real From 1123074ea2612bec0848a7f5a9e87adc770efe95 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sat, 16 Jan 2021 09:58:27 +0000 Subject: [PATCH 32/62] I cannot fucking believe it but I think I managed to crack this multiprocessing.. 
--- src/dagobert/modelling/rl/ppo.py | 137 ++++++++++++++++--------------- 1 file changed, 73 insertions(+), 64 deletions(-) diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index df900659..02fe9f70 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -157,11 +157,13 @@ def generate_experience_buffer( # setup workers and pass them the env, agent, vars to work with max_worker_steps = int(self.hparams.steps_per_epoch / self.hparams.num_workers) parallel_experiences = ParallelExperiences() - self.agent.critic_net.cpu() self.agent.critic_net.eval() self.agent.actor_net.cpu() self.agent.actor_net.eval() + from IPython import embed + + embed() for i in range(self.hparams.num_workers): args = ( self.envs[i], @@ -479,7 +481,7 @@ class ParallelExperiences: def __init__(self): """Class constructor.""" - self.exp_queue = mp.Queue() + self.queue = mp.Queue() self.processes = [] def collect_experiences(self) -> List[ExperienceBuffer]: @@ -487,75 +489,82 @@ def collect_experiences(self) -> List[ExperienceBuffer]: buffers = [] # gather results from workers using the queue and merge them into one for process in self.processes: - buffers.append(self.exp_queue.get()) # will block + buffers.append(self.queue.get()) # will block for process in self.processes: process.join() return buffers - def create_worker(self, *args): + def create_worker(self, *args, **kwargs): """Creates a new worker, with the args passed in for `_gather_experience`.""" - process = mp.Process(target=self.gather_experience, args=args) + args_for_wrapper = [gather_experience, self.queue, args, kwargs] + process = mp.Process(target=self._wrapper, args=args_for_wrapper) self.processes.append(process) process.start() - def gather_experience( - self, - env: gym.Env, - agent: ActorCriticAgent, - device: torch.device, - max_steps: int, - max_episode_length: int, - asset_num: int, - gamma: float, - lam: float, - ): - """ - Workhorse function of the 
parallel experience gathering. This function can be - called as many times as many CPUs are available on the system, to collect the - desired number of steps and store them into an `ExperienceBuffer` that is then - passed back (via a `multiprocessing.Queue` object) to the main process that - spawned the parallel processes. - - Args: - env: An instance of the environment to act on. - agent: An instance of the PPO's `ActorCriticAgent`. - device: Device where the agent lives (GPU or CPU). - max_steps: Total number of steps (over multiple episodes) a worker can take. - max_episode_length: Maximum length of a trajectory / episode. - asset_num: Number of assets we are modelling (not including USD). - gamma: See docs of :func:`PPO.calc_advantage` - lam: See docs of :func:`PPO.calc_advantage` + @staticmethod + def _wrapper(func, queue, args, kwargs): + """This NEEDS to be a static method for multiprocessing to work""" + buffer = func(*args, **kwargs) + # add collected experience to the queue so it can be returned to master process + queue.put(buffer) + + +def gather_experience( + env: gym.Env, + agent: ActorCriticAgent, + device: torch.device, + max_steps: int, + max_episode_length: int, + asset_num: int, + gamma: float, + lam: float, +): + """ + Workhorse function of the parallel experience gathering. This function can be + called as many times as many CPUs are available on the system, to collect the + desired number of steps and store them into an `ExperienceBuffer` that is then + passed back (via a `multiprocessing.Queue` object) to the main process that + spawned the parallel processes. + + Args: + env: An instance of the environment to act on. + agent: An instance of the PPO's `ActorCriticAgent`. + device: Device where the agent lives (GPU or CPU). + max_steps: Total number of steps (over multiple episodes) a worker can take. + max_episode_length: Maximum length of a trajectory / episode. + asset_num: Number of assets we are modelling (not including USD). 
+ gamma: See docs of :func:`PPO.calc_advantage` + lam: See docs of :func:`PPO.calc_advantage` + + Returns: + Adds the results to `exp_queue` so it can be processed in the main process. + """ + buffer = ExperienceBuffer() + state = env.reset() + past_pw = PPO._init_past_pw(asset_num, device) + for step in range(max_steps): + # get action, make step, get reward and info from env + pi, action, actor_logits, logp, value = agent(state, past_pw, device) + next_state, reward, done, info = env.step(action.cpu().numpy()) + + # store everything and update state, past_pw + buffer.append(state, past_pw, action, logp, reward, value, info) + state = next_state + past_pw = PPO._update_past_pw(info["portfolio_value"], actor_logits, device) + + terminal = len(buffer.ep_rewards) == max_episode_length + if done or terminal: + buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma)[:-1] + buffer.advs += PPO.calc_advantage( + buffer.ep_rewards, buffer.ep_values, gamma, lam + ) + buffer.done_episodes += 1 + buffer.epoch_rewards += np.sum(buffer.ep_rewards) - Returns: - Adds the results to `exp_queue` so it can be processed in the main process. 
- """ - buffer = ExperienceBuffer() - state = env.reset() - past_pw = PPO._init_past_pw(asset_num, device) - for step in range(max_steps): - # get action, make step, get reward and info from env - pi, action, actor_logits, logp, value = agent(state, past_pw, device) - next_state, reward, done, info = env.step(action.cpu().numpy()) - - # store everything and update state, past_pw - buffer.append(state, past_pw, action, logp, reward, value, info) - state = next_state - past_pw = PPO._update_past_pw(info["portfolio_value"], actor_logits, device) - - terminal = len(buffer.ep_rewards) == max_episode_length - if done or terminal: - buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma)[:-1] - buffer.advs += PPO.calc_advantage( - buffer.ep_rewards, buffer.ep_values, gamma, lam - ) - buffer.done_episodes += 1 - buffer.epoch_rewards += np.sum(buffer.ep_rewards) - - # episode over, reset the env and the buffer - buffer.ep_rewards = [] - buffer.ep_values = [] - state = env.reset() - past_pw = PPO._init_past_pw(asset_num, device) + # episode over, reset the env and the buffer + buffer.ep_rewards = [] + buffer.ep_values = [] + state = env.reset() + past_pw = PPO._init_past_pw(asset_num, device) - # add collected experience to the queue so it can be returned to master process - self.exp_queue.put(buffer) + return buffer From 86dcdfdcc68c2c2694eacd9b90dac38600be007a Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sat, 16 Jan 2021 16:05:17 +0000 Subject: [PATCH 33/62] it works, even on windows but after a while it crashes with RuntimeError: Couldn't open shared event -> let's try it on linux now --- config/rl_config.yaml | 2 +- src/dagobert/modelling/rl/networks.py | 2 +- src/dagobert/modelling/rl/ppo.py | 150 ++++++++++++-------------- 3 files changed, 72 insertions(+), 82 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index cce7b3d7..170585df 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -4,7 +4,7 @@ # LIGHTNING # 
-------------------------------------------------------------------------------------- -gpus: 0 +gpus: 1 pin_memory: True profiler: True #val_check_interval: 0.5 diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 061e7366..40b81a9d 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -63,7 +63,7 @@ def forward(self, state, past_pw): else: s2 = torch.tanh(self.linear1(s1).squeeze(-1)) # bring together the state and past_pw representations make residual connection - return past_pw[:, 1:] + self.linear2(torch.cat([s2, a1], dim=1)) + return self.linear2(torch.cat([s2, a1], dim=1)) class ActorContinous(nn.Module): diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index 02fe9f70..8a8ee757 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -3,11 +3,13 @@ modified from https://github.com/sid-sundrani/ppo_lightning. """ # pylint: disable=no-member +import sys import logging from copy import deepcopy from pathlib import Path from typing import List, Tuple from argparse import Namespace +from itertools import chain import gym import torch @@ -36,6 +38,7 @@ logger = logging.getLogger(__name__) mp = torch.multiprocessing.get_context("spawn") +eps = np.finfo(float).eps def run_rl(args): @@ -71,6 +74,7 @@ def run_rl(args): # define trainer and and lightning module args.multiprocessing = True if args.gpus != 1 else False args.num_workers = 1 if args.num_workers == 0 else args.num_workers + args.windows = True if "win" in sys.platform else False trainer = Trainer.from_argparse_args( args, logger=tcn_loggers, @@ -113,11 +117,9 @@ def __init__(self, hparams: Namespace): self.critic = ActorCriticTCN( self.hparams, n_actions=n_actions, output_size=1, actor=False ) - self.critic.share_memory() self.actor = ActorContinous( ActorCriticTCN(self.hparams, n_actions=n_actions, output_size=n_actions) ) - 
self.actor.actor_net.share_memory() self.agent = ActorCriticAgent(self.actor, self.critic) self.buffer = ExperienceBuffer() self.avg_ep_reward = 0 @@ -154,21 +156,14 @@ def generate_experience_buffer( Tuple of Lists containing tensors for states, actions, log probs, qvals and advantage. """ - # setup workers and pass them the env, agent, vars to work with max_worker_steps = int(self.hparams.steps_per_epoch / self.hparams.num_workers) parallel_experiences = ParallelExperiences() - self.agent.critic_net.cpu() - self.agent.critic_net.eval() - self.agent.actor_net.cpu() - self.agent.actor_net.eval() - from IPython import embed - - embed() + device = self.setup_model_for_experience_gathering() for i in range(self.hparams.num_workers): args = ( self.envs[i], self.agent, - "cpu", + device, max_worker_steps, self.hparams.max_episode_length, len(self.hparams.asset_names), @@ -177,15 +172,49 @@ def generate_experience_buffer( ) parallel_experiences.create_worker(*args) - # collect experiences in parallel, then merge them and create dataset + # collect experiences in parallel, then merge them self.buffer.merge_buffers(parallel_experiences.collect_experiences()) - self.buffer.yield_dataset() # this will yield a dataset for dataloader + # update metrics we log about the current performance of the agent + self.avg_ep_reward = self.buffer.epoch_rewards / self.buffer.done_episodes + eps + self.avg_reward = self.buffer.epoch_rewards / self.hparams.steps_per_epoch + self.avg_ep_len = self.hparams.steps_per_epoch / self.buffer.done_episodes + eps + + # yield a dataset for dataloader for updating actor/critic + self.setup_model_for_training() + for state, past_pw, action, logp_old, qval, adv in zip( + self.buffer.states, + self.buffer.past_pws, + self.buffer.actions, + self.buffer.logps, + self.buffer.qvals, + self.buffer.advs, + ): + yield state, past_pw, action, logp_old, qval, adv self.buffer.clear_buffer() - # finally update metrics we log - self.avg_ep_reward = 
self.buffer.epoch_rewards / self.buffer.done_episodes - self.avg_reward = self.buffer.epoch_rewards / self.hparams.steps_per_epoch - self.avg_ep_len = self.hparams.steps_per_epoch / self.buffer.done_episodes + def setup_model_for_experience_gathering(self): + """Helper function to move model to CPU if necessary""" + # dropout and batch-norm doesn't make sense for experience gathering + self.agent.critic_net.eval() + self.agent.actor_net.eval() + # we cannot use cuda tensor sharing on windows (necessary for multiprocessing) + if self.hparams.windows: + device = "cpu" + self.agent.critic_net.cpu() + self.agent.actor_net.cpu() + else: + device = self.device + self.agent.critic_net.share_memory() + self.agent.actor_net.share_memory() + return device + + def setup_model_for_training(self): + """Helper function to move model back to GPU if necessary""" + if self.hparams.windows and self.hparams.gpus != 0: + self.agent.critic_net.cuda() + self.agent.actor_net.cuda() + self.agent.critic_net.train() + self.agent.actor_net.train() @staticmethod def discount_rewards(rewards: List[float], discount: float) -> List[float]: @@ -237,17 +266,6 @@ def calc_advantage( adv = PPO.discount_rewards(delta, gamma * lam) return adv - @staticmethod - def normalise_advantage(batch_adv: List[float]) -> List[float]: - """ - Normalise across all episodes within the epoch. Apparently this helps with - covergence. 
- """ - # normalise advantage - adv = np.array(batch_adv) - adv = (adv - adv.mean()) / (adv.std() + np.finfo(float).eps) - return list(adv) - @staticmethod def _init_past_pw(asset_num, device) -> torch.Tensor: """ @@ -322,7 +340,9 @@ def training_step( loss """ state, past_pw, action, old_logp, qval, adv = batch - adv = PPO.normalise_advantage(adv) + # normalize advantages within batch + adv = (adv - adv.mean()) / adv.std() + self.log("avg_ep_len", self.avg_ep_len, on_step=False, on_epoch=True) self.log("avg_ep_reward", self.avg_ep_reward, on_step=False, on_epoch=True) self.log("avg_reward", self.avg_reward, on_step=False, on_epoch=True) @@ -375,20 +395,7 @@ class ExperienceBuffer: def __init__(self): """Class constructor""" - # step vars - self.states = [] - self.past_pws = [] - self.actions = [] - self.advs = [] - self.qvals = [] - self.logps = [] - self.infos = [] - - # episode vars - self.ep_rewards = [] - self.ep_values = [] - self.done_episodes = 0 - self.epoch_rewards = 0 + self.clear_buffer() def append( self, @@ -430,46 +437,29 @@ def merge_buffers(self, buffers): buffers: List of smaller ExpereinceBuffers to merge together from parallel processes. """ - pass - - def yield_dataset( - self, - ) -> Tuple[ - List[torch.Tensor], - List[torch.Tensor], - List[torch.Tensor], - List[torch.Tensor], - List[torch.Tensor], - ]: - """ - Yields an iterable dataset for Pytorch Lightning from the contents of the - ExperienceBuffer. - - Yield: - Tuple of Lists containing tensors for states, actions, log probs, qvals and - advantage. 
- """ - data = zip( - self.states, - self.past_pws, - self.actions, - self.logps, - self.qvals, - self.advs, - ) - for state, past_pw, action, logp_old, qval, adv in data: - yield state, past_pw, action, logp_old, qval, adv + self.states = chain(*[buffer.states for buffer in buffers]) + self.past_pws = chain(*[buffer.past_pws for buffer in buffers]) + self.actions = chain(*[buffer.actions for buffer in buffers]) + self.logps = chain(*[buffer.logps for buffer in buffers]) + self.qvals = chain(*[buffer.qvals for buffer in buffers]) + self.advs = chain(*[buffer.advs for buffer in buffers]) + self.done_episodes = sum([buffer.done_episodes for buffer in buffers]) + self.epoch_rewards = sum([buffer.epoch_rewards for buffer in buffers]) def clear_buffer(self): """Resets the ExperienceBuffer.""" - self.states.clear() - self.past_pws.clear() - self.actions.clear() - self.advs.clear() - self.logps.clear() - self.qvals.clear() - self.ep_rewards.clear() - self.ep_values.clear() + # step vars + self.states = [] + self.past_pws = [] + self.actions = [] + self.advs = [] + self.qvals = [] + self.logps = [] + self.infos = [] + + # episode / epoch vars + self.ep_rewards = [] + self.ep_values = [] self.done_episodes = 0 self.epoch_rewards = 0 From f78c606d1a27db65a534ec1d1842dcb8a2700de7 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sun, 17 Jan 2021 08:49:45 +0000 Subject: [PATCH 34/62] now it works reliably both on win/linux! 
hallefuckingluja --- config/rl_config.yaml | 21 +++++++++++---------- src/dagobert/modelling/rl/ppo.py | 5 ++++- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 170585df..af4a8ce6 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -23,7 +23,7 @@ tags: - RL_test no_comet_logger: True seed: 42 -batch_size: 256 +batch_size: 512 # -------------------------------------------------------------------------------------- # RL @@ -35,15 +35,16 @@ asset_names: trading_cost: 0.002 reward_type: return max_episode_length: 500 -steps_per_epoch: 5000 -n_optim_iters: 4 -gamma: 0.99 -lam: 0.95 +steps_per_epoch: 10000 +n_optim_iters: 8 +gamma: 0.95 +lam: 0.9 lr_actor: 0.001 lr_critic: 0.001 -clip_ratio: 0.25 +clip_ratio: 0.2 target_kl: 0.01 + # don't change these, or preprocessing won't work target_col: rl_return to_label: False @@ -55,11 +56,11 @@ no_weight_norm: True # MODEL # -------------------------------------------------------------------------------------- -actor_num_channels: [50, 50, 50, 50, 50] -actor_kernel_size: 5 +actor_num_channels: [100, 100, 100, 100, 100] +actor_kernel_size: 3 actor_dropout: 0.25 -critic_num_channels: [50, 50, 50, 50, 50] -critic_kernel_size: 5 +critic_num_channels: [100, 100, 100, 100, 100] +critic_kernel_size: 3 critic_dropout: 0.25 use_last_timepoint: True diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index 8a8ee757..411eb99c 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -37,6 +37,7 @@ logger = logging.getLogger(__name__) +torch.multiprocessing.set_sharing_strategy("file_system") mp = torch.multiprocessing.get_context("spawn") eps = np.finfo(float).eps @@ -471,7 +472,7 @@ class ParallelExperiences: def __init__(self): """Class constructor.""" - self.queue = mp.Queue() + self.queue = mp.JoinableQueue() self.processes = [] def collect_experiences(self) -> List[ExperienceBuffer]: @@ -480,6 
+481,7 @@ def collect_experiences(self) -> List[ExperienceBuffer]: # gather results from workers using the queue and merge them into one for process in self.processes: buffers.append(self.queue.get()) # will block + self.queue.task_done() for process in self.processes: process.join() return buffers @@ -497,6 +499,7 @@ def _wrapper(func, queue, args, kwargs): buffer = func(*args, **kwargs) # add collected experience to the queue so it can be returned to master process queue.put(buffer) + queue.join() def gather_experience( From 5785c57f67af5e922f316f644cb6f09cd8fcf491 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Wed, 20 Jan 2021 08:55:59 +0000 Subject: [PATCH 35/62] adding logging of weights/portfolio and making few fixes to how actions are sampled --- config/rl_config.yaml | 100 ++++++++++++--------- src/dagobert/modelling/dl/data.py | 20 +++-- src/dagobert/modelling/rl/environment.py | 11 +-- src/dagobert/modelling/rl/networks.py | 34 +++++-- src/dagobert/modelling/rl/ppo.py | 109 +++++++++++++++++------ src/dagobert/modelling/rl/rl_args.py | 12 +++ 6 files changed, 193 insertions(+), 93 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index af4a8ce6..7576287c 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -23,7 +23,7 @@ tags: - RL_test no_comet_logger: True seed: 42 -batch_size: 512 +batch_size: 240 # -------------------------------------------------------------------------------------- # RL @@ -32,16 +32,18 @@ batch_size: 512 asset_names: - BTC - ETH + - XRP + - LTC trading_cost: 0.002 reward_type: return -max_episode_length: 500 -steps_per_epoch: 10000 -n_optim_iters: 8 -gamma: 0.95 -lam: 0.9 -lr_actor: 0.001 -lr_critic: 0.001 -clip_ratio: 0.2 +max_episode_length: 240 +steps_per_epoch: 24000 +n_optim_iters: 2 +gamma: 0.0 +lam: 0.90 +lr_actor: 0.0003 +lr_critic: 0.0003 +clip_ratio: 0.25 target_kl: 0.01 @@ -56,13 +58,15 @@ no_weight_norm: True # MODEL # 
-------------------------------------------------------------------------------------- -actor_num_channels: [100, 100, 100, 100, 100] +actor_num_channels: [50, 50, 50, 50, 50] actor_kernel_size: 3 -actor_dropout: 0.25 -critic_num_channels: [100, 100, 100, 100, 100] +actor_dropout: 0.2 +# sample size - exp abs diff to mean | 20 - 5% | 50 - 3% | 100 - 2% | 500 - 1% +actor_dirichlet_sample_size: 20 +critic_num_channels: [50, 50, 50, 50, 50] critic_kernel_size: 3 -critic_dropout: 0.25 -use_last_timepoint: True +critic_dropout: 0.2 +use_last_timepoint: False # -------------------------------------------------------------------------------------- # DATA @@ -75,8 +79,14 @@ lookback: auto mini_series_length: auto df_train: - anchor: std_bar_BTCUSDT_tick_1.feather - df2: std_bar_ETHUSDT_tick_1.feather + anchor: std_bar_BTCUSDT_volume_100.feather + df2: std_bar_ETHUSDT_volume_500.feather + df3: std_bar_XRPUSDT_volume_125000.feather + df4: std_bar_LTCUSDT_volume_1000.feather + # anchor: std_bar_BTCUSDT_tick_1.feather + # df2: std_bar_ETHUSDT_tick_1.feather + # df3: std_bar_XRPUSDT_tick_1.feather + # df4: std_bar_LTCUSDT_tick_1.feather df_val: df_test: cols_to_model: @@ -86,14 +96,14 @@ cols_to_model: - high - low - close - - open_fd_0.0 - - high_fd_0.0 - - low_fd_0.0 - - close_fd_0.0 - - open_fd_tuned - - high_fd_tuned - - low_fd_tuned - - close_fd_tuned + # - open_fd_0.0 + # - high_fd_0.0 + # - low_fd_0.0 + # - close_fd_0.0 + # - open_fd_tuned + # - high_fd_tuned + # - low_fd_tuned + # - close_fd_tuned - cum_ticks - cum_dollar - volume @@ -106,24 +116,26 @@ cols_to_model: - cos_date - sin_time - cos_time - - boll - - boll_lb - - boll_ub - - macd - - macds - - macdh - - wr_60 - - rsi_60 - - rsv_60 - - atr_60 - - cci_60 - - kdjk_60 - - kdjd_60 - - kdjj_60 - - pdi_60 - - mdi_60 - - vr_60 + # - boll + # - boll_lb + # - boll_ub + # - macd + # - macds + # - macdh + # - wr_60 + # - rsi_60 + # - rsv_60 + # - atr_60 + # - cci_60 + # - kdjk_60 + # - kdjd_60 + # - kdjj_60 + # - pdi_60 
+ # - mdi_60 + # - vr_60 df2: + df3: + df4: # the cols of the secondary DFs will automatically be set to anchor's if not defined time_feat_n: 1 @@ -138,9 +150,9 @@ augment_dfs_mix: 0 # PREPROCESSING # -------------------------------------------------------------------------------------- -train_start_date: "2018-06-01" -train_days: 730 -val_days: 60 +train_start_date: "2019-01-01" +train_days: 500 +val_days: 30 val_train_offset_days: 1 val_puffer_days: 1 test_days: 30 diff --git a/src/dagobert/modelling/dl/data.py b/src/dagobert/modelling/dl/data.py index 969a2a56..dc9c77df 100644 --- a/src/dagobert/modelling/dl/data.py +++ b/src/dagobert/modelling/dl/data.py @@ -6,7 +6,6 @@ from pathlib import Path from argparse import Namespace from typing import List, Tuple, Union, Iterable, Callable - import torch import numpy as np import pandas as pd @@ -571,15 +570,20 @@ class PortfolioCryptoDataset(CryptoDataset): is achieved by adding the rl_return target column to the cols_to_model at init, and then fishing it out for each sample before returning it. - This convulated way was used so we can repurpose and keep as much of the original + This convoluted way was used so we can repurposed and keep as much of the original CryptoDataset as possible, without extensive refactoring. 
""" - def __init__(self, *args, **kw): + def __init__(self, *args, **kwargs): # for each instrument, we add the rl_return target col to their cols_to_model - for df_name, _ in kw[npa.cols_to_model].items(): - kw[npa.cols_to_model][df_name].append(NRL.rl_return) - super().__init__(*args, **kw) + for df_name, _ in kwargs[npa.cols_to_model].items(): + kwargs[npa.cols_to_model][df_name].append(NRL.rl_return) + # lazy way to check if we have datediff as first feature, if so, cumsum it + if kwargs[npa.cols_to_model][df_name][0] == "date_diff": + self.sum_date_diffs = True + else: + self.sum_date_diffs = False + super().__init__(*args, **kwargs) def __getitem__(self, idx): """ @@ -596,6 +600,10 @@ def __getitem__(self, idx): for i, X in enumerate(Xs): ys[i] = X[-1, -1] Xs[i] = X[:-1, :] + if self.sum_date_diffs: + # make the cumulative flow from right (present) to left (past) + cs = Xs[i][0][::-1].cumsum()[::-1] + Xs[i][0] = MinMaxScaler().fit_transform(cs.reshape([-1, 1])).ravel() return Xs, ys diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index 11efe152..760d9639 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -137,6 +137,9 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: # (eq16) cost to change portfolio: # excluding change in cash to avoid double counting for transaction cost + # TODO: isn't this unrealistic? 
this would only work if there's an efficient + # market between all pairs within the portfilio, i.e XRP -> LTC, althought + # this is what pgportfolio uses too mu = self.trading_cost * (np.abs(dw1[1:] - w1[1:])).sum() # (eq11) final portfolio value: I thought this should be w1 (at the end), but @@ -167,7 +170,6 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: "portfolio_value": p1, "market_return": y1.mean(), "rate_of_return": rho1, - "weights_mean": w1.mean(), "weights_std": w1.std(), "rebalancing_cost": mu, } @@ -245,13 +247,8 @@ def step(self, action: np.array): action: Portfolio weights for the N assets and the cash (first item). They should all be between 0 and 1 (no shorting) and sum to 1. """ - # cut and normalise action (just in case) - action = np.clip(action, 0, 1) - weights = action - weights /= weights.sum() + eps - next_state, y1 = self.data.step() - reward, info, done = self.portfolio.step(weights, y1) + reward, info, done = self.portfolio.step(action, y1) self.infos.append(info) return next_state, reward, done, info diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 40b81a9d..2a0365b5 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -2,6 +2,7 @@ from argparse import Namespace from typing import Union, Tuple +import numpy as np import torch from torch import nn from torch.distributions import Dirichlet @@ -36,6 +37,7 @@ def __init__( super().__init__() self.hparams = hparams self.n_actions = n_actions + self.actor = actor num_inputs = [len(cols) for dataset, cols in hparams.cols_to_model.items()] num_channels = ( hparams.actor_num_channels if actor else hparams.critic_num_channels @@ -62,17 +64,17 @@ def forward(self, state, past_pw): s2 = s1[:, :, -1] else: s2 = torch.tanh(self.linear1(s1).squeeze(-1)) - # bring together the state and past_pw representations make residual connection + # bring together the state and past_pw 
representations return self.linear2(torch.cat([s2, a1], dim=1)) -class ActorContinous(nn.Module): +class ActorContinuous(nn.Module): """ - Policy network, for continous action spaces, which returns a distribution + Policy network, for continuous action spaces, which returns a distribution and an action given an observation """ - def __init__(self, actor_net): + def __init__(self, actor_net: TemporalConvNet, sample_size: int = 1): """ The original PPO can be used for discrete action spaces with a Categorical distribution or for a continuous actions space with a multivariate Gaussian, @@ -91,23 +93,41 @@ def __init__(self, actor_net): and sampling our actions from that. This by design returns a probability summing to one and there's no need to learn a separate std param. + We can make this more deterministic with the sample_size param, see docs. + NOTE! I'm not sure how well this works or how legit it is, as I haven't found any papers or implementations actually doing this. Args: actor_net: Initialized actor net. + sample_size: Determines how deterministic our Dirichlet based sampling is. + At default (1), we return a single sample from the dist. With higher + sample sizes the returned weights are closer and closer to the actual + mean of the distribution. If it's set to zero, we return the mean and + basically the model becomes deterministic. 
""" super().__init__() self.actor_net = actor_net self.inv_lin = InverseLinear() + self.sample_size = sample_size def forward(self, states, past_pw): # get params for Dirichlet, and drop batch dim if batch_size=1 logits = self.actor_net(states, past_pw) concentrations = self.inv_lin(logits).squeeze(0) pi = Dirichlet(concentrations) - actions = pi.sample() + + # take 20 samples - corresponds to +/- 5% compared to returning the mean + if self.sample_size > 0: + actions = pi.sample((self.sample_size,)).mean(dim=0) + else: + actions = pi.sample.mean + + # very rarely we get actions that don't sum to 1 or are negative, fix it here + if actions.sum() != past_pw.shape[0]: + actions = torch.clip(actions, 0.01, 0.99) + actions = (actions.T / actions.sum(dim=1)).T return pi, actions, logits def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor): @@ -119,7 +139,7 @@ def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor): pi: torch distribution actions: actions taken by distribution Returns: - log probability of the acition under pi + log probability of the action under pi """ return pi.log_prob(actions) @@ -170,7 +190,7 @@ def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor) -> torch.Tensor: pi: torch distribution actions: actions taken by distribution Returns: - log probability of the acition under pi + log probability of the action under pi """ return self.actor_net.get_log_prob(pi, actions) diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index 411eb99c..ff488143 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -14,6 +14,7 @@ import gym import torch import numpy as np +import pandas as pd import torch.optim as optim from torch.utils.data import DataLoader from pytorch_lightning import LightningModule @@ -26,7 +27,7 @@ from dagobert.modelling.rl import ( RLEnv, ActorCriticTCN, - ActorContinous, + ActorContinuous, ActorCriticAgent, ) from dagobert.modelling.dl import ( @@ -65,7 +66,7 
@@ def run_rl(args): # setup callbacks checkpoint_callback = ModelCheckpoint( - monitor="avg_reward", + monitor="avg_total_reward", filename="_{epoch:02d}_{avg_reward:.10f}", dirpath=f"{args.log_dir}/models/{args.exp_name}_{tb_logger.version}", save_top_k=3, @@ -118,14 +119,13 @@ def __init__(self, hparams: Namespace): self.critic = ActorCriticTCN( self.hparams, n_actions=n_actions, output_size=1, actor=False ) - self.actor = ActorContinous( - ActorCriticTCN(self.hparams, n_actions=n_actions, output_size=n_actions) + self.actor = ActorContinuous( + ActorCriticTCN(self.hparams, n_actions=n_actions, output_size=n_actions), + self.hparams.actor_dirichlet_sample_size, ) self.agent = ActorCriticAgent(self.actor, self.critic) self.buffer = ExperienceBuffer() - self.avg_ep_reward = 0 - self.avg_ep_len = 0 - self.avg_reward = 0 + self.to_log = {} # ---------------------------------------------------------------------------------- # EXPERIENCE COLLECTION FOR TRAIN DATALOADER @@ -173,16 +173,13 @@ def generate_experience_buffer( ) parallel_experiences.create_worker(*args) - # collect experiences in parallel, then merge them + # collect experiences in parallel, then merge them, calculate metrics self.buffer.merge_buffers(parallel_experiences.collect_experiences()) - # update metrics we log about the current performance of the agent - self.avg_ep_reward = self.buffer.epoch_rewards / self.buffer.done_episodes + eps - self.avg_reward = self.buffer.epoch_rewards / self.hparams.steps_per_epoch - self.avg_ep_len = self.hparams.steps_per_epoch / self.buffer.done_episodes + eps + self.update_metrics_to_log() - # yield a dataset for dataloader for updating actor/critic + # yield a dataset for dataloader for updating actor/critic and clear buffer self.setup_model_for_training() - for state, past_pw, action, logp_old, qval, adv in zip( + for state, past_pw, action, old_logp, qval, adv in zip( self.buffer.states, self.buffer.past_pws, self.buffer.actions, @@ -190,7 +187,7 @@ def 
generate_experience_buffer( self.buffer.qvals, self.buffer.advs, ): - yield state, past_pw, action, logp_old, qval, adv + yield state, past_pw, action, old_logp, qval, adv self.buffer.clear_buffer() def setup_model_for_experience_gathering(self): @@ -217,8 +214,33 @@ def setup_model_for_training(self): self.agent.critic_net.train() self.agent.actor_net.train() + def update_metrics_to_log(self): + """Helper function recalculating metrics we track at end of each epoch""" + done_eps = self.buffer.done_episodes + eps + ep_rewards = self.buffer.epoch_rewards + e = "episode" + p = "portfolio" + + # pytorch lightning model checkpoint needs metric name without / + self.to_log["avg_total_reward"] = ep_rewards / done_eps + self.to_log[f"{e}/avg_total_reward"] = ep_rewards / done_eps + self.to_log[f"{e}/avg_step_reward"] = ep_rewards / self.hparams.steps_per_epoch + self.to_log[f"{e}/avg_len"] = self.hparams.steps_per_epoch / done_eps + self.to_log[f"{p}/avg_value_epend"] = ( + sum(list(self.buffer.p_ep_end_value)) / done_eps + ) + + # need this otherwise the generator won't work multiple times + infos = pd.DataFrame(list(self.buffer.infos)).mean() + self.to_log[f"{p}/avg_value"] = infos["portfolio_value"] + self.to_log[f"{p}/avg_weight_std"] = infos["weights_std"] + self.to_log[f"{p}/avg_rebalancing_cost"] = infos["rebalancing_cost"] + self.to_log[f"{p}/avg_market_return"] = infos["market_return"] + for w in infos.index[infos.index.str.contains("weight_")]: + self.to_log[f"weights/{w}"] = infos[w] + @staticmethod - def discount_rewards(rewards: List[float], discount: float) -> List[float]: + def discount_rewards(rewards: List[float], gamma: float = 0.99) -> List[float]: """ Calculate the discounted rewards of all rewards in list. 
This is used as Q-values for training the critic network so it becomes better approximating @@ -226,6 +248,7 @@ def discount_rewards(rewards: List[float], discount: float) -> List[float]: Args: rewards: list of rewards/advantages + gamma: Gamma for discounting the long-term rewards. Returns: list of discounted rewards/advantages @@ -234,7 +257,7 @@ def discount_rewards(rewards: List[float], discount: float) -> List[float]: cumul_reward = [] sum_r = 0.0 for r in reversed(rewards): - sum_r = (sum_r * discount) + r + sum_r = (sum_r * gamma) + r cumul_reward.append(sum_r) return list(reversed(cumul_reward)) @@ -292,10 +315,10 @@ def _update_past_pw(p1: float, action: torch.Tensor, device) -> torch.Tensor: # LOSSES AND OPTIMIZERS # ---------------------------------------------------------------------------------- - def actor_loss(self, state, past_pw, action, logp_old, adv) -> torch.Tensor: + def actor_loss(self, state, past_pw, action, old_logp, adv) -> torch.Tensor: pi, _, _ = self.actor(state, past_pw) logp = self.actor.get_log_prob(pi, action) - old_new_diff = logp - logp_old + old_new_diff = logp - old_logp ratio = torch.exp(old_new_diff) # idea taken from spinningup PPO implemenetation to prevent exploding loss approx_kl = old_new_diff.mean().item() @@ -344,20 +367,25 @@ def training_step( # normalize advantages within batch adv = (adv - adv.mean()) / adv.std() - self.log("avg_ep_len", self.avg_ep_len, on_step=False, on_epoch=True) - self.log("avg_ep_reward", self.avg_ep_reward, on_step=False, on_epoch=True) - self.log("avg_reward", self.avg_reward, on_step=False, on_epoch=True) + # log all metrics (other than loss) + for k, v in self.to_log.items(): + self.log(k, v, on_step=False, on_epoch=True) if optimizer_idx == 0: + loss_actor, approx_kl = self.actor_loss( state, past_pw, action, old_logp, adv ) - self.log("loss_actor", loss_actor, on_epoch=True, on_step=False) - self.log("approx_kl", approx_kl, on_epoch=True, on_step=False) + if torch.isnan(loss_actor): + 
from IPython import embed + + embed() + self.log("loss/actor", loss_actor, on_epoch=True, on_step=False) + self.log("loss/approx_kl", approx_kl, on_epoch=True, on_step=False) return loss_actor elif optimizer_idx == 1: loss_critic = self.critic_loss(state, past_pw, qval) - self.log("loss_critic", loss_critic, on_epoch=True, on_step=False) + self.log("loss/critic", loss_critic, on_epoch=True, on_step=False) return loss_critic @staticmethod @@ -430,6 +458,24 @@ def append( self.ep_rewards.append(reward) self.ep_values.append(value.item()) + def shift_rewards(self): + """ + The reward at time t was is realised as a consequence of action t-1. This is + special to our environment (see last paragraph of page 9 in the article: + https://arxiv.org/pdf/1706.10059.pdf). This means, at the end of each episode + we need to drop the very last element of state/action/logp/value/info and + shift the rewards by one to the right, i.e. making r0 align with a1, r1 with a2, + ... rn-1 with an. + """ + self.ep_rewards = self.ep_rewards[1:] + self.states.pop(-1) + self.past_pws.pop(-1) + self.actions.pop(-1) + self.logps.pop(-1) + self.infos.pop(-1) + self.ep_rewards.pop(-1) + self.ep_values.pop(-1) + def merge_buffers(self, buffers): """ Merges the passed in ExperienceBuffers and overwrites the current state with it. 
@@ -444,6 +490,8 @@ def merge_buffers(self, buffers): self.logps = chain(*[buffer.logps for buffer in buffers]) self.qvals = chain(*[buffer.qvals for buffer in buffers]) self.advs = chain(*[buffer.advs for buffer in buffers]) + self.infos = chain(*[buffer.infos for buffer in buffers]) + self.p_ep_end_value = chain(*[buffer.p_ep_end_value for buffer in buffers]) self.done_episodes = sum([buffer.done_episodes for buffer in buffers]) self.epoch_rewards = sum([buffer.epoch_rewards for buffer in buffers]) @@ -457,6 +505,7 @@ def clear_buffer(self): self.qvals = [] self.logps = [] self.infos = [] + self.p_ep_end_value = [] # episode / epoch vars self.ep_rewards = [] @@ -530,7 +579,7 @@ def gather_experience( lam: See docs of :func:`PPO.calc_advantage` Returns: - Adds the results to `exp_queue` so it can be processed in the main process. + Experience collected in this parallel worker. """ buffer = ExperienceBuffer() state = env.reset() @@ -547,17 +596,19 @@ def gather_experience( terminal = len(buffer.ep_rewards) == max_episode_length if done or terminal: - buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma)[:-1] + # this is specific to our special environment setup + # buffer.shift_rewards() + buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma) buffer.advs += PPO.calc_advantage( buffer.ep_rewards, buffer.ep_values, gamma, lam ) buffer.done_episodes += 1 buffer.epoch_rewards += np.sum(buffer.ep_rewards) + buffer.p_ep_end_value.append(info["portfolio_value"]) - # episode over, reset the env and the buffer + # episode over, reset the env and the episode buffer buffer.ep_rewards = [] buffer.ep_values = [] state = env.reset() past_pw = PPO._init_past_pw(asset_num, device) - return buffer diff --git a/src/dagobert/modelling/rl/rl_args.py b/src/dagobert/modelling/rl/rl_args.py index 32853ad6..4f2db348 100644 --- a/src/dagobert/modelling/rl/rl_args.py +++ b/src/dagobert/modelling/rl/rl_args.py @@ -110,6 +110,18 @@ def add_rl_specific_args(parent_parser): 
default=0.2, help="Clipping parameter for the PPO's policy upgrade cost function.", ) + parser.add_argument( + "--actor_dirichlet_sample_size", + type=float, + default=1, + help=( + "Determines how deterministic our Dirichlet based sampling is. At default " + "(1), we return a single sample from the dist. With higher sample sizes " + "the returned weights are closer and closer to the actual mean of the " + "distribution. If it's set to zero, we return the mean and basically " + "the model becomes deterministic." + ), + ) return parser From 24b28376f834481657667b55a20a3b3fb1eea2dd Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Thu, 21 Jan 2021 18:52:03 +0000 Subject: [PATCH 36/62] data, runner, cleanup --- config/tcn_config_data.yaml | 3 +- config/timegan_config.yaml | 35 +- notebooks/modelling/test_cryptodataset.ipynb | 341 +++++++++++++++++- .../modelling/augmentation/timegan.py | 175 +++++++-- src/dagobert/modelling/dl/__init__.py | 8 +- src/dagobert/modelling/dl/data.py | 39 +- 6 files changed, 550 insertions(+), 51 deletions(-) diff --git a/config/tcn_config_data.yaml b/config/tcn_config_data.yaml index da8eca4b..ea9a5926 100644 --- a/config/tcn_config_data.yaml +++ b/config/tcn_config_data.yaml @@ -59,7 +59,8 @@ no_sample_weights: False # DATA # -------------------------------------------------------------------------------------- -data_dir: "/home/ubuntu/dagobert/data/modelling" +#data_dir: "/home/ubuntu/dagobert/data/modelling" +data_dir: "C:/Users/u164428/Desktop/Dagobert/data/modelling" lookback: auto mini_series_length: auto diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index cd65fefd..9c664c4d 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -12,7 +12,7 @@ gpus: 0 log_dir: logs num_workers: 4 -exp_name: Time-GAN +exp_name: TGAN-test tags: - time_gan_test no_comet_logger: True @@ -29,6 +29,12 @@ rnn: lstm # embedding weight in cost of generator loss emb_weight: 1 +# don't change these, or preprocessing 
won't work +target_col: +to_label: False +no_sample_weights: True +binariser_method: + # -------------------------------------------------------------------------------------- # MODEL # -------------------------------------------------------------------------------------- @@ -37,7 +43,7 @@ dropout: 0.2 num_layers: 2 hidden_size: 50 z_dim: 50 -mini_series_length: 240 +mini_series_length: 50 # -------------------------------------------------------------------------------------- # DATA @@ -47,6 +53,31 @@ mini_series_length: 240 #data_dir: "/home/daniel/dagobert_data/modelling" data_dir: "C:/Users/u164428/Desktop/Dagobert/data/modelling" + +df_train: + anchor: std_bar_ETHUSDT_tick_1.feather + +cols_to_model: + anchor: + - date_diff + - open + - high + - low + - close +# - cum_ticks +# - cum_dollar +# - volume +# - cum_volume_buy +# - cum_volume_sell +# - cum_volume_quote +# - cum_volume_quote_buy +# - cum_volume_quote_sell +# - sin_date +# - cos_date +# - sin_time +# - cos_time + + # -------------------------------------------------------------------------------------- # PREPROCESSING # -------------------------------------------------------------------------------------- diff --git a/notebooks/modelling/test_cryptodataset.ipynb b/notebooks/modelling/test_cryptodataset.ipynb index c6d23254..3cf26b90 100644 --- a/notebooks/modelling/test_cryptodataset.ipynb +++ b/notebooks/modelling/test_cryptodataset.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -12,8 +21,9 @@ "import pandas as pd\n", "import numpy as np\n", "import torch\n", + "from pathlib import Path\n", "\n", - "from dagobert.modelling.dl import CryptoDataset\n", + "from dagobert.modelling.dl import CryptoDataset, GeneratorCryptoDataset\n", "from dagobert.preprocessing.utils import set_dt_index" ] }, @@ -27,7 +37,8 @@ "output_type": "stream", "text": [ " dev\n", - "* test/cryptodata\n" + " feat/orderbook_data\n", + "* feat/tgan\n" ] } ], @@ -39,11 +50,229 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# conda environments:\n", + "#\n", + "base C:\\Users\\u164428\\AppData\\Local\\Continuum\\anaconda3\n", + "dagobert * C:\\Users\\u164428\\AppData\\Local\\Continuum\\anaconda3\\envs\\dagobert\n", + "tensorenviron C:\\Users\\u164428\\AppData\\Local\\Continuum\\anaconda3\\envs\\tensorenviron\n", + "\n" + ] + } + ], + "source": [ + "! conda env list" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, "outputs": [], "source": [ "df = pd.read_feather(\"C:/Users/u164428/Desktop/Dagobert/data/modelling/std_bar_XRPUSDT_volume_125000.feather\")" ] }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
date_diffsin_timecos_timesin_datecos_datedate_timeopenclosehighlowcum_tickscum_dollarvolumecum_volume_buycum_volume_sellcum_volume_quotecum_volume_quote_buycum_volume_quote_sell
00.0-0.9469300.3214390.587785-0.8090172018-05-26 19:15:000.620490.620730.622300.61990118.082411.086333132750.4565058.6067691.8582434.10089940405.12935942028.971540
1840.0-0.9255410.3786490.587785-0.8090172018-05-26 19:29:000.620740.621240.621810.62000119.081044.155573130494.8188327.5442167.2781042.37997554868.96517926173.414796
2420.0-0.9135450.4067370.587785-0.8090172018-05-26 19:36:000.620690.621510.621570.6200171.079926.853613128650.06126545.462104.6079913.08364278606.0554881307.028154
3420.0-0.9006980.4344450.587785-0.8090172018-05-26 19:43:000.621510.622250.622250.6201097.084404.854894135782.73129644.416138.3284370.10654080558.6915123811.415028
4840.0-0.8724960.4886210.587785-0.8090172018-05-26 19:57:000.621080.620560.622240.61925118.091415.801332147278.7893035.0254243.7691370.89939557733.40753533637.491861
\n", + "
" + ], + "text/plain": [ + " date_diff sin_time cos_time sin_date cos_date date_time \\\n", + "0 0.0 -0.946930 0.321439 0.587785 -0.809017 2018-05-26 19:15:00 \n", + "1 840.0 -0.925541 0.378649 0.587785 -0.809017 2018-05-26 19:29:00 \n", + "2 420.0 -0.913545 0.406737 0.587785 -0.809017 2018-05-26 19:36:00 \n", + "3 420.0 -0.900698 0.434445 0.587785 -0.809017 2018-05-26 19:43:00 \n", + "4 840.0 -0.872496 0.488621 0.587785 -0.809017 2018-05-26 19:57:00 \n", + "\n", + " open close high low cum_ticks cum_dollar volume \\\n", + "0 0.62049 0.62073 0.62230 0.61990 118.0 82411.086333 132750.45 \n", + "1 0.62074 0.62124 0.62181 0.62000 119.0 81044.155573 130494.81 \n", + "2 0.62069 0.62151 0.62157 0.62001 71.0 79926.853613 128650.06 \n", + "3 0.62151 0.62225 0.62225 0.62010 97.0 84404.854894 135782.73 \n", + "4 0.62108 0.62056 0.62224 0.61925 118.0 91415.801332 147278.78 \n", + "\n", + " cum_volume_buy cum_volume_sell cum_volume_quote cum_volume_quote_buy \\\n", + "0 65058.60 67691.85 82434.100899 40405.129359 \n", + "1 88327.54 42167.27 81042.379975 54868.965179 \n", + "2 126545.46 2104.60 79913.083642 78606.055488 \n", + "3 129644.41 6138.32 84370.106540 80558.691512 \n", + "4 93035.02 54243.76 91370.899395 57733.407535 \n", + "\n", + " cum_volume_quote_sell \n", + "0 42028.971540 \n", + "1 26173.414796 \n", + "2 1307.028154 \n", + "3 3811.415028 \n", + "4 33637.491861 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.iloc[:, :18]\n", + "df.head()" + ] + }, { "cell_type": "code", "execution_count": 73, @@ -1389,6 +1618,110 @@ "source": [ "x.searchsorted(5, side=\"left\"), x.searchsorted(5, side=\"right\")" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TimeGAN" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "BATCH_SIZE = 4" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + 
"outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "std_bar_XRPUSDT_tick_1.feather doesn't have enough bars to use with the anchor after restricting it to the max date of the anchor's index:std_bar_XRPUSDT_tick_1.feather: 1279233 bars / anchor: 1624303 bars.\n", + "The samples from this augment_dfs will be less unique as we approachthe end date of the anchor 2020-10-17T00:06:00.000000000.\n", + "std_bar_XRPUSDT_tick_1.feather doesn't have adequate time-coverage for anchor DF. This could lead to non-unique samples from this augment_dfs.\n", + "\n", + "Anchor min/max dates: 2017-08-19T00:39:00.000000000/2020-10-17T00:06:00.000000000. \n", + "std_bar_XRPUSDT_tick_1.feather min/max dates: 2018-05-06 00:09:00/2020-10-17 00:06:00.\n" + ] + } + ], + "source": [ + "dataset = GeneratorCryptoDataset(\n", + " df_to_load = {\"anchor\": \"std_bar_ETHUSDT_tick_1.feather\",\n", + " \"df2\": \"std_bar_XRPUSDT_tick_1.feather\"},\n", + " cols_to_model = {\"anchor\": [\"date_diff\", \"open\", \"high\", \"low\", \"close\"],\n", + " \"df2\": [\"date_diff\", \"open\"]},\n", + " target_col = None,\n", + " mini_series_length = 3,\n", + " last_y = True,\n", + " date_col = \"date_time\",\n", + " data_dir = Path(\"C:/Users/u164428/Desktop/Dagobert/data/modelling\"),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "data_loaded = list(torch.utils.data.DataLoader(dataset, batch_size = BATCH_SIZE))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "541435" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(data_loaded)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([3, 3, 7])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" 
+ } + ], + "source": [ + "data_loaded[0].shape " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 44268be1..cf621d5c 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -7,6 +7,7 @@ from typing import List, Optional from argparse import Namespace import logging +from copy import deepcopy from pathlib import Path import numpy as np @@ -23,11 +24,67 @@ from torch.utils.data import Dataset, WeightedRandomSampler, RandomSampler, DataLoader from pytorch_lightning import LightningModule - -from dagobert.modelling.dl import AdaBelief +from pytorch_lightning.trainer import seed_everything +from pytorch_lightning.callbacks import ModelCheckpoint +from pytorch_lightning import Trainer, Callback, loggers + +from dagobert.naming import NStudy, NPreprocessingArgs as npa +from dagobert.modelling.dl import ( + GeneratorCryptoDataset, + TemporalConvNet, + Preprocessing, + AdaBelief, + LogCoshLoss, + FocalLoss, +) from dagobert.modelling.augmentation.utils import get_noise +logger = logging.getLogger(__name__) + + +def run_tgan(args): + # setup loggers + seed_everything(args.seed) + tb_logger_name = None + comet_name = args.exp_name + gan_loggers = [] + tb_logger = loggers.TensorBoardLogger( + save_dir=Path(args.log_dir), name=args.exp_name, version=tb_logger_name + ) + gan_loggers.append(tb_logger) + if not args.no_comet_logger: + gan_loggers.append( + loggers.CometLogger( + api_key=NStudy.comet_api_key, + workspace=NStudy.comet_workspace, + save_dir=args.log_dir, + project_name=NStudy.comet_project_name, + experiment_name=f"{comet_name}_{tb_logger.version}", + ) + ) + + # setup callbacks + checkpoint_callback = ModelCheckpoint( + monitor="loss_gen", + filename="_{epoch:02d}_{avg_reward:.10f}", + 
dirpath=f"{args.log_dir}/models/{args.exp_name}_{tb_logger.version}", + save_top_k=3, + mode="max", + ) + + # define trainer and and lightning module + args.multiprocessing = True if args.gpus != 1 else False + trainer = Trainer.from_argparse_args( + args, + logger=gan_loggers, + checkpoint_callback=checkpoint_callback, + ) + model = TimeGANLightning(args) + trainer.fit(model) + # trainer.test() + + class RnnBlock(nn.Module): """ Class for creating 5 components of TimeGAN. @@ -105,6 +162,7 @@ def __init__(self, hparams: Namespace): # define main vars (other than model) super().__init__() # TODO: pre sanity check, define hparams + hparams = TimeGANLightning._pre_sanity_check(hparams) # lightning sets this to cuda too late for some of our setup to work self.tgan_device = "cuda" if hparams.gpus > 0 else "cpu" # TODO: check if real data is the right one, get data in @@ -190,8 +248,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): Returns: Loss """ - # TODO: is there any label to give back? 
- x, label = batch + x = batch batch_len = len(x) h = self.embedder(x) @@ -256,35 +313,35 @@ def training_step(self, batch, batch_idx, optimizer_idx): ) # update generator if optimizer_idx == 2: - - e_hat = self.generator(z) - h_hat = self.supervisor(e_hat) - h_hat_supervise = self.supervisor(h) - - # synthetic data - x_hat = self.recovery(h_hat) - # no_grad to leave discriminator unchanged - with torch.no_grad(): - y_fake = self.discriminator(h_hat) - y_fake_e = self.discriminator(e_hat) - loss_gen = TimeGANLightning.generator_loss( - y_fake, - y_fake_e, - h, - h_hat_supervise, - x, - x_hat, - self.hparams.emb_weight, - ) - self.log( - "loss_gen", - loss_gen, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, - ) - return loss_gen + for i in range(2): + e_hat = self.generator(z) + h_hat = self.supervisor(e_hat) + h_hat_supervise = self.supervisor(h) + + # synthetic data + x_hat = self.recovery(h_hat) + # no_grad to leave discriminator unchanged + with torch.no_grad(): + y_fake = self.discriminator(h_hat) + y_fake_e = self.discriminator(e_hat) + loss_gen = TimeGANLightning.generator_loss( + y_fake, + y_fake_e, + h, + h_hat_supervise, + x, + x_hat, + self.hparams.emb_weight, + ) + self.log( + "loss_gen", + loss_gen, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return loss_gen # update discriminator elif optimizer_idx == 4: @@ -330,18 +387,43 @@ def configure_optimizers(self) -> List[optim.Optimizer]: optimizers.append(optimizer) return optimizers + def train_dataloader(self): + return self._get_dataloader(self.hparams.df_train, "train") + + def val_dataloader(self): + return self._get_dataloader(self.hparams.df_val, "val") + # ---------------------------------------------------------------------------------- # SETUP FUNCTIONS # ---------------------------------------------------------------------------------- - def train_dataloader( - self, - ) -> DataLoader: + def _get_dataloader(self, dfs_to_load: dict, prefix: str) -> 
DataLoader: """ - + Returns a dataloader for train and validation sets. + Args: + dfs_to_load: Either train, validation or test DFs to load. + prefix: Name of phase, either train or val. Returns: - + Instantiated DataLoader. """ - return Dataloader(dataset=dataset, batch_size=self.hparams.batch_size) + # define dataset and plot it + if prefix == "train": + shuffle = True + else: + shuffle = False + dataset = GeneratorCryptoDataset( + df_to_load=dfs_to_load, + cols_to_model=self.hparams.cols_to_model, + target_col=self.hparams.target_col, + mini_series_length=self.hparams.mini_series_length, + last_y=self.hparams.last_y, + data_dir=self.hparams.data_dir, + ) + return DataLoader( + dataset, + batch_size=self.hparams.batch_size, + shuffle=shuffle, + num_workers=self.hparams.num_workers, + ) # ---------------------------------------------------------------------------------- # CALCULATION @@ -456,3 +538,18 @@ def discriminator_loss(y_fake, y_fake_e, y_real, emb_weight): d_loss_real = criterion(y_real, torch.ones_like(y_real)) # TODO: any use of dividing loss by (2 + emb_weight)? 
return emb_weight * d_loss_fake_e + d_loss_fake + d_loss_real + + @staticmethod + def _pre_sanity_check(hparams: Namespace): + # ensure we have the rl specific target column in the config + if hparams.target_col: + raise ValueError("target_col has to be None for GAn development.") + + # fill in the same cols for any df that doesn't have the cols_to_model defined + if len(hparams.cols_to_model) > 1: + for df_name, cols in hparams.cols_to_model.items(): + if df_name != npa.anchor and (cols is None or len(cols) == 0): + hparams.cols_to_model[df_name] = deepcopy( + hparams.cols_to_model[npa.anchor] + ) + return hparams diff --git a/src/dagobert/modelling/dl/__init__.py b/src/dagobert/modelling/dl/__init__.py index 4dd9f930..a2849ec8 100644 --- a/src/dagobert/modelling/dl/__init__.py +++ b/src/dagobert/modelling/dl/__init__.py @@ -1,4 +1,10 @@ -from .data import CryptoDataset, PortfolioCryptoDataset, ExperienceSourceDataset +from .data import ( + CryptoDataset, + PortfolioCryptoDataset, + ExperienceSourceDataset, + GeneratorCryptoDataset, +) + from .tcn_net import TemporalConvNet from .utils import LogCoshLoss, FocalLoss, MixedNormalPDFLoss from .adabelief import AdaBelief diff --git a/src/dagobert/modelling/dl/data.py b/src/dagobert/modelling/dl/data.py index 91e96391..b670c3bd 100644 --- a/src/dagobert/modelling/dl/data.py +++ b/src/dagobert/modelling/dl/data.py @@ -146,6 +146,7 @@ def __init__( simple_lookahead_reg: If True, instead of returning a 0/1 label, we return the log return between the end of the mini-series and the lookahead time as a label for a regression task. + """ self.df_to_load = df_to_load self.cols_to_model = cols_to_model @@ -200,7 +201,7 @@ def __getitem__(self, idx): def _load_df_anchor( self, - ) -> pd.DatetimeIndex: + ) -> pd.DataFrame: """ Loads the anchor DF, and returns it. We use the anchor df for plotting and to extract the master index which we measure everything else against in batching. 
@@ -266,7 +267,7 @@ def _load_dfs_indices_targets(self) -> Tuple[dict, dict, list]: if df_name == npa.anchor: targets.append(self._get_target(df)) - # load augmnet DFs - dict of list of paths + # load augment DFs - dict of list of paths if self.augment_dfs: for df_name, df_paths in self.augment_dfs.items(): if isinstance(df_paths, str): @@ -334,6 +335,9 @@ def _get_target(self, df: pd.DataFrame) -> np.array: .bfill() .ffill() ).values + # dummy solution for GAN - benefit is that rest of code is unchanged + elif not self.target_col: + return np.zeros(len(df)) else: return df[self.target_col].values @@ -387,7 +391,7 @@ def _get_from_upto_idxs( Returns the from and upto idx for a given sample in the batch given the idx. Since we are indexing with numerical idxes and not dates, if we have multiple dfs in df_train (e.g. anchor and df2), we need to ensure that df2's from and - upto idx-es are at not leaking info from the future and are from roughly the + upto idx-es are not leaking info from the future and are from roughly the same date time period. The same holds for the situation when we replaced anchor df with one of its augment_dfs. Therefore we always return lists of from_idxs and upto_idxs for each df in `batch_dfs`. @@ -571,7 +575,7 @@ class PortfolioCryptoDataset(CryptoDataset): is achieved by adding the rl_return target column to the cols_to_model at init, and then fishing it out for each sample before returning it. - This convulated way was used so we can repurpose and keep as much of the original + This convuluted way was used so we can repurpose and keep as much of the original CryptoDataset as possible, without extensive refactoring. 
""" @@ -616,3 +620,30 @@ def __init__(self, generate_batch: Callable): def __iter__(self) -> Iterable: iterator = self.generate_batch() return iterator + + +class GeneratorCryptoDataset(CryptoDataset): + """ + This extends :class:`dagobert.modelling.dl.data.CryptoDataset` to make it + suitable for synthetic data generation through generative adversarial learning. + + Instead of returning an array of Xs and single y, this returns for only X. + This convuluted way was used so we can repurpose and keep as much of the original + CryptoDataset as possible, without extensive refactoring. + """ + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + + def __getitem__(self, idx): + """ + We don't need to calculate or fetch y, as we only need X to be modelled. + """ + idx = idx.tolist() if torch.is_tensor(idx) else idx + batch_dfs, batch_indices, _ = self._get_batch_dfs_indices_target() + from_idx, upto_idx = self._get_from_upto_idxs(idx, batch_indices) + Xs = self._get_Xs(batch_dfs, from_idx, upto_idx) + # from IPython import embed + # embed() + X = np.concatenate(Xs).T + return X From 232bdd31c7e2902f30a708956ff1faa7faf39373 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Fri, 22 Jan 2021 17:40:23 +0000 Subject: [PATCH 37/62] preproc, volume scaling --- config/timegan_config.yaml | 5 +++ .../modelling/augmentation/timegan.py | 5 +++ src/dagobert/modelling/dl/preprocessing.py | 32 +++++++++++++------ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index 9c664c4d..cf1b838c 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -56,7 +56,10 @@ data_dir: "C:/Users/u164428/Desktop/Dagobert/data/modelling" df_train: anchor: std_bar_ETHUSDT_tick_1.feather +df_val: +df_test: +# the cols of the secondary DFs will automatically be set to anchor's if not defined cols_to_model: anchor: - date_diff @@ -81,3 +84,5 @@ cols_to_model: # 
-------------------------------------------------------------------------------------- # PREPROCESSING # -------------------------------------------------------------------------------------- + +scaling_method: minmax \ No newline at end of file diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index cf621d5c..6dadf8df 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -121,6 +121,7 @@ def __init__( dropout=dropout, batch_first=batch_first, ) + self.linear_input_size = linear_input_size self.tanh = nn.Tanh() self.linear = nn.Linear(linear_input_size, linear_output_size) self.sigmoid = nn.Sigmoid() @@ -129,6 +130,8 @@ def __init__( def forward(self, x): rnn_out, _hidden = self.rnn(x) rnn_out = self.tanh(rnn_out) + # todo: is there reshaping needed? + # rnn_out = rnn_out.reshape(-1, self.linear_input_size) output = self.linear(rnn_out) if self.linear_activation: output = self.sigmoid(output) @@ -165,6 +168,8 @@ def __init__(self, hparams: Namespace): hparams = TimeGANLightning._pre_sanity_check(hparams) # lightning sets this to cuda too late for some of our setup to work self.tgan_device = "cuda" if hparams.gpus > 0 else "cpu" + # prepare datafiles if necessary + hparams = Preprocessing().preprocess_train_dfs(hparams) # TODO: check if real data is the right one, get data in # TODO: any sanity checks on data, hypermparams self.real_logging = None diff --git a/src/dagobert/modelling/dl/preprocessing.py b/src/dagobert/modelling/dl/preprocessing.py index ffa6916a..5654064b 100644 --- a/src/dagobert/modelling/dl/preprocessing.py +++ b/src/dagobert/modelling/dl/preprocessing.py @@ -187,7 +187,7 @@ def preprocess_train_dfs(hparams: Namespace) -> Namespace: supplied parameters. 
If the `df_train` is defined, but the `df_val` and `df_test` are not, then we split the train data into 3, scale them, add sample weights to the train portion and if required binarise the label too. If a - particular combination of preprocessing parameters where already used and + particular combination of preprocessing parameters were already used and therefore we have an existing file already on the machine, we skip the input DF. Args: @@ -466,12 +466,13 @@ def _get_scalers_for_train_dfs(hparams) -> dict: def _get_scalers_from_cols(cols: list, scaling_method: str) -> list: """ For a given dataset's columns, this works out which columns to scale together - (OHLC). It will also creates a scaler for the non-OHLC columns. Furthermore - it will create groups of the supplied OHLC columns depending on the fractional - differencing suffix _fd_x at the end of theses columns. Then it will add the - first element of each group with a scaler then the rest of the group - with the same scaler instance. This is to ensure that OHLC columns are scaled - together and not independently. + (OHLC & volume). It will also creates a scaler for the non-OHLC/volume columns. + Furthermore, it will create groups of the supplied OHLC columns depending on the + fractional differencing suffix _fd_x at the end of these columns. It will also + create groups of the supplied volume cols based on base or quote quantity. + Then it will add the first element of each group with a scaler then the rest of + the group with the same scaler instance. This is to ensure that OHLC & volume + columns are scaled together and not independently. Args: cols: Columns to work on and check for OHLC columns. 
@@ -489,10 +490,14 @@ def _instantiate_scaler(scaling_method): return MinMaxScaler() scalers = [] - # extrack OHLC cols + # extract OHLC and volume cols s_cols = pd.Series(list(cols)) ohlc_cols = s_cols[s_cols.str.contains("open|high|low|close")] - non_ohlc_cols = list(s_cols[~s_cols.str.contains("open|high|low|close")]) + vol_cols = list( + s_cols[(s_cols.str.contains("volume")) & (~s_cols.str.contains("quote"))] + ) + vol_quote_cols = list(s_cols[s_cols.str.contains("volume_quote")]) + non_ohlc_cols = list(s_cols[~s_cols.str.contains("open|high|low|close|volume")]) # find groups of OHLC cols for multiple fd values (easiest to with a df) dh_ohlc_cols_data = [x.split("_fd_") for x in ohlc_cols] + [["na", "na"]] @@ -508,7 +513,7 @@ def _instantiate_scaler(scaling_method): boolean_group_mask = df_ohlc_cols.suffix == unique_group_suffix ohlc_cols_groups.append(list(ohlc_cols.values[boolean_group_mask])) - # add scaler to non OHLC cols + # add scaler to non OHLC/volume cols scalers.append((_instantiate_scaler(scaling_method), non_ohlc_cols)) # add scalers to OHLC col groups: one for the 1st col; then same for rest @@ -516,6 +521,13 @@ def _instantiate_scaler(scaling_method): ohlc_scaler = _instantiate_scaler(scaling_method) scalers.append((ohlc_scaler, ohlc_cols_group.pop(0))) scalers.append((ohlc_scaler, ohlc_cols_group)) + + # add scaler to volume cols, like OHLC + for vol_group in [vol_cols, vol_quote_cols]: + vol_scaler = _instantiate_scaler(scaling_method) + scalers.append((vol_scaler, vol_group.pop(0))) + scalers.append((vol_scaler, vol_group)) + return scalers @staticmethod From 57b4f564086afec81dd0e5656a099d7d71eb652d Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sat, 23 Jan 2021 18:39:20 +0000 Subject: [PATCH 38/62] adding immediate reward like in the pgportfolio paper@ --- config/rl_config.yaml | 36 +++++++------- notebooks/modelling/rl_env.ipynb | 60 ++++++++++++++++-------- src/dagobert/modelling/rl/__init__.py | 2 +- 
src/dagobert/modelling/rl/environment.py | 4 +- src/dagobert/modelling/rl/networks.py | 8 ++-- src/dagobert/modelling/rl/ppo.py | 45 ++++++++++++------ 6 files changed, 94 insertions(+), 61 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 7576287c..b5f29836 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -23,7 +23,7 @@ tags: - RL_test no_comet_logger: True seed: 42 -batch_size: 240 +batch_size: 256 # -------------------------------------------------------------------------------------- # RL @@ -36,13 +36,13 @@ asset_names: - LTC trading_cost: 0.002 reward_type: return -max_episode_length: 240 -steps_per_epoch: 24000 -n_optim_iters: 2 -gamma: 0.0 -lam: 0.90 -lr_actor: 0.0003 -lr_critic: 0.0003 +max_episode_length: 500 +steps_per_epoch: 10000 +n_optim_iters: 4 +gamma: 0 +lam: 0 +lr_actor: 0.0001 +lr_critic: 0.0002 clip_ratio: 0.25 target_kl: 0.01 @@ -59,12 +59,12 @@ no_weight_norm: True # -------------------------------------------------------------------------------------- actor_num_channels: [50, 50, 50, 50, 50] -actor_kernel_size: 3 +actor_kernel_size: 5 actor_dropout: 0.2 # sample size - exp abs diff to mean | 20 - 5% | 50 - 3% | 100 - 2% | 500 - 1% actor_dirichlet_sample_size: 20 critic_num_channels: [50, 50, 50, 50, 50] -critic_kernel_size: 3 +critic_kernel_size: 5 critic_dropout: 0.2 use_last_timepoint: False @@ -79,14 +79,14 @@ lookback: auto mini_series_length: auto df_train: - anchor: std_bar_BTCUSDT_volume_100.feather - df2: std_bar_ETHUSDT_volume_500.feather - df3: std_bar_XRPUSDT_volume_125000.feather - df4: std_bar_LTCUSDT_volume_1000.feather - # anchor: std_bar_BTCUSDT_tick_1.feather - # df2: std_bar_ETHUSDT_tick_1.feather - # df3: std_bar_XRPUSDT_tick_1.feather - # df4: std_bar_LTCUSDT_tick_1.feather + # anchor: std_bar_BTCUSDT_volume_100.feather + # df2: std_bar_ETHUSDT_volume_500.feather + # df3: std_bar_XRPUSDT_volume_125000.feather + # df4: std_bar_LTCUSDT_volume_1000.feather + anchor: 
std_bar_BTCUSDT_tick_1.feather + df2: std_bar_ETHUSDT_tick_1.feather + df3: std_bar_XRPUSDT_tick_1.feather + df4: std_bar_LTCUSDT_tick_1.feather df_val: df_test: cols_to_model: diff --git a/notebooks/modelling/rl_env.ipynb b/notebooks/modelling/rl_env.ipynb index e4e1b0f2..e0172cb0 100644 --- a/notebooks/modelling/rl_env.ipynb +++ b/notebooks/modelling/rl_env.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -37,7 +37,7 @@ "array([0.48192771, 0.26506024, 0.25301205])" ] }, - "execution_count": 9, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -70,7 +70,7 @@ "0.00020481927710843396" ] }, - "execution_count": 7, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -96,7 +96,7 @@ "0.9997951807228915" ] }, - "execution_count": 53, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -107,7 +107,27 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p0" + ] + }, + { + "cell_type": "code", + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -116,7 +136,7 @@ "1.0372875000000001" ] }, - "execution_count": 45, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -129,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -138,7 
+158,7 @@ "0.03728750000000014" ] }, - "execution_count": 46, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -160,7 +180,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -169,11 +189,11 @@ "text": [ "p1 1.0366750000000002\n", "rho 0.036675000000000235\n", - "p1 1.0674461056875002\n", - "rho 0.029682500000000056\n", - "p1 1.0911367376956023\n", - "rho 0.022193750000000012\n", - "p1 1.0911367376956023\n", + "p1 1.0519309684687503\n", + "rho 0.014716249999999986\n", + "p1 1.05953774428449\n", + "rho 0.007231250000000022\n", + "p1 1.05953774428449\n", "rho 0.0\n" ] } @@ -196,16 +216,16 @@ "w1 = np.array([.8, .1, .1])\n", "p1 = step(y1, w1, w0, p0)\n", "\n", - "y2 = np.array([1, 1.2, 1.1])\n", + "y2 = np.array([1, 1.1, 1.05])\n", "w2 = np.array([.9, .05, .05])\n", "p2 = step(y2, w2, w1, p1)\n", "\n", "\n", - "y3 = np.array([1, 1.3, 1.15])\n", + "y3 = np.array([1, 1.1, 1.05])\n", "w3 = np.array([1, 0, 0])\n", "p3 = step(y3, w3, w2, p2)\n", "\n", - "y4 = np.array([1, 1.5, 1.5])\n", + "y4 = np.array([1, 1.1, 1.05])\n", "w4 = np.array([1, 0, 0])\n", "p4 = step(y4, w4, w3, p3)\n" ] diff --git a/src/dagobert/modelling/rl/__init__.py b/src/dagobert/modelling/rl/__init__.py index d4900664..fd00b0bd 100644 --- a/src/dagobert/modelling/rl/__init__.py +++ b/src/dagobert/modelling/rl/__init__.py @@ -1,3 +1,3 @@ from .environment import RLData, RLPortfolio, RLEnv -from .networks import ActorCriticTCN, ActorCriticAgent, ActorContinous +from .networks import ActorCriticTCN, ActorCriticAgent, ActorContinuous from .ppo import PPO diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index 760d9639..f31b1cb7 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -48,6 +48,7 @@ def __init__( train_val_test: Whether we are training, validating or testing, it must be either train, val or test. 
""" + # TODO: make multi head environment self.hparams = hparams if train_val_test == "train": @@ -137,9 +138,6 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: # (eq16) cost to change portfolio: # excluding change in cash to avoid double counting for transaction cost - # TODO: isn't this unrealistic? this would only work if there's an efficient - # market between all pairs within the portfilio, i.e XRP -> LTC, althought - # this is what pgportfolio uses too mu = self.trading_cost * (np.abs(dw1[1:] - w1[1:])).sum() # (eq11) final portfolio value: I thought this should be w1 (at the end), but diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 2a0365b5..69c11abc 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -122,12 +122,12 @@ def forward(self, states, past_pw): if self.sample_size > 0: actions = pi.sample((self.sample_size,)).mean(dim=0) else: - actions = pi.sample.mean + actions = pi.mean # very rarely we get actions that don't sum to 1 or are negative, fix it here - if actions.sum() != past_pw.shape[0]: - actions = torch.clip(actions, 0.01, 0.99) - actions = (actions.T / actions.sum(dim=1)).T + if actions.sum() != past_pw.shape[0] or torch.any(actions < 0): + actions = torch.clamp(actions, 0.01, 0.99) + actions = (actions.T / actions.sum(dim=-1)).T return pi, actions, logits def get_log_prob(self, pi: Dirichlet, actions: torch.Tensor): diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index ff488143..e93e3000 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -157,6 +157,7 @@ def generate_experience_buffer( Tuple of Lists containing tensors for states, actions, log probs, qvals and advantage. 
""" + # TODO: make this optional and multi head env the default and set no_weight_norm max_worker_steps = int(self.hparams.steps_per_epoch / self.hparams.num_workers) parallel_experiences = ParallelExperiences() device = self.setup_model_for_experience_gathering() @@ -254,12 +255,12 @@ def discount_rewards(rewards: List[float], gamma: float = 0.99) -> List[float]: list of discounted rewards/advantages """ assert isinstance(rewards[0], float) - cumul_reward = [] + cum_r = [] sum_r = 0.0 for r in reversed(rewards): sum_r = (sum_r * gamma) + r - cumul_reward.append(sum_r) - return list(reversed(cumul_reward)) + cum_r.append(sum_r) + return list(reversed(cum_r)) @staticmethod def calc_advantage( @@ -269,9 +270,9 @@ def calc_advantage( lam: float = 0.95, ) -> List[float]: """ - Calculate the advantage given rewards, state values, and last value of episode. + Calculate the advantage given rewards and state values for an episode. The advantage compares how much better the actor did compared to what the - critic thought the given state is worth in reward. + critic thought the given state is worth in reward (value). 
Args: rewards: list of episode rewards @@ -342,6 +343,7 @@ def configure_optimizers(self) -> List[optim.Optimizer]: return optimizer_actor, optimizer_critic def optimizer_step(self, *args, **kwargs): + # TODO: figure out a way to do kl divergence clipping for _ in range(self.hparams.n_optim_iters): super().optimizer_step(*args, **kwargs) @@ -363,10 +365,11 @@ def training_step( Returns: loss """ + # TODO: investigate rewards, advantage and why we get negative actor loss + state, past_pw, action, old_logp, qval, adv = batch # normalize advantages within batch adv = (adv - adv.mean()) / adv.std() - # log all metrics (other than loss) for k, v in self.to_log.items(): self.log(k, v, on_step=False, on_epoch=True) @@ -405,7 +408,7 @@ def _pre_sanity_check(hparams: Namespace): # -------------------------------------------------------------------------------------- -# HELPER CLASSES FOR PARALLEL EXPERIENCE COLLECTION +# PARALLEL EXPERIENCE COLLECTION # # Moving this to another module would result in circular dependencies. Been there, # done that, it was painful, so let's just leave these here. @@ -581,6 +584,8 @@ def gather_experience( Returns: Experience collected in this parallel worker. """ + from datetime import datetime + buffer = ExperienceBuffer() state = env.reset() past_pw = PPO._init_past_pw(asset_num, device) @@ -595,14 +600,24 @@ def gather_experience( past_pw = PPO._update_past_pw(info["portfolio_value"], actor_logits, device) terminal = len(buffer.ep_rewards) == max_episode_length - if done or terminal: - # this is specific to our special environment setup - # buffer.shift_rewards() - buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma) - buffer.advs += PPO.calc_advantage( - buffer.ep_rewards, buffer.ep_values, gamma, lam - ) - buffer.done_episodes += 1 + if done or terminal or step == max_steps - 1: + # if rewards are immediate, we need this is due to our special environment + # where the immediate reward of a_0 can only calculate at t_1. 
+ # if gamma == 0: + # buffer.shift_rewards() + # buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma) + # buffer.advs += PPO.calc_advantage( + # buffer.ep_rewards, buffer.ep_values, gamma, lam + # ) + + # according to the PGPortfolio paper, reward should be the sum of portfolio + # values, divided by length of episode - no discounting no BS, same for adv + epr = buffer.ep_rewards + epr = np.ones_like(epr) * sum(epr) / len(epr) + buffer.qvals += list(epr) + buffer.advs += list(epr - np.array(buffer.ep_values)) + if done or terminal: + buffer.done_episodes += 1 buffer.epoch_rewards += np.sum(buffer.ep_rewards) buffer.p_ep_end_value.append(info["portfolio_value"]) From e92e857826f1c9a288f961ce7e8b594e1197a3e0 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Tue, 26 Jan 2021 21:39:32 +0000 Subject: [PATCH 39/62] making ppo env and reward calculation as close to the pgportfolio paper as possible --- config/rl_config.yaml | 11 ++++---- src/dagobert/modelling/rl/environment.py | 1 - src/dagobert/modelling/rl/networks.py | 9 ++++++- src/dagobert/modelling/rl/ppo.py | 33 ++++++++++++------------ 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index b5f29836..1d398528 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -36,13 +36,13 @@ asset_names: - LTC trading_cost: 0.002 reward_type: return -max_episode_length: 500 -steps_per_epoch: 10000 -n_optim_iters: 4 +max_episode_length: 2048 +steps_per_epoch: 24576 +n_optim_iters: 6 gamma: 0 lam: 0 lr_actor: 0.0001 -lr_critic: 0.0002 +lr_critic: 0.0003 clip_ratio: 0.25 target_kl: 0.01 @@ -62,7 +62,7 @@ actor_num_channels: [50, 50, 50, 50, 50] actor_kernel_size: 5 actor_dropout: 0.2 # sample size - exp abs diff to mean | 20 - 5% | 50 - 3% | 100 - 2% | 500 - 1% -actor_dirichlet_sample_size: 20 +actor_dirichlet_sample_size: 0 critic_num_channels: [50, 50, 50, 50, 50] critic_kernel_size: 5 critic_dropout: 0.2 @@ -73,7 +73,6 @@ use_last_timepoint: 
False # -------------------------------------------------------------------------------------- data_dir: "C:/Work/dagobert/data/modelling" -#data_dir: "/home/daniel/dagobert_data/modelling" lookback: auto mini_series_length: auto diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index f31b1cb7..5bde2888 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -175,7 +175,6 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: for i, name in enumerate(["USD"] + self.asset_names): info["weight_" + name] = w1[i] info["price_" + name] = y1[i] - self.infos.append(info) return reward, info, done diff --git a/src/dagobert/modelling/rl/networks.py b/src/dagobert/modelling/rl/networks.py index 69c11abc..238a03db 100644 --- a/src/dagobert/modelling/rl/networks.py +++ b/src/dagobert/modelling/rl/networks.py @@ -56,6 +56,7 @@ def __init__( self.linear_a = nn.Linear(n_actions + 1, num_channels[-1]) self.linear1 = nn.Linear(hparams.mini_series_length, 1) self.linear2 = nn.Linear(num_channels[-1] * 2, output_size) + self.linear_m = nn.Linear(num_channels[-1] * 2, 1) def forward(self, state, past_pw): s1 = self.tcn(*state) @@ -65,7 +66,13 @@ def forward(self, state, past_pw): else: s2 = torch.tanh(self.linear1(s1).squeeze(-1)) # bring together the state and past_pw representations - return self.linear2(torch.cat([s2, a1], dim=1)) + if self.actor: + # m decides whether we update old weights or not by mixing past_pw and new + m = torch.sigmoid(self.linear_m(torch.cat([s2, a1], dim=1))) + past_w = past_pw[:, 1:] + return m * past_w + (1 - m) * self.linear2(torch.cat([s2, a1], dim=1)) + else: + return self.linear2(torch.cat([s2, a1], dim=1)) class ActorContinuous(nn.Module): diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index e93e3000..d27bc733 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ 
-227,16 +227,17 @@ def update_metrics_to_log(self): self.to_log[f"{e}/avg_total_reward"] = ep_rewards / done_eps self.to_log[f"{e}/avg_step_reward"] = ep_rewards / self.hparams.steps_per_epoch self.to_log[f"{e}/avg_len"] = self.hparams.steps_per_epoch / done_eps - self.to_log[f"{p}/avg_value_epend"] = ( + self.to_log[f"{p}/avg_value_ep_end"] = ( sum(list(self.buffer.p_ep_end_value)) / done_eps ) - + self.to_log[f"{p}/avg_market_return_ep_end"] = ( + sum(list(self.buffer.p_ep_end_market_return)) / done_eps + ) # need this otherwise the generator won't work multiple times infos = pd.DataFrame(list(self.buffer.infos)).mean() self.to_log[f"{p}/avg_value"] = infos["portfolio_value"] self.to_log[f"{p}/avg_weight_std"] = infos["weights_std"] self.to_log[f"{p}/avg_rebalancing_cost"] = infos["rebalancing_cost"] - self.to_log[f"{p}/avg_market_return"] = infos["market_return"] for w in infos.index[infos.index.str.contains("weight_")]: self.to_log[f"weights/{w}"] = infos[w] @@ -326,7 +327,7 @@ def actor_loss(self, state, past_pw, action, old_logp, adv) -> torch.Tensor: clip_ratio = torch.clamp( ratio, 1 - self.hparams.clip_ratio, 1 + self.hparams.clip_ratio ) - loss_actor = -(torch.min(ratio * adv, clip_ratio * adv)).mean() + loss_actor = -torch.min(ratio * adv, clip_ratio * adv).mean() return loss_actor, approx_kl def critic_loss(self, state, past_pw, qval) -> torch.Tensor: @@ -365,23 +366,17 @@ def training_step( Returns: loss """ - # TODO: investigate rewards, advantage and why we get negative actor loss - state, past_pw, action, old_logp, qval, adv = batch # normalize advantages within batch - adv = (adv - adv.mean()) / adv.std() + # adv = (adv - adv.mean()) / adv.std() + # log all metrics (other than loss) for k, v in self.to_log.items(): self.log(k, v, on_step=False, on_epoch=True) if optimizer_idx == 0: - loss_actor, approx_kl = self.actor_loss( state, past_pw, action, old_logp, adv ) - if torch.isnan(loss_actor): - from IPython import embed - - embed() 
 self.log("loss/actor", loss_actor, on_epoch=True, on_step=False)
 self.log("loss/approx_kl", approx_kl, on_epoch=True, on_step=False)
 return loss_actor
@@ -476,7 +471,6 @@ def shift_rewards(self):
 self.actions.pop(-1)
 self.logps.pop(-1)
 self.infos.pop(-1)
- self.ep_rewards.pop(-1)
 self.ep_values.pop(-1)
 def merge_buffers(self, buffers):
@@ -495,6 +489,9 @@ def merge_buffers(self, buffers):
 self.advs = chain(*[buffer.advs for buffer in buffers])
 self.infos = chain(*[buffer.infos for buffer in buffers])
 self.p_ep_end_value = chain(*[buffer.p_ep_end_value for buffer in buffers])
+ self.p_ep_end_market_return = chain(
+ *[buffer.p_ep_end_market_return for buffer in buffers]
+ )
 self.done_episodes = sum([buffer.done_episodes for buffer in buffers])
 self.epoch_rewards = sum([buffer.epoch_rewards for buffer in buffers])
@@ -509,6 +506,7 @@ def clear_buffer(self):
 self.logps = []
 self.infos = []
 self.p_ep_end_value = []
+ self.p_ep_end_market_return = []
 # episode / epoch vars
 self.ep_rewards = []
@@ -601,15 +599,15 @@ def gather_experience(
 terminal = len(buffer.ep_rewards) == max_episode_length
 if done or terminal or step == max_steps - 1:
- # if rewards are immediate, we need this is due to our special environment
- # where the immediate reward of a_0 can only calculate at t_1.
- # if gamma == 0:
- # buffer.shift_rewards()
 # buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma)
 # buffer.advs += PPO.calc_advantage(
 # buffer.ep_rewards, buffer.ep_values, gamma, lam
 # )
+ # if rewards are immediate, we need this shift due to our special environment
+ # where the immediate reward of a_0 can only be calculated at t_1. 
+ if gamma == 0: + buffer.shift_rewards() # according to the PGPortfolio paper, reward should be the sum of portfolio # values, divided by length of episode - no discounting no BS, same for adv epr = buffer.ep_rewards @@ -620,6 +618,7 @@ def gather_experience( buffer.done_episodes += 1 buffer.epoch_rewards += np.sum(buffer.ep_rewards) buffer.p_ep_end_value.append(info["portfolio_value"]) + buffer.p_ep_end_market_return.append(np.array(info["market_return"]).prod()) # episode over, reset the env and the episode buffer buffer.ep_rewards = [] From 9506e3ff06d69020540d25cdcc22a56e71f1f167 Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Tue, 26 Jan 2021 22:21:46 +0000 Subject: [PATCH 40/62] making ppo env and reward calculation as close to the pgportfolio paper as possible --- config/rl_config.yaml | 3 ++- src/dagobert/modelling/rl/environment.py | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 1d398528..55ed4acf 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -17,7 +17,7 @@ auto_scale_batch_size: # -------------------------------------------------------------------------------------- log_dir: logs -num_workers: 4 +num_workers: 1 exp_name: RL-PPO-TCN tags: - RL_test @@ -36,6 +36,7 @@ asset_names: - LTC trading_cost: 0.002 reward_type: return +num_env_heads: 4 max_episode_length: 2048 steps_per_epoch: 24576 n_optim_iters: 6 diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index 5bde2888..e6a57db3 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -45,10 +45,10 @@ def __init__( - augment_prob - augment_method - augment_dfs_mix + - num_env_heads train_val_test: Whether we are training, validating or testing, it must be either train, val or test. 
""" - # TODO: make multi head environment self.hparams = hparams if train_val_test == "train": @@ -69,21 +69,30 @@ def __init__( augment_dfs_mix=self.hparams.augment_dfs_mix, ) self.dataset_len = len(self.dataset) - self.idx = np.random.randint(self.dataset_len - self.hparams.max_episode_length) + self._reset_idxs() def step(self): - Xs, ys = self.dataset[self.idx] + from IPython import embed + + embed() + Xs, ys = self.dataset[self.idxs] # add cash price (always 1) to the new price vector y1 = np.concatenate([[1.0], ys]) # turn Xs into a batch of 1, ready to be fed into the actor/critic Xs = [torch.Tensor(x).unsqueeze(0) for x in Xs] - self.idx += 1 + self.idxs += 1 return Xs, y1 def reset(self): - self.idx = np.random.randint(self.dataset_len - self.hparams.max_episode_length) + self._reset_idxs() return self.step() + def _reset_idxs(self): + self.idxs = [ + np.random.randint(self.dataset_len - self.hparams.max_episode_length) + for _ in self.hparams.num_env_heads + ] + class RLPortfolio(object): """ From 6c31d1852d06a534fc236d2458e4209b936e1d5b Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Fri, 29 Jan 2021 17:38:04 +0000 Subject: [PATCH 41/62] sorting shit --- .../modelling/augmentation/tgan_args.py | 186 +++++++++++++++++ .../modelling/augmentation/tgan_runner.py | 38 ++++ .../modelling/augmentation/timegan.py | 195 +++++++++++++++++- src/dagobert/modelling/augmentation/utils.py | 47 +++++ 4 files changed, 458 insertions(+), 8 deletions(-) create mode 100644 src/dagobert/modelling/augmentation/tgan_args.py create mode 100644 src/dagobert/modelling/augmentation/tgan_runner.py diff --git a/src/dagobert/modelling/augmentation/tgan_args.py b/src/dagobert/modelling/augmentation/tgan_args.py new file mode 100644 index 00000000..32853ad6 --- /dev/null +++ b/src/dagobert/modelling/augmentation/tgan_args.py @@ -0,0 +1,186 @@ +""" +All custom arguments and hyper-parameters for the reinforcement learning module. 
+""" + +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + +from pytorch_lightning import Trainer + +from dagobert.modelling.dl.tcn import TCNLightning +from dagobert.modelling.dl.tcn_args import ( + add_run_specific_args, + add_data_specific_args, + add_preprocessing_specific_args, +) +from dagobert.naming import ( + NInputDataCols, + NAugmentationMethods, + NBarriers, + NPreprocessingArgs, +) + + +def add_rl_specific_args(parent_parser): + parser = ArgumentParser( + parents=[parent_parser], + add_help=False, + formatter_class=ArgumentDefaultsHelpFormatter, + ) + + # this is just a place-holder so it's easier to read the million params in the cmd + parser.add_argument("--RL_PARAMS", help="====================================") + parser.add_argument( + "--asset_names", + type=str, + nargs="+", + default=["BTC", "ETH"], + help=( + "Names of instruments to include in the portfolio, corresponding to " + "anchor, df2, df3, etc." + ), + ) + parser.add_argument( + "--trading_cost", + type=float, + default=0.002, + help="Commission rate of making trades + an estimated cost of slippage.", + ) + parser.add_argument( + "--reward_type", + type=str, + default="return", + help=( + "Determines the overall reward to maximise by the agent. Either return or " + "sharpe. See RLPortfolio class for more details." + ), + ) + parser.add_argument( + "--max_episode_length", + type=int, + default=1000, + help=( + "Maximum number of interactions between the agent and the environment in " + "an episode." + ), + ) + parser.add_argument( + "--steps_per_epoch", + type=int, + default=10000, + help=( + "How many action-state pairs to rollout for trajectory collection per " + "epoch. I.e. if all episodes run to their max_episode_length, we'll have " + "steps_per_epoch/max_episode_length number of unique episodes/trajectories." + ), + ) + parser.add_argument( + "--n_optim_iters", + type=int, + default=4, + help=( + "How many steps of gradient descent to perform on each batch. 
This might " + "seem weird, but it helps sampling efficiency, done by the original PPO " + "implementation and the Google ablation study found it to be useful." + ), + ) + parser.add_argument( + "--gamma", type=float, default=0.99, help="Discounting of rewards." + ) + parser.add_argument( + "--lam", + type=float, + default=0.95, + help="Lambda parameter in the advantage discounting equation.", + ) + parser.add_argument( + "--lr_actor", + type=float, + default=0.0003, + help="Learning rate for the actor/policy network.", + ) + parser.add_argument( + "--lr_critic", + type=float, + default=0.001, + help="Learning rate for the critic/value network.", + ) + parser.add_argument( + "--clip_ratio", + type=float, + default=0.2, + help="Clipping parameter for the PPO's policy upgrade cost function.", + ) + + return parser + + +def add_model_specific_args(parent_parser): + parser = ArgumentParser( + parents=[parent_parser], + add_help=False, + formatter_class=ArgumentDefaultsHelpFormatter, + ) + + # this is just a place-holder so it's easier to read the million params in the cmd + parser.add_argument("--MODEL_PARAMS", help="====================================") + parser.add_argument( + "--actor_num_channels", + type=int, + nargs="+", + default=[50, 50, 50, 50, 50], + help=( + "Determines the number of layers (depth) of the actor / policy network and " + "the hidden unit count in each layer." + ), + ) + parser.add_argument( + "--critic_num_channels", + type=int, + nargs="+", + default=[50, 50, 50, 50, 50], + help=( + "Determines the number of layers (depth) of the critic / value network and " + "the hidden unit count in each layer." 
+ ), + ) + parser.add_argument("--actor_kernel_size", type=int, default=5, help=" ") + parser.add_argument("--critic_kernel_size", type=int, default=5, help=" ") + parser.add_argument("--actor_dropout", type=float, default=0, help=" ") + parser.add_argument("--critic_dropout", type=float, default=0, help=" ") + parser.add_argument( + "--no_class_weights", + action="store_true", + help=( + "Set this to True so we can leverage the Preprocessing pipeline written " + "for the supervised DL module." + ), + ) + parser.add_argument( + "--use_last_timepoint", + action="store_true", + help=( + "If this flag is used the only the network's representation " + "corresponding at the latest time-point is used to predict the outcome." + "By default, we combine all representations across the sequence length" + "to make a prediction from, instead of just using the last one." + ), + ) + return parser + + +def get_all_args(): + parser = ArgumentParser( + description="Lightning RL module", + formatter_class=ArgumentDefaultsHelpFormatter, + ) + + # add model params of lightning trainer (this HAS to be first) + parser = Trainer.add_argparse_args(parser) + + # add model and run specific params + parser = add_rl_specific_args(parser) + parser = add_model_specific_args(parser) + parser = add_run_specific_args(parser) + parser = add_data_specific_args(parser) + parser = add_preprocessing_specific_args(parser) + return parser.parse_args() diff --git a/src/dagobert/modelling/augmentation/tgan_runner.py b/src/dagobert/modelling/augmentation/tgan_runner.py new file mode 100644 index 00000000..3ae6c027 --- /dev/null +++ b/src/dagobert/modelling/augmentation/tgan_runner.py @@ -0,0 +1,38 @@ +""" +Dagobert's runner for TimeGAN. + +This module is driven by the `dagobert-tgan` command which can be parametrised by +command line arguments, but it's much more convenient to use YAML configs for this, +see the `tcn_args.py` and `tgan_args.py` for more detail. 
+""" +import logging +from pathlib import Path + +from dagobert.utils import setup_logging +from dagobert.runner_utils import load_config, update_args +from dagobert.modelling.augmentation.tgan_args import get_all_args +from dagobert.modelling.augmentation.timegan import run_tgan + + +logger = logging.getLogger(__name__) + + +def run(): + """ + Initialise a TimeGan network and train it. + """ + + # parse arguments and setup logging + args = get_all_args() + setup_logging(logger, "dagobert-tgan", logging.INFO, args.log_dir) + + # load config yaml if exist + if args.config_path != "": + config = load_config(Path(args.config_path)) + args = update_args(args, config) + + run_tgan(args) + + +if __name__ == "__main__": + run() diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 6dadf8df..c750aa49 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -10,6 +10,7 @@ from copy import deepcopy from pathlib import Path +from sklearn.manifold import TSNE import numpy as np import pandas as pd import matplotlib @@ -37,7 +38,7 @@ LogCoshLoss, FocalLoss, ) -from dagobert.modelling.augmentation.utils import get_noise +from dagobert.modelling.augmentation.utils import get_noise, pca_analysis logger = logging.getLogger(__name__) @@ -70,7 +71,7 @@ def run_tgan(args): filename="_{epoch:02d}_{avg_reward:.10f}", dirpath=f"{args.log_dir}/models/{args.exp_name}_{tb_logger.version}", save_top_k=3, - mode="max", + mode="min", ) # define trainer and and lightning module @@ -141,11 +142,13 @@ def forward(self, x): class TimeGANLightning(LightningModule): """ Lightning model made of 5 RNN nets working together: - - Embedding network between original feature space to latent space. + - Embedding network between original feature space to latent space, provides + lower-dimensional adversarial learning space. - Recovery network from latent space to original space. 
- Generator function: generate time-series data in latent space. - Discriminate the original and synthetic time-series data - - Supervisor generating next sequence using the previous sequence. + - Supervisor generating next sequence using the previous sequence to better + capture temporal dynamics """ # ---------------------------------------------------------------------------------- @@ -170,7 +173,7 @@ def __init__(self, hparams: Namespace): self.tgan_device = "cuda" if hparams.gpus > 0 else "cpu" # prepare datafiles if necessary hparams = Preprocessing().preprocess_train_dfs(hparams) - # TODO: check if real data is the right one, get data in + # TODO: any sanity checks on data, hypermparams self.real_logging = None self.comet_logging = not self.hparams.no_comet_logger @@ -361,14 +364,20 @@ def training_step(self, batch, batch_idx, optimizer_idx): y_fake, y_fake_e, y_real, self.hparams.emb_weight ) self.log( - "loss_disc", + "loss_disc/train", loss_disc, on_step=False, on_epoch=True, prog_bar=True, logger=True, ) - return loss_disc + # pytorch lightning needs to have "loss" in the return dict + return { + "loss_disc/train": loss_disc, + "y_fake/train": y_fake, + "y_fake_e/train": y_fake_e, + "y_real/train": y_real, + } def configure_optimizers(self) -> List[optim.Optimizer]: """ @@ -395,6 +404,57 @@ def configure_optimizers(self) -> List[optim.Optimizer]: def train_dataloader(self): return self._get_dataloader(self.hparams.df_train, "train") + def training_epoch_end(self, outputs): + return self._epoch_end(outputs, "train") + + def validation_step(self, batch, batch_idx): + x = batch + batch_len = len(x) + + # noise + z = get_noise( + batch_len, + self.hparams.mini_series_length, + self.hparams.z_dim, + device=self.tgan_device, + ) + z = z.to(self.generator.model[0].weight.dtype) + + # generate fake data and compare with validation set + h = self.embedder(x) + e_hat = self.generator(z) + h_hat = self.supervisor(e_hat) + x_hat = self.recovery(h_hat) + + y_fake = 
self.discriminator(h_hat.detach()) + y_fake_e = self.discriminator(e_hat.detach()) + y_real = self.discriminator(h.detach()) + + pca_x, pca_x_hat = pca_analysis(x, x_hat) + + loss_disc = TimeGANLightning.discriminator_loss( + y_fake, y_fake_e, y_real, self.hparams.emb_weight + ) + self.log( + "loss_disc/val", + loss_disc, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return { + "loss_disc/val": loss_disc, + "y_fake/val": y_fake, + "y_fake_e/val": y_fake_e, + "y_real/val": y_real, + "pca_x/val": pca_x, + "pca_x_hat/val": pca_x_hat, + } + + def validation_epoch_end(self, outputs): + return self._epoch_end(outputs, "val") + def val_dataloader(self): return self._get_dataloader(self.hparams.df_val, "val") @@ -431,7 +491,7 @@ def _get_dataloader(self, dfs_to_load: dict, prefix: str) -> DataLoader: ) # ---------------------------------------------------------------------------------- - # CALCULATION + # LOSS CALCULATION # ---------------------------------------------------------------------------------- @staticmethod def embed_loss0(x_tilde, x): @@ -544,6 +604,125 @@ def discriminator_loss(y_fake, y_fake_e, y_real, emb_weight): # TODO: any use of dividing loss by (2 + emb_weight)? return emb_weight * d_loss_fake_e + d_loss_fake + d_loss_real + # ---------------------------------------------------------------------------------- + # OTHER CALCULATION + # ---------------------------------------------------------------------------------- + def _epoch_end(self, outputs, prefix="val"): + """ + We average the loss across all batches, calculate metrics based on all batches + and log them. Finally, we make plots using all the y_true and y_preds. 
+ Args: + outputs: + prefix: + + Returns: + + """ + avg_loss = [] + y_true = [] + y_fake = [] + y_fake_e = [] + for x in outputs: + avg_loss.append(x[f"loss_disc/{prefix}"]) + y_true.append(x[f"y_true/{prefix}"]) + y_fake.append(x[f"y_fake/{prefix}"]) + y_fake_e.append(x[f"y_fake_e/{prefix}"]) + # log sampled images + self._make_plots(y_true, y_fake, prefix) + + def _calculate_metrics(self, y_true, y_pred, prefix): + """ + Calculates and logs various metrics for regression and classification use cases. + """ + if self.hparams.output_size == 1: + y_pred = y_pred.squeeze(-1) + y_true = y_true.squeeze(-1) + + if self.hparams.regression or self.hparams.mix_density_net: + mae = plm.mean_absolute_error(y_pred, y_true) + self.log(f"mean_absolute_error/{prefix}", mae) + spearman = spearmanr(t2n(y_pred), t2n(y_true)).correlation + self.log(f"spearman_r/{prefix}", spearman) + else: + if self.hparams.output_size == 1: + self.log(f"au_roc/{prefix}", plm.auroc(y_pred, y_true)) + prec, rec, _ = plm.precision_recall_curve(y_pred, y_true) + self.log(f"au_pr/{prefix}", plm.auc(rec, prec)) + elif self.hparams.output_size == 3: + y_pred = torch.argmax(torch.softmax(y_pred, dim=1), dim=1) + cm = cm_from_tensor(y_true, y_pred) + self.log(f"triple_barrier_error/{prefix}", triple_barrier_error(cm)) + self.log(f"non_vertical_error/{prefix}", non_vertical_error(cm)) + + # ---------------------------------------------------------------------------------- + # PLOTTING AND LOGGING FUNCTIONS + # ---------------------------------------------------------------------------------- + def _make_plots(self, y_true, y_pred, prefix): + """ + Makes the following useful summary plots of true and predicted ys: + - scatter plot of y_true and y_pred + - histogram of y_true, y_pred + - AUPR, AUROC + - confusion matrices for classification + """ + # SCATTER + if self.hparams.regression or self.hparams.mix_density_net: + self._log_image( + f"true v pred scatter/{prefix}", + plot_from_tensor(y_true, y_pred), + 
self.current_epoch, + ) + else: + # HISTOGRAM + if self.hparams.output_size == 1: + y_pred_class = (torch.sigmoid(y_pred) > 0.5).int() + y_pred_for_hist = torch.sigmoid(y_pred) + else: + y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1) + y_pred_for_hist = y_pred_class + self._log_image( + f"true v pred hist/{prefix}", + hist_from_tensor(y_true, y_pred_for_hist), + self.current_epoch, + ) + + # PR, ROC + if self.hparams.output_size == 1: + y_pred = y_pred.squeeze(-1) + y_true = y_true.squeeze(-1) + fpr, tr, _ = plm.roc(y_pred, y_true) + self._log_image( + f"roc/{prefix}", + plot_from_tensor(fpr, tr, "line", "FPR", "TPR"), + self.current_epoch, + ) + prec, rec, thr = plm.precision_recall_curve(y_pred, y_true) + self._log_image( + f"pr/{prefix}", + plot_from_tensor(rec, prec, "line", "recall", "precision"), + self.current_epoch, + ) + + # CM - high confidence binary classification + proba_filter = self.hparams.confident_binary_proba_threshold + if y_pred.max() >= proba_filter or y_pred.min() <= (1.0 - proba_filter): + self._log_image( + f"high confidence true v pred cm/{prefix}", + plot_cm(cm_from_tensor(y_true, y_pred, proba_filter)), + self.current_epoch, + ) + + # CMs - all classification + self._log_image( + f"true v pred cm/{prefix}", + plot_cm(cm_from_tensor(y_true, y_pred_class)), + self.current_epoch, + ) + + # ---------------------------------------------------------------------------------- + # SANITY CHECK FUNCTIONS + # ---------------------------------------------------------------------------------- + @staticmethod def _pre_sanity_check(hparams: Namespace): # ensure we have the rl specific target column in the config diff --git a/src/dagobert/modelling/augmentation/utils.py b/src/dagobert/modelling/augmentation/utils.py index 72e162eb..9137d97d 100644 --- a/src/dagobert/modelling/augmentation/utils.py +++ b/src/dagobert/modelling/augmentation/utils.py @@ -1,6 +1,8 @@ """Util functions for TimeGAN and other augmentation related tasks""" 
import torch +import numpy as np +from sklearn.decomposition import PCA def get_noise(n_samples: int, mini_series_length: int, z_dim: int, device: str = "cpu"): @@ -19,3 +21,48 @@ def get_noise(n_samples: int, mini_series_length: int, z_dim: int, device: str = Tensor of filled with random numbers from uniform distribution. """ return torch.rand(n_samples, mini_series_length, z_dim, device=device) + + +def pca_analysis(x, x_hat, components: int = 2): + """ + PCA on 2 (real and synthetic) datasets + Args: + x: real data of shape (batch, time, feature) + x_hat: synthetic data of the same shape + components: number of pca components to keep + + Returns: + 2 arrays of PCA-reduced real and synthetic data + """ + x = np.asarray(x) + x_hat = np.asarray(x_hat) + + x = np.mean(x, 2) + x_hat = np.mean(x_hat, 2) + + # Parameters + No = x.shape[0] + colors = ["red" for i in range(No)] + ["blue" for i in range(No)] + + # PCA Analysis + pca = PCA(n_components=components) + pca.fit(x) + pca_results = pca.transform(x) + pca_hat_results = pca.transform(x_hat) + return pca_results, pca_hat_results + # Plotting + """ + f, ax = plt.subplots(1) + + plt.scatter(pca_results[:, 0], pca_results[:, 1], c=colors[:No], alpha=0.2, + label="Original") + plt.scatter(pca_hat_results[:, 0], pca_hat_results[:, 1], c=colors[No:], + alpha=0.2, label="Synthetic") + + ax.legend() + + plt.title('PCA plot') + plt.xlabel('x-pca') + plt.ylabel('y_pca') + plt.show() + """ From c504ca743428712f661d1da80c14757e1c51f6b9 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Fri, 29 Jan 2021 19:21:33 +0000 Subject: [PATCH 42/62] plots --- .../modelling/augmentation/timegan.py | 154 ++++++++---------- src/dagobert/modelling/augmentation/utils.py | 24 --- src/dagobert/modelling/dl/data.py | 4 +- src/dagobert/modelling/utils.py | 30 ++++ 4 files changed, 103 insertions(+), 109 deletions(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index c750aa49..f26acde9 
100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -28,6 +28,7 @@ from pytorch_lightning.trainer import seed_everything from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning import Trainer, Callback, loggers +from pytorch_lightning.metrics import functional as plm from dagobert.naming import NStudy, NPreprocessingArgs as npa from dagobert.modelling.dl import ( @@ -39,6 +40,20 @@ FocalLoss, ) from dagobert.modelling.augmentation.utils import get_noise, pca_analysis +from dagobert.modelling.utils import ( + triple_barrier_error, + non_vertical_error, + t2n, + cm_from_tensor, + hist_from_tensor, + plot_from_tensor, + plot_cm, + fig_to_tb, + fig_to_comet, + plot_pca, + update_lookback, + plot_anchor_sample, +) logger = logging.getLogger(__name__) @@ -430,7 +445,7 @@ def validation_step(self, batch, batch_idx): y_fake_e = self.discriminator(e_hat.detach()) y_real = self.discriminator(h.detach()) - pca_x, pca_x_hat = pca_analysis(x, x_hat) + pca_x, pca_x_hat = pca_analysis(t2n(x), t2n(x_hat)) loss_disc = TimeGANLightning.discriminator_loss( y_fake, y_fake_e, y_real, self.hparams.emb_weight @@ -483,6 +498,7 @@ def _get_dataloader(self, dfs_to_load: dict, prefix: str) -> DataLoader: last_y=self.hparams.last_y, data_dir=self.hparams.data_dir, ) + self._plot_dataset(*dataset.plot(), prefix) return DataLoader( dataset, batch_size=self.hparams.batch_size, @@ -619,105 +635,77 @@ def _epoch_end(self, outputs, prefix="val"): """ avg_loss = [] - y_true = [] + y_real = [] y_fake = [] y_fake_e = [] + pca_x = [] + pca_x_hat = [] for x in outputs: avg_loss.append(x[f"loss_disc/{prefix}"]) - y_true.append(x[f"y_true/{prefix}"]) + y_real.append(x[f"y_real/{prefix}"]) y_fake.append(x[f"y_fake/{prefix}"]) y_fake_e.append(x[f"y_fake_e/{prefix}"]) + pca_x.append(x[f"pca_x/{prefix}"]) + pca_x_hat.append(x[f"pca_x_hat/{prefix}"]) # log sampled images - self._make_plots(y_true, y_fake, prefix) - - def 
_calculate_metrics(self, y_true, y_pred, prefix): - """ - Calculates and logs various metrics for regression and classification use cases. - """ - if self.hparams.output_size == 1: - y_pred = y_pred.squeeze(-1) - y_true = y_true.squeeze(-1) - - if self.hparams.regression or self.hparams.mix_density_net: - mae = plm.mean_absolute_error(y_pred, y_true) - self.log(f"mean_absolute_error/{prefix}", mae) - spearman = spearmanr(t2n(y_pred), t2n(y_true)).correlation - self.log(f"spearman_r/{prefix}", spearman) - else: - if self.hparams.output_size == 1: - self.log(f"au_roc/{prefix}", plm.auroc(y_pred, y_true)) - prec, rec, _ = plm.precision_recall_curve(y_pred, y_true) - self.log(f"au_pr/{prefix}", plm.auc(rec, prec)) - elif self.hparams.output_size == 3: - y_pred = torch.argmax(torch.softmax(y_pred, dim=1), dim=1) - cm = cm_from_tensor(y_true, y_pred) - self.log(f"triple_barrier_error/{prefix}", triple_barrier_error(cm)) - self.log(f"non_vertical_error/{prefix}", non_vertical_error(cm)) + self._make_plots(y_real, y_fake, pca_x, pca_x_hat, prefix) # ---------------------------------------------------------------------------------- # PLOTTING AND LOGGING FUNCTIONS # ---------------------------------------------------------------------------------- - def _make_plots(self, y_true, y_pred, prefix): + def _plot_dataset( + self, fig_close: Figure, fig_data: Figure, fig_target: Figure, prefix: str + ): """ - Makes the following useful summary plots of true and predicted ys: - - scatter plot of y_true and y_pred - - histogram of y_true, y_pred - - AUPR, AUROC - - confusion matrices for classification + Plots the close price and the target column of the train/val/test datasets. + + Args: + fig_close: First element of the returned tuple of `CryptoDataset.plot()` + fig_data: Second element of the returned tuple of `CryptoDataset.plot()` + prefix: One of train, val, test. 
""" - # SCATTER - if self.hparams.regression or self.hparams.mix_density_net: - self._log_image( - f"true v pred scatter/{prefix}", - plot_from_tensor(y_true, y_pred), - self.current_epoch, - ) - else: - # HISTOGRAM - if self.hparams.output_size == 1: - y_pred_class = (torch.sigmoid(y_pred) > 0.5).int() - y_pred_for_hist = torch.sigmoid(y_pred) - else: - y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1) - y_pred_for_hist = y_pred_class - self._log_image( - f"true v pred hist/{prefix}", - hist_from_tensor(y_true, y_pred_for_hist), - self.current_epoch, - ) + self._log_image(f"anchor_close/{prefix}", fig_close, 0) - # PR, ROC - if self.hparams.output_size == 1: - y_pred = y_pred.squeeze(-1) - y_true = y_true.squeeze(-1) - fpr, tr, _ = plm.roc(y_pred, y_true) - self._log_image( - f"roc/{prefix}", - plot_from_tensor(fpr, tr, "line", "FPR", "TPR"), - self.current_epoch, - ) - prec, rec, thr = plm.precision_recall_curve(y_pred, y_true) - self._log_image( - f"pr/{prefix}", - plot_from_tensor(rec, prec, "line", "recall", "precision"), - self.current_epoch, + def _log_image(self, image_name, image_data, i): + """ + Logs any generated image to both tensorboard and comet. + """ + if self.real_logging: + self.logger.experiment[0].add_image(image_name, fig_to_tb(image_data), i) + if self.comet_logging: + self.logger.experiment[1].log_image( + fig_to_comet(image_data), name=image_name, step=i ) - # CM - high confidence binary classification - proba_filter = self.hparams.confident_binary_proba_threshold - if y_pred.max() >= proba_filter or y_pred.min() <= (1.0 - proba_filter): - self._log_image( - f"high confidence true v pred cm/{prefix}", - plot_cm(cm_from_tensor(y_true, y_pred, proba_filter)), - self.current_epoch, - ) + def _log_graph(self, datasets: GeneratorCryptoDataset): + """ + Logs the graph of the model to both tensorboard and comet. 
+ """ + examples_dataloader = DataLoader(datasets, batch_size=32) + example_shapes = [xi.shape for xi in next(iter(examples_dataloader))[0]] + examples = [torch.rand(*s).float().to(self.tgan_device) for s in example_shapes] + if self.real_logging: + self.logger.experiment[0].add_graph(self, examples) - # CMs - all classification - self._log_image( - f"true v pred cm/{prefix}", - plot_cm(cm_from_tensor(y_true, y_pred_class)), - self.current_epoch, - ) + def _make_plots(self, y_real, y_fake, pca_x, pca_x_hat, prefix): + """ + Makes following useful summary plots: + - plotting 2-dim PCA for visualising diversity learned + - (discriminator's) histogram of y_true, y_fake, y_fake_e + """ + # PCA SCATTER + self._log_image( + f"real v fake PCA-scatter/{prefix}", + plot_pca(pca_x, pca_x_hat), + self.current_epoch, + ) + + # HISTOGRAM + self._log_image( + f"real v fake hist/{prefix}", + hist_from_tensor(y_real, y_fake), + self.current_epoch, + ) # ---------------------------------------------------------------------------------- # SANITY CHECK FUNCTIONS diff --git a/src/dagobert/modelling/augmentation/utils.py b/src/dagobert/modelling/augmentation/utils.py index 9137d97d..11351f66 100644 --- a/src/dagobert/modelling/augmentation/utils.py +++ b/src/dagobert/modelling/augmentation/utils.py @@ -34,35 +34,11 @@ def pca_analysis(x, x_hat, components: int = 2): Returns: 2 arrays of PCA-reduced real and synthetic data """ - x = np.asarray(x) - x_hat = np.asarray(x_hat) - x = np.mean(x, 2) x_hat = np.mean(x_hat, 2) - # Parameters - No = x.shape[0] - colors = ["red" for i in range(No)] + ["blue" for i in range(No)] - - # PCA Analysis pca = PCA(n_components=components) pca.fit(x) pca_results = pca.transform(x) pca_hat_results = pca.transform(x_hat) return pca_results, pca_hat_results - # Plotting - """ - f, ax = plt.subplots(1) - - plt.scatter(pca_results[:, 0], pca_results[:, 1], c=colors[:No], alpha=0.2, - label="Original") - plt.scatter(pca_hat_results[:, 0], pca_hat_results[:, 1], 
c=colors[No:], - alpha=0.2, label="Synthetic") - - ax.legend() - - plt.title('PCA plot') - plt.xlabel('x-pca') - plt.ylabel('y_pca') - plt.show() - """ diff --git a/src/dagobert/modelling/dl/data.py b/src/dagobert/modelling/dl/data.py index 779b44e6..5de98d3e 100644 --- a/src/dagobert/modelling/dl/data.py +++ b/src/dagobert/modelling/dl/data.py @@ -640,8 +640,8 @@ class GeneratorCryptoDataset(CryptoDataset): CryptoDataset as possible, without extensive refactoring. """ - def __init__(self, *args, **kw): - super().__init__(*args, **kw) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) def __getitem__(self, idx): """ diff --git a/src/dagobert/modelling/utils.py b/src/dagobert/modelling/utils.py index e23befd4..90bf465c 100644 --- a/src/dagobert/modelling/utils.py +++ b/src/dagobert/modelling/utils.py @@ -421,3 +421,33 @@ def plot_anchor_sample(i, obj, x): cols = obj.hparams.cols_to_model["anchor"] df = pd.DataFrame(x[0][i].detach().cpu().numpy().T, columns=cols) df.plot(subplots=True, layout=(int(np.ceil((len(cols) / 4))), 4)) + + +def plot_pca(pca_x, pca_x_hat): + """ + Plot PCA-reduced x and x_hat to visualise similarity. Overlap suggests similarity. + Args: + pca_x: 2-component-PCA of x + pca_x_hat: 2-component-PCA of x_hat + + Returns: + Scatter plot showing 2-component-PCA of x & x_hat. 
+ """ + + f, ax = plt.subplots(1) + length = pca_x.shape[0] + colors = ["red" for i in range(length)] + ["blue" for i in range(length)] + plt.scatter(pca_x[:, 0], pca_x[:, 1], c=colors[:length], alpha=0.2, label="Real") + plt.scatter( + pca_x_hat[:, 0], + pca_x_hat[:, 1], + c=colors[length:], + alpha=0.2, + label="Synthetic", + ) + ax.legend() + plt.title("PCA plot") + plt.xlabel("x-pca") + plt.ylabel("y_pca") + plt.close() + return f From d48c2fb18647e5e0dcdce32dfea5da3a988514cb Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sat, 30 Jan 2021 17:49:45 +0000 Subject: [PATCH 43/62] adding multi head environment to speed up experience gathering without multiprocessing --- config/rl_config.yaml | 16 +- src/dagobert/modelling/dl/tcn_args.py | 13 -- src/dagobert/modelling/rl/environment.py | 106 +++++----- src/dagobert/modelling/rl/ppo.py | 242 ++++++++++++++--------- src/dagobert/modelling/rl/rl_args.py | 48 ++++- 5 files changed, 265 insertions(+), 160 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 55ed4acf..8f1bd392 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -23,7 +23,7 @@ tags: - RL_test no_comet_logger: True seed: 42 -batch_size: 256 +batch_size: 500 # -------------------------------------------------------------------------------------- # RL @@ -36,12 +36,15 @@ asset_names: - LTC trading_cost: 0.002 reward_type: return -num_env_heads: 4 -max_episode_length: 2048 -steps_per_epoch: 24576 +num_env_heads: 20 +num_env_workers: 1 +normalize_advantages: True +pgportfolio: True +max_episode_length: 1000 +steps_per_epoch: 100000 n_optim_iters: 6 -gamma: 0 -lam: 0 +gamma: 0.99 +lam: 0.95 lr_actor: 0.0001 lr_critic: 0.0003 clip_ratio: 0.25 @@ -53,7 +56,6 @@ target_col: rl_return to_label: False no_sample_weights: True binariser_method: -no_weight_norm: True # -------------------------------------------------------------------------------------- # MODEL diff --git a/src/dagobert/modelling/dl/tcn_args.py 
b/src/dagobert/modelling/dl/tcn_args.py index 21df5792..d08b0a0b 100644 --- a/src/dagobert/modelling/dl/tcn_args.py +++ b/src/dagobert/modelling/dl/tcn_args.py @@ -188,19 +188,6 @@ def add_model_specific_args(parent_parser): "multi-class (3) classification with CrossEntropyLoss." ), ) - parser.add_argument( - "--no_weight_norm", - action="store_true", - help=( - " Weight norm is registered as a pre_forward_hook on the 1D convolutional " - "layers of the TemporalBlock, and these cannot be serialised when training " - "with parallel processes interacting with the model concurrently. If True, " - "we add weight normalisation around these layers, and TCN cannot be used " - "in a multiprocessing setting. If False, then it can be used, even staying " - "on GPU in linux (CPU only on Windows)." - ), - ) - parser.add_argument( "--no_class_weights", action="store_true", diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index e6a57db3..6218ee38 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -10,6 +10,7 @@ import torch import numpy as np from torch.utils.data import Dataset, DataLoader +from torch.utils.data.dataloader import default_collate from dagobert.naming import NPreprocessingArgs as npa from dagobert.modelling.dl import PortfolioCryptoDataset @@ -21,8 +22,10 @@ class RLData(object): """ - Leverages the data class and configuration methods from the `dagobert.modelling.dl` - module as much as possible. + Creates a multi-head data reader, meaning, we can concurrently return the next + state of the environment for arbitrarily many times (see step function). Leverages + the data class and configuration methods from the `dagobert.modelling.dl` module as + much as possible. """ def __init__( @@ -34,8 +37,8 @@ def __init__( Class constructor. Args: - hparams: Hyparams parsed by the rl_runner. 
Similar to how `TCNLightning` is - initialized with the following fields: + hparams: Hyperparams parsed by the rl_runner. Similar to how `TCNLightning` + is initialized with the following fields: - max_episode_length - cols_to_model - target_col @@ -50,7 +53,6 @@ def __init__( either train, val or test. """ self.hparams = hparams - if train_val_test == "train": augment_dfs = self.hparams.augment_dfs augment_method = self.hparams.augment_method @@ -69,13 +71,11 @@ def __init__( augment_dfs_mix=self.hparams.augment_dfs_mix, ) self.dataset_len = len(self.dataset) + self.latest_idx = self.dataset_len - self.hparams.max_episode_length self._reset_idxs() - def step(self): - from IPython import embed - - embed() - Xs, ys = self.dataset[self.idxs] + def step2(self): + Xs, ys = self.dataset[self.idxs[0]] # add cash price (always 1) to the new price vector y1 = np.concatenate([[1.0], ys]) # turn Xs into a batch of 1, ready to be fed into the actor/critic @@ -83,15 +83,30 @@ def step(self): self.idxs += 1 return Xs, y1 + def step(self): + Xs = [] + ys = [] + for idx in self.idxs: + X, y = self.dataset[idx] + # Xs.append([torch.Tensor(x).unsqueeze(0) for x in X]) + # making sure have float32 data so we don't get torch.float64 tensors later + Xs.append([x.astype("float32") for x in X]) + ys.append(y) + self.idxs += 1 + + # add cash price (always 1) to the new price vector (a column of ones) + ys = np.vstack(ys) + y1 = np.ones((ys.shape[0], ys.shape[1] + 1)) + y1[:, 1:] = ys + return default_collate(Xs), y1 + def reset(self): self._reset_idxs() return self.step() def _reset_idxs(self): - self.idxs = [ - np.random.randint(self.dataset_len - self.hparams.max_episode_length) - for _ in self.hparams.num_env_heads - ] + # reset all head's starting index + self.idxs = np.random.randint(self.latest_idx, size=self.hparams.num_env_heads) class RLPortfolio(object): @@ -116,10 +131,12 @@ def __init__(self, hparams: Namespace): hparams: Hyparams parsed by the rl_runner. 
Similar to how `TCNLightning` is initialized with the following fields: - asset_names + - num_env_heads - trading_cost - reward_type """ self.asset_names = hparams.asset_names + self.num_env_heads = hparams.num_env_heads self.asset_n = len(self.asset_names) self.trading_cost = hparams.trading_cost self.reward_type = hparams.reward_type @@ -140,19 +157,17 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: """ w0 = self.w0 p0 = self.p0 - assert y1[0] == 1, "Cash price has to remain constant: 1." # (eq7) since we last acted prices changed, so weights evolve into - dw1 = (y1 * w0) / (np.dot(y1, w0) + eps) + new_price_old_weights_sum = np.sum(y1 * w0, axis=1) + dw1 = ((y1 * w0).T / (new_price_old_weights_sum + eps)).T # (eq16) cost to change portfolio: # excluding change in cash to avoid double counting for transaction cost - mu = self.trading_cost * (np.abs(dw1[1:] - w1[1:])).sum() + mu = self.trading_cost * (np.abs(dw1[:, 1:] - w1[:, 1:])).sum(axis=1) - # (eq11) final portfolio value: I thought this should be w1 (at the end), but - # then think through how the env actually models the world (see Figure 1), w0 - # (which is the original implementation) makes sense here. 
- p1 = p0 * (1 - mu) * np.dot(y1, w0) + # (eq11) final portfolio value + p1 = p0 * (1 - mu) * new_price_old_weights_sum # (eq9 & 10) rate of return log rate of return rho1 = p1 / p0 - 1 # rate of returns @@ -161,37 +176,38 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: # (eq22) immediate reward is log rate of return scaled by episode length if self.reward_type == "return": reward = r1 - # TODO: implement the differentiable sharpe ratio reward like so https://quant.stackexchange.com/a/38040 + # TODO: implement the differentiable sharpe ratio reward + # https://quant.stackexchange.com/a/38040 # remember for next step self.w0 = w1 self.p0 = p1 - # if we run out of money, we're done - done = p1 <= 0 - - # should only return single values, not list - info = { - "reward": reward, - "log_return": r1, - "portfolio_value": p1, - "market_return": y1.mean(), - "rate_of_return": rho1, - "weights_std": w1.std(), - "rebalancing_cost": mu, - } - # record weights and prices - for i, name in enumerate(["USD"] + self.asset_names): - info["weight_" + name] = w1[i] - info["price_" + name] = y1[i] - self.infos.append(info) - return reward, info, done + # if we run out of money we're done: all env heads are linked here unfortunately + done = np.any(p1 <= 0) + + infos = [] + for i in range(self.num_env_heads): + info = { + "reward": reward[i], + "log_return": r1[i], + "portfolio_value": p1[i], + "market_return": y1[i].mean(), + "rate_of_return": rho1[i], + "weights_std": w1[i].std(), + "rebalancing_cost": mu[i], + } + # record weights and prices + for j, name in enumerate(["USD"] + self.asset_names): + info["weight_" + name] = w1[i, j] + info["price_" + name] = y1[i, j] + infos.append(info) + return reward, infos, done def reset(self): - self.infos = [] - self.w0 = np.zeros(self.asset_n + 1) - self.w0[0] = 1 - self.p0 = 1.0 + self.w0 = np.zeros((self.num_env_heads, self.asset_n + 1)) + self.w0[:, 0] = 1 + self.p0 = np.ones(self.num_env_heads) class 
RLEnv(gym.Env): diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index d27bc733..5d7155e0 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -75,7 +75,6 @@ def run_rl(args): # define trainer and and lightning module args.multiprocessing = True if args.gpus != 1 else False - args.num_workers = 1 if args.num_workers == 0 else args.num_workers args.windows = True if "win" in sys.platform else False trainer = Trainer.from_argparse_args( args, @@ -114,7 +113,7 @@ def __init__(self, hparams: Namespace): self.hparams = TCNLightning._check_mini_series_lookback(hparams) # create env, policy/value networks and experience buffer + tracking vars - self.envs = [RLEnv(self.hparams) for _ in range(self.hparams.num_workers)] + self.envs = [RLEnv(self.hparams) for _ in range(self.hparams.num_env_workers)] n_actions = self.envs[0].action_space.shape[0] self.critic = ActorCriticTCN( self.hparams, n_actions=n_actions, output_size=1, actor=False @@ -147,39 +146,60 @@ def generate_experience_buffer( List[torch.Tensor], ]: """ - Logic for generating trajectory data to train policy and value networks. This - is done leveraging the `Process` and `Queue` classes of the `multiprocessing` - module of Python. We'll launch hparams.num_workers number of new processes, - each replicating the environment in memory, so this can get expensive where - `num_assets` in the portfolio is large. + Logic for generating trajectory data to train policy and value networks. If + `num_env_workers` > 1, this is done leveraging the `Process` and `Queue` + classes of the `multiprocessing` module of Python. We'll launch + hparams.num_env_workers number of new processes, each replicating the + environment in memory, so this can get expensive where `num_assets` in the + portfolio is large. If `num_env_workers` = 1, we simply collect experience with + the main environment in the main process, running PPO. 
Yield: Tuple of Lists containing tensors for states, actions, log probs, qvals and advantage. """ - # TODO: make this optional and multi head env the default and set no_weight_norm - max_worker_steps = int(self.hparams.steps_per_epoch / self.hparams.num_workers) - parallel_experiences = ParallelExperiences() device = self.setup_model_for_experience_gathering() - for i in range(self.hparams.num_workers): - args = ( - self.envs[i], + # spawn multiple processes and gather experience in parallel + if self.hparams.num_env_workers > 1: + max_steps_per_worker = int( + self.hparams.steps_per_epoch + / self.hparams.num_env_workers + / self.self.hparams.num_env_heads + ) + parallel_experiences = ParallelExperiences() + for i in range(self.hparams.num_env_workers): + args = ( + self.envs[i], + self.agent, + device, + max_steps_per_worker, + self.hparams.max_episode_length, + len(self.hparams.asset_names), + self.hparams.num_env_heads, + self.hparams.gamma, + self.hparams.lam, + self.hparams.pgportfolio, + ) + parallel_experiences.create_worker(*args) + # collect experiences in parallel, then merge them, calculate metrics + self.buffer.merge_buffers(parallel_experiences.collect_experiences()) + else: + self.buffer = gather_experience( + self.envs[0], self.agent, device, - max_worker_steps, + int(self.hparams.steps_per_epoch / self.hparams.num_env_heads), self.hparams.max_episode_length, len(self.hparams.asset_names), + self.hparams.num_env_heads, self.hparams.gamma, self.hparams.lam, + self.hparams.pgportfolio, ) - parallel_experiences.create_worker(*args) - - # collect experiences in parallel, then merge them, calculate metrics - self.buffer.merge_buffers(parallel_experiences.collect_experiences()) self.update_metrics_to_log() + self.setup_model_for_training() # yield a dataset for dataloader for updating actor/critic and clear buffer - self.setup_model_for_training() for state, past_pw, action, old_logp, qval, adv in zip( self.buffer.states, self.buffer.past_pws, @@ -192,23 
+212,28 @@ def generate_experience_buffer( self.buffer.clear_buffer() def setup_model_for_experience_gathering(self): - """Helper function to move model to CPU if necessary""" + """Moves model to CPU if necessary for parallel experience gathering.""" # dropout and batch-norm doesn't make sense for experience gathering self.agent.critic_net.eval() self.agent.actor_net.eval() - # we cannot use cuda tensor sharing on windows (necessary for multiprocessing) if self.hparams.windows: - device = "cpu" - self.agent.critic_net.cpu() - self.agent.actor_net.cpu() + if self.hparams.num_env_workers > 1: + # we cannot use cuda tensor sharing on windows for multiprocessing + device = "cpu" + self.agent.critic_net.cpu() + self.agent.actor_net.cpu() + else: + device = self.device else: device = self.device - self.agent.critic_net.share_memory() - self.agent.actor_net.share_memory() + if self.hparams.num_env_workers > 1: + # use cuda tensor sharing on linux + self.agent.critic_net.share_memory() + self.agent.actor_net.share_memory() return device def setup_model_for_training(self): - """Helper function to move model back to GPU if necessary""" + """Moves model back to GPU if necessary after parallel experience gathering.""" if self.hparams.windows and self.hparams.gpus != 0: self.agent.critic_net.cuda() self.agent.actor_net.cuda() @@ -219,25 +244,27 @@ def update_metrics_to_log(self): """Helper function recalculating metrics we track at end of each epoch""" done_eps = self.buffer.done_episodes + eps ep_rewards = self.buffer.epoch_rewards - e = "episode" - p = "portfolio" # pytorch lightning model checkpoint needs metric name without / + e = "episode" self.to_log["avg_total_reward"] = ep_rewards / done_eps self.to_log[f"{e}/avg_total_reward"] = ep_rewards / done_eps self.to_log[f"{e}/avg_step_reward"] = ep_rewards / self.hparams.steps_per_epoch self.to_log[f"{e}/avg_len"] = self.hparams.steps_per_epoch / done_eps + + # need this otherwise the generator won't work multiple times + p = 
"portfolio" + infos = pd.DataFrame(list(self.buffer.infos)).mean() self.to_log[f"{p}/avg_value_ep_end"] = ( sum(list(self.buffer.p_ep_end_value)) / done_eps ) self.to_log[f"{p}/avg_market_return_ep_end"] = ( sum(list(self.buffer.p_ep_end_market_return)) / done_eps ) - # need this otherwise the generator won't work multiple times - infos = pd.DataFrame(list(self.buffer.infos)).mean() self.to_log[f"{p}/avg_value"] = infos["portfolio_value"] - self.to_log[f"{p}/avg_weight_std"] = infos["weights_std"] self.to_log[f"{p}/avg_rebalancing_cost"] = infos["rebalancing_cost"] + + self.to_log["weights/avg_weight_std"] = infos["weights_std"] for w in infos.index[infos.index.str.contains("weight_")]: self.to_log[f"weights/{w}"] = infos[w] @@ -292,27 +319,6 @@ def calc_advantage( adv = PPO.discount_rewards(delta, gamma * lam) return adv - @staticmethod - def _init_past_pw(asset_num, device) -> torch.Tensor: - """ - Init past portfolio value and weights to [1, 1, 0, ..., 0], since after the - portfolio is reset for each trajector p0=1, w0[0]=1 (USD relative price is - always 1). - """ - past_pw = torch.ones(asset_num + 2).to(device) - # past_pw[:2] = 1 - return past_pw.unsqueeze(0) - - @staticmethod - def _update_past_pw(p1: float, action: torch.Tensor, device) -> torch.Tensor: - """ - After each interaction, update the past weight / portfolio value vector as for - the next interaction the actor and critic networks take that in along with the - new state to form their outputs. 
- """ - p1 = torch.Tensor([p1]).to(device) - return torch.cat([p1.unsqueeze(0), action], -1) - # ---------------------------------------------------------------------------------- # LOSSES AND OPTIMIZERS # ---------------------------------------------------------------------------------- @@ -367,8 +373,10 @@ def training_step( loss """ state, past_pw, action, old_logp, qval, adv = batch + # normalize advantages within batch - # adv = (adv - adv.mean()) / adv.std() + if self.hparams.normalize_advantages: + adv = (adv - adv.mean()) / adv.std() # log all metrics (other than loss) for k, v in self.to_log.items(): @@ -388,9 +396,11 @@ def training_step( @staticmethod def _pre_sanity_check(hparams: Namespace): - # ensure we have the rl specific target column in the config if hparams.target_col != NRL.rl_return: raise ValueError("target_col has to be rl_return for RL tasks.") + if hparams.num_env_workers > 1 and not hparams.no_weight_norm: + hparams.no_weight_norm = True + logger.warning("We set no_weight_norm=True as you have num_env_workers>1.") # fill in the same cols for any df that doesn't have the cols_to_model defined if len(hparams.cols_to_model) > 1: @@ -455,6 +465,7 @@ def append( self.infos.append(info) self.ep_rewards.append(reward) self.ep_values.append(value.item()) + self.ep_market_returns.append(info["market_return"]) def shift_rewards(self): """ @@ -472,10 +483,12 @@ def shift_rewards(self): self.logps.pop(-1) self.infos.pop(-1) self.ep_values.pop(-1) + self.ep_market_returns.pop(-1) def merge_buffers(self, buffers): """ Merges the passed in ExperienceBuffers and overwrites the current state with it. + Used when experience is gathered by multiple workers in parallel. 
Args: buffers: List of smaller ExpereinceBuffers to merge together from parallel @@ -511,6 +524,7 @@ def clear_buffer(self): # episode / epoch vars self.ep_rewards = [] self.ep_values = [] + self.ep_market_returns = [] self.done_episodes = 0 self.epoch_rewards = 0 @@ -518,6 +532,9 @@ def clear_buffer(self): class ParallelExperiences: """ Parallelised experience gathering, idea from https://stackoverflow.com/a/45829852 + Used to spawn parallel processes for each `env_worker` which can independently can + interact with a copy of the environment and return its rewards, logps, values, etc + from the rollout. """ def __init__(self): @@ -559,15 +576,18 @@ def gather_experience( max_steps: int, max_episode_length: int, asset_num: int, + num_env_heads: int, gamma: float, lam: float, + pgportfolio: bool = True, ): """ Workhorse function of the parallel experience gathering. This function can be called as many times as many CPUs are available on the system, to collect the desired number of steps and store them into an `ExperienceBuffer` that is then passed back (via a `multiprocessing.Queue` object) to the main process that - spawned the parallel processes. + spawned the parallel processes. Crucially, this also works if we only have a + single worker i.e. the main process of PPO. Args: env: An instance of the environment to act on. @@ -576,53 +596,87 @@ def gather_experience( max_steps: Total number of steps (over multiple episodes) a worker can take. max_episode_length: Maximum length of a trajectory / episode. asset_num: Number of assets we are modelling (not including USD). + num_env_heads: Number of environment heads we use to interact with the env. gamma: See docs of :func:`PPO.calc_advantage` lam: See docs of :func:`PPO.calc_advantage` + pgportfolio: If True, we calculate the q-values and advantages according to + https://arxiv.org/pdf/1706.10059.pdf, else we use the traditional PPO algo. Returns: Experience collected in this parallel worker. 
""" - from datetime import datetime - - buffer = ExperienceBuffer() state = env.reset() - past_pw = PPO._init_past_pw(asset_num, device) - for step in range(max_steps): + buffers = [ExperienceBuffer() for _ in range(num_env_heads)] + past_pw = init_past_pw(num_env_heads, asset_num, device) + + for step in range(max_steps + 1): + episode_end = step > 0 and step % max_episode_length == 0 + # get action, make step, get reward and info from env pi, action, actor_logits, logp, value = agent(state, past_pw, device) next_state, reward, done, info = env.step(action.cpu().numpy()) - # store everything and update state, past_pw - buffer.append(state, past_pw, action, logp, reward, value, info) + # update past portfolio value / weights for next round + p1 = torch.Tensor([i["portfolio_value"] for i in info]).to(device).unsqueeze(0) + past_pw = torch.cat([p1.T, actor_logits], -1) + + # store everything, we need to do this for each environment head separately + for i, buffer in enumerate(buffers): + buffer.append( + [s[i] for s in state], + past_pw[i], + action[i], + logp[i], + reward[i], + value[i], + info[i], + ) state = next_state - past_pw = PPO._update_past_pw(info["portfolio_value"], actor_logits, device) - - terminal = len(buffer.ep_rewards) == max_episode_length - if done or terminal or step == max_steps - 1: - # buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma) - # buffer.advs += PPO.calc_advantage( - # buffer.ep_rewards, buffer.ep_values, gamma, lam - # ) - - # if rewards are immediate, we need this is due to our special environment - # where the immediate reward of a_0 can only calculate at t_1. 
- if gamma == 0: - buffer.shift_rewards() - # according to the PGPortfolio paper, reward should be the sum of portfolio - # values, divided by length of episode - no discounting no BS, same for adv - epr = buffer.ep_rewards - epr = np.ones_like(epr) * sum(epr) / len(epr) - buffer.qvals += list(epr) - buffer.advs += list(epr - np.array(buffer.ep_values)) - if done or terminal: - buffer.done_episodes += 1 - buffer.epoch_rewards += np.sum(buffer.ep_rewards) - buffer.p_ep_end_value.append(info["portfolio_value"]) - buffer.p_ep_end_market_return.append(np.array(info["market_return"]).prod()) - - # episode over, reset the env and the episode buffer - buffer.ep_rewards = [] - buffer.ep_values = [] - state = env.reset() - past_pw = PPO._init_past_pw(asset_num, device) - return buffer + + if done or episode_end or step == max_steps - 1: + for buffer in buffers: + # according to the PGPortfolio paper, reward should be the sum of + # immediate rewards (portfolio returns p1/p0) div by length of episode + if pgportfolio: + if gamma == 0: + buffer.shift_rewards() + epr = buffer.ep_rewards + epr = np.ones_like(epr) * sum(epr) / len(epr) + buffer.qvals += list(epr) + buffer.advs += list(epr - np.array(buffer.ep_values)) + # classic PPO qval and reward estimation + else: + buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma) + buffer.advs += PPO.calc_advantage( + buffer.ep_rewards, buffer.ep_values, gamma, lam + ) + if done or episode_end: + buffer.done_episodes += 1 + buffer.epoch_rewards += np.sum(buffer.ep_rewards) + buffer.p_ep_end_value.append(buffer.infos[-1]["portfolio_value"]) + buffer.p_ep_end_market_return.append( + np.array(buffer.ep_market_returns).prod() + ) + + # episode over, reset the env and the episode buffer + buffer.ep_rewards = [] + buffer.ep_values = [] + buffer.ep_market_returns = [] + state = env.reset() + past_pw = init_past_pw(num_env_heads, asset_num, device) + + # merge buffers from each environment head and return new buffer + merged_buffers = 
ExperienceBuffer() + merged_buffers.merge_buffers(buffers) + return merged_buffers + + +def init_past_pw( + num_env_heads: int, asset_num: int, device: torch.device +) -> torch.Tensor: + """Init past portfolio value and weights (Dirichlet concentrations) as all ones.""" + # init past portfolio value and weights + past_pw = torch.ones(num_env_heads, asset_num + 2).to(device) + if num_env_heads == 1: + past_pw = past_pw.unsqueeze(0) + return past_pw diff --git a/src/dagobert/modelling/rl/rl_args.py b/src/dagobert/modelling/rl/rl_args.py index 4f2db348..6fd9a604 100644 --- a/src/dagobert/modelling/rl/rl_args.py +++ b/src/dagobert/modelling/rl/rl_args.py @@ -54,6 +54,41 @@ def add_rl_specific_args(parent_parser): "sharpe. See RLPortfolio class for more details." ), ) + parser.add_argument( + "--num_env_heads", + type=int, + default=1, + help=( + "Number of heads we want to read the environment with concurrently. This " + "is an easy and cheap way to parallelize experience gathering on its own " + "does not require multiple processes to be spawn." + ), + ) + parser.add_argument( + "--num_env_workers", + type=int, + default=1, + help=( + "Number parallel processes to spawn to gather experience. This represents " + "second layer of concurrency (num_env_heads being the first and simplest). " + "If this is set to higher than 1, we will have to turn no_weight_norm=True." + ), + ) + + parser.add_argument( + "--pgportfolio", + action="store_true", + help=( + "If True, we calculate the q-values and advantages according to " + "https://arxiv.org/pdf/1706.10059.pdf, else we use traditional PPO algo." + ), + ) + + parser.add_argument( + "--normalize_advantages", + action="store_true", + help="If used, we normalize the advantages in each batch of the learning phase.", + ) parser.add_argument( "--max_episode_length", type=int, @@ -122,7 +157,6 @@ def add_rl_specific_args(parent_parser): "the model becomes deterministic." 
), ) - return parser @@ -167,6 +201,18 @@ def add_model_specific_args(parent_parser): "for the supervised DL module." ), ) + parser.add_argument( + "--no_weight_norm", + action="store_true", + help=( + " Weight norm is registered as a pre_forward_hook on the 1D convolutional " + "layers of the TemporalBlock, and these cannot be serialised when training " + "with parallel processes interacting with the model concurrently. If True, " + "we add weight normalisation around these layers, and TCN cannot be used " + "in a multiprocessing setting. If False, then it can be used, even staying " + "on GPU in linux (CPU only on Windows)." + ), + ) parser.add_argument( "--use_last_timepoint", action="store_true", From 629b4a91e2ff029bda020c34ecd1b0e2904e7edb Mon Sep 17 00:00:00 2001 From: Daniel Homola Date: Sun, 31 Jan 2021 12:45:40 +0000 Subject: [PATCH 44/62] num_env_heads works with 1 or many, num_env_workers works with 1 or many, created portfolio_vs_market reward, cleaned up pgportfolio qvals/adv calc --- config/rl_config.yaml | 15 ++++--- src/dagobert/modelling/rl/environment.py | 13 +++++- src/dagobert/modelling/rl/ppo.py | 55 +++++++++--------------- 3 files changed, 41 insertions(+), 42 deletions(-) diff --git a/config/rl_config.yaml b/config/rl_config.yaml index 8f1bd392..cf24c076 100644 --- a/config/rl_config.yaml +++ b/config/rl_config.yaml @@ -24,6 +24,7 @@ tags: no_comet_logger: True seed: 42 batch_size: 500 +max_epochs: 100 # -------------------------------------------------------------------------------------- # RL @@ -35,19 +36,19 @@ asset_names: - XRP - LTC trading_cost: 0.002 -reward_type: return +reward_type: portfolio_vs_market num_env_heads: 20 num_env_workers: 1 normalize_advantages: True -pgportfolio: True -max_episode_length: 1000 -steps_per_epoch: 100000 -n_optim_iters: 6 +pgportfolio: False +max_episode_length: 2000 +steps_per_epoch: 80000 +n_optim_iters: 4 gamma: 0.99 lam: 0.95 lr_actor: 0.0001 lr_critic: 0.0003 -clip_ratio: 0.25 +clip_ratio: 0.2 
target_kl: 0.01 @@ -65,7 +66,7 @@ actor_num_channels: [50, 50, 50, 50, 50] actor_kernel_size: 5 actor_dropout: 0.2 # sample size - exp abs diff to mean | 20 - 5% | 50 - 3% | 100 - 2% | 500 - 1% -actor_dirichlet_sample_size: 0 +actor_dirichlet_sample_size: 20 critic_num_channels: [50, 50, 50, 50, 50] critic_kernel_size: 5 critic_dropout: 0.2 diff --git a/src/dagobert/modelling/rl/environment.py b/src/dagobert/modelling/rl/environment.py index 6218ee38..7ce7a15c 100644 --- a/src/dagobert/modelling/rl/environment.py +++ b/src/dagobert/modelling/rl/environment.py @@ -157,6 +157,10 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: """ w0 = self.w0 p0 = self.p0 + m0 = self.m0 + + # market return for new timepoint for each head + m1 = m0 * y1.mean(axis=1) # (eq7) since we last acted prices changed, so weights evolve into new_price_old_weights_sum = np.sum(y1 * w0, axis=1) @@ -176,12 +180,15 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: # (eq22) immediate reward is log rate of return scaled by episode length if self.reward_type == "return": reward = r1 + elif self.reward_type == "portfolio_vs_market": + reward = np.log(p1 + eps) - np.log(m1 + eps) # TODO: implement the differentiable sharpe ratio reward # https://quant.stackexchange.com/a/38040 # remember for next step self.w0 = w1 self.p0 = p1 + self.m0 = m1 # if we run out of money we're done: all env heads are linked here unfortunately done = np.any(p1 <= 0) @@ -192,7 +199,7 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: "reward": reward[i], "log_return": r1[i], "portfolio_value": p1[i], - "market_return": y1[i].mean(), + "market_return": m1[i], "rate_of_return": rho1[i], "weights_std": w1[i].std(), "rebalancing_cost": mu[i], @@ -205,9 +212,13 @@ def step(self, w1: np.array, y1: np.array) -> Tuple[float, dict, bool]: return reward, infos, done def reset(self): + # weights for each head self.w0 = np.zeros((self.num_env_heads, self.asset_n + 
1)) self.w0[:, 0] = 1 + # portfolio value for each head self.p0 = np.ones(self.num_env_heads) + # market return for each head + self.m0 = np.ones(self.num_env_heads) class RLEnv(gym.Env): diff --git a/src/dagobert/modelling/rl/ppo.py b/src/dagobert/modelling/rl/ppo.py index 5d7155e0..0200ff0b 100644 --- a/src/dagobert/modelling/rl/ppo.py +++ b/src/dagobert/modelling/rl/ppo.py @@ -164,7 +164,7 @@ def generate_experience_buffer( max_steps_per_worker = int( self.hparams.steps_per_epoch / self.hparams.num_env_workers - / self.self.hparams.num_env_heads + / self.hparams.num_env_heads ) parallel_experiences = ParallelExperiences() for i in range(self.hparams.num_env_workers): @@ -252,18 +252,15 @@ def update_metrics_to_log(self): self.to_log[f"{e}/avg_step_reward"] = ep_rewards / self.hparams.steps_per_epoch self.to_log[f"{e}/avg_len"] = self.hparams.steps_per_epoch / done_eps - # need this otherwise the generator won't work multiple times p = "portfolio" infos = pd.DataFrame(list(self.buffer.infos)).mean() - self.to_log[f"{p}/avg_value_ep_end"] = ( - sum(list(self.buffer.p_ep_end_value)) / done_eps - ) - self.to_log[f"{p}/avg_market_return_ep_end"] = ( - sum(list(self.buffer.p_ep_end_market_return)) / done_eps - ) + p_val = np.array(list(self.buffer.p_ep_end_value)) + m_ret = np.array(list(self.buffer.p_ep_end_market_return)) + self.to_log[f"{p}/avg_value_ep_end"] = p_val.mean() + self.to_log[f"{p}/avg_market_return_ep_end"] = m_ret.mean() + self.to_log[f"{p}/avg_portfolio_vs_market"] = (p_val - m_ret).mean() self.to_log[f"{p}/avg_value"] = infos["portfolio_value"] self.to_log[f"{p}/avg_rebalancing_cost"] = infos["rebalancing_cost"] - self.to_log["weights/avg_weight_std"] = infos["weights_std"] for w in infos.index[infos.index.str.contains("weight_")]: self.to_log[f"weights/{w}"] = infos[w] @@ -464,7 +461,7 @@ def append( self.logps.append(logp) self.infos.append(info) self.ep_rewards.append(reward) - self.ep_values.append(value.item()) + self.ep_values.append(value) 
self.ep_market_returns.append(info["market_return"]) def shift_rewards(self): @@ -607,13 +604,14 @@ def gather_experience( """ state = env.reset() buffers = [ExperienceBuffer() for _ in range(num_env_heads)] - past_pw = init_past_pw(num_env_heads, asset_num, device) - - for step in range(max_steps + 1): - episode_end = step > 0 and step % max_episode_length == 0 + past_pw = torch.ones(num_env_heads, asset_num + 2).to(device) + for step in range(1, max_steps + 1): # get action, make step, get reward and info from env pi, action, actor_logits, logp, value = agent(state, past_pw, device) + if num_env_heads == 1: + action = action.unsqueeze(0) + logp = logp.unsqueeze(0) next_state, reward, done, info = env.step(action.cpu().numpy()) # update past portfolio value / weights for next round @@ -628,22 +626,22 @@ def gather_experience( action[i], logp[i], reward[i], - value[i], + value[i].item(), info[i], ) state = next_state + episode_end = step > 0 and step % max_episode_length == 0 if done or episode_end or step == max_steps - 1: for buffer in buffers: # according to the PGPortfolio paper, reward should be the sum of # immediate rewards (portfolio returns p1/p0) div by length of episode if pgportfolio: - if gamma == 0: - buffer.shift_rewards() - epr = buffer.ep_rewards - epr = np.ones_like(epr) * sum(epr) / len(epr) - buffer.qvals += list(epr) - buffer.advs += list(epr - np.array(buffer.ep_values)) + buffer.shift_rewards() + buffer.qvals += buffer.ep_rewards + buffer.advs += list( + np.array(buffer.ep_rewards) - np.array(buffer.ep_values) + ) # classic PPO qval and reward estimation else: buffer.qvals += PPO.discount_rewards(buffer.ep_rewards, gamma) @@ -652,7 +650,7 @@ def gather_experience( ) if done or episode_end: buffer.done_episodes += 1 - buffer.epoch_rewards += np.sum(buffer.ep_rewards) + buffer.epoch_rewards += sum(buffer.ep_rewards) buffer.p_ep_end_value.append(buffer.infos[-1]["portfolio_value"]) buffer.p_ep_end_market_return.append( 
np.array(buffer.ep_market_returns).prod() @@ -663,20 +661,9 @@ def gather_experience( buffer.ep_values = [] buffer.ep_market_returns = [] state = env.reset() - past_pw = init_past_pw(num_env_heads, asset_num, device) + past_pw = torch.ones(num_env_heads, asset_num + 2).to(device) # merge buffers from each environment head and return new buffer merged_buffers = ExperienceBuffer() merged_buffers.merge_buffers(buffers) return merged_buffers - - -def init_past_pw( - num_env_heads: int, asset_num: int, device: torch.device -) -> torch.Tensor: - """Init past portfolio value and weights (Dirichlet concentrations) as all ones.""" - # init past portfolio value and weights - past_pw = torch.ones(num_env_heads, asset_num + 2).to(device) - if num_env_heads == 1: - past_pw = past_pw.unsqueeze(0) - return past_pw From 3d66510356f4808caf2d0d98c8b6a25622964159 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Wed, 3 Feb 2021 13:45:34 +0000 Subject: [PATCH 45/62] args and stuff --- config/timegan_config.yaml | 27 ++- setup.cfg | 1 + .../modelling/augmentation/tgan_args.py | 158 +++--------------- .../modelling/augmentation/timegan.py | 11 +- src/dagobert/modelling/dl/tcn_args.py | 8 +- src/dagobert/naming.py | 9 + 6 files changed, 71 insertions(+), 143 deletions(-) diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index cf1b838c..ad09b3cb 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -25,7 +25,7 @@ batch_size: 256 # -------------------------------------------------------------------------------------- # gru or lstm -rnn: lstm +rnn: gru # embedding weight in cost of generator loss emb_weight: 1 @@ -41,9 +41,16 @@ binariser_method: dropout: 0.2 num_layers: 2 -hidden_size: 50 -z_dim: 50 -mini_series_length: 50 +hidden_size: 10 +z_dim: 12 +mini_series_length: 20 +# don't change order with lr dict +lr: + embedder0: 0.001 + supervisor: 0.001 + generator: 0.001 + embedder1: 0.001 + discriminator: 0.001 # 
-------------------------------------------------------------------------------------- # DATA @@ -80,9 +87,21 @@ cols_to_model: # - sin_time # - cos_time +augment_method: +augment_dfs: +augment_dfs_mix: 0 # -------------------------------------------------------------------------------------- # PREPROCESSING # -------------------------------------------------------------------------------------- +train_start_date: "2019-01-01" +train_days: 1 +val_days: 1 +val_train_offset_days: 1 +val_puffer_days: 1 +test_days: 1 +test_train_offset_days: 62 +test_puffer_days: 1 + scaling_method: minmax \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index f0d2b5ce..5c702758 100644 --- a/setup.cfg +++ b/setup.cfg @@ -58,6 +58,7 @@ console_scripts = dagobert-optuna = dagobert.modelling.dl.optuna:run dagobert-s3 = dagobert.io.runner:run dagobert-rl = dagobert.modelling.rl.rl_runner:run + dagobert-tgan = dagobert.modelling.augmentation.tgan_runner:run [test] # py.test options when running `python setup.py test` diff --git a/src/dagobert/modelling/augmentation/tgan_args.py b/src/dagobert/modelling/augmentation/tgan_args.py index 32853ad6..ef0c4a6e 100644 --- a/src/dagobert/modelling/augmentation/tgan_args.py +++ b/src/dagobert/modelling/augmentation/tgan_args.py @@ -1,26 +1,21 @@ """ -All custom arguments and hyper-parameters for the reinforcement learning module. +All custom arguments and hyper-parameters for the TimeGAN module. 
""" from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from pytorch_lightning import Trainer -from dagobert.modelling.dl.tcn import TCNLightning from dagobert.modelling.dl.tcn_args import ( add_run_specific_args, + add_model_specific_args, add_data_specific_args, add_preprocessing_specific_args, ) -from dagobert.naming import ( - NInputDataCols, - NAugmentationMethods, - NBarriers, - NPreprocessingArgs, -) +from dagobert.naming import NGAN -def add_rl_specific_args(parent_parser): +def add_tgan_specific_args(parent_parser): parser = ArgumentParser( parents=[parent_parser], add_help=False, @@ -28,149 +23,48 @@ def add_rl_specific_args(parent_parser): ) # this is just a place-holder so it's easier to read the million params in the cmd - parser.add_argument("--RL_PARAMS", help="====================================") - parser.add_argument( - "--asset_names", - type=str, - nargs="+", - default=["BTC", "ETH"], - help=( - "Names of instruments to include in the portfolio, corresponding to " - "anchor, df2, df3, etc." - ), - ) - parser.add_argument( - "--trading_cost", - type=float, - default=0.002, - help="Commission rate of making trades + an estimated cost of slippage.", - ) + parser.add_argument("--TGAN_PARAMS", help="====================================") parser.add_argument( - "--reward_type", - type=str, - default="return", - help=( - "Determines the overall reward to maximise by the agent. Either return or " - "sharpe. See RLPortfolio class for more details." - ), - ) - parser.add_argument( - "--max_episode_length", + "--z_dim", type=int, - default=1000, - help=( - "Maximum number of interactions between the agent and the environment in " - "an episode." - ), + default=50, + help="number of dimensions of noise vector (input of generator) at t timepoint", ) parser.add_argument( - "--steps_per_epoch", + "--hidden_size", type=int, - default=10000, - help=( - "How many action-state pairs to rollout for trajectory collection per " - "epoch. I.e. 
if all episodes run to their max_episode_length, we'll have " - "steps_per_epoch/max_episode_length number of unique episodes/trajectories." - ), + default=50, + help="The number of features in the hidden state, ie in embedded state.", ) parser.add_argument( - "--n_optim_iters", + "--num_layers", type=int, - default=4, + default=1, help=( - "How many steps of gradient descent to perform on each batch. This might " - "seem weird, but it helps sampling efficiency, done by the original PPO " - "implementation and the Google ablation study found it to be useful." + "Number of RNN layers stacked onto each other, ie with new one using output" + " of previous" ), ) parser.add_argument( - "--gamma", type=float, default=0.99, help="Discounting of rewards." - ) - parser.add_argument( - "--lam", - type=float, - default=0.95, - help="Lambda parameter in the advantage discounting equation.", - ) - parser.add_argument( - "--lr_actor", - type=float, - default=0.0003, - help="Learning rate for the actor/policy network.", - ) - parser.add_argument( - "--lr_critic", - type=float, - default=0.001, - help="Learning rate for the critic/value network.", - ) - parser.add_argument( - "--clip_ratio", - type=float, - default=0.2, - help="Clipping parameter for the PPO's policy upgrade cost function.", - ) - - return parser - - -def add_model_specific_args(parent_parser): - parser = ArgumentParser( - parents=[parent_parser], - add_help=False, - formatter_class=ArgumentDefaultsHelpFormatter, - ) - - # this is just a place-holder so it's easier to read the million params in the cmd - parser.add_argument("--MODEL_PARAMS", help="====================================") - parser.add_argument( - "--actor_num_channels", - type=int, - nargs="+", - default=[50, 50, 50, 50, 50], - help=( - "Determines the number of layers (depth) of the actor / policy network and " - "the hidden unit count in each layer." 
- ), + "--rnn", + type=str, + default=NGAN.lstm, + choices=[NGAN.lstm, NGAN.gru], + help="Choice of RNN to use, either LSTM or GRU", ) parser.add_argument( - "--critic_num_channels", + "--emb_weight", type=int, - nargs="+", - default=[50, 50, 50, 50, 50], - help=( - "Determines the number of layers (depth) of the critic / value network and " - "the hidden unit count in each layer." - ), - ) - parser.add_argument("--actor_kernel_size", type=int, default=5, help=" ") - parser.add_argument("--critic_kernel_size", type=int, default=5, help=" ") - parser.add_argument("--actor_dropout", type=float, default=0, help=" ") - parser.add_argument("--critic_dropout", type=float, default=0, help=" ") - parser.add_argument( - "--no_class_weights", - action="store_true", - help=( - "Set this to True so we can leverage the Preprocessing pipeline written " - "for the supervised DL module." - ), - ) - parser.add_argument( - "--use_last_timepoint", - action="store_true", - help=( - "If this flag is used the only the network's representation " - "corresponding at the latest time-point is used to predict the outcome." - "By default, we combine all representations across the sequence length" - "to make a prediction from, instead of just using the last one." 
- ), + default=1, + help="Weight multiplier for embedding component in generator loss", ) + return parser def get_all_args(): parser = ArgumentParser( - description="Lightning RL module", + description="Lightning TimeGAN module", formatter_class=ArgumentDefaultsHelpFormatter, ) @@ -178,7 +72,7 @@ def get_all_args(): parser = Trainer.add_argparse_args(parser) # add model and run specific params - parser = add_rl_specific_args(parser) + parser = add_tgan_specific_args(parser) parser = add_model_specific_args(parser) parser = add_run_specific_args(parser) parser = add_data_specific_args(parser) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index f26acde9..a46861eb 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -147,7 +147,7 @@ def forward(self, x): rnn_out, _hidden = self.rnn(x) rnn_out = self.tanh(rnn_out) # todo: is there reshaping needed? - # rnn_out = rnn_out.reshape(-1, self.linear_input_size) + rnn_out = rnn_out.reshape(-1, self.linear_input_size) output = self.linear(rnn_out) if self.linear_activation: output = self.sigmoid(output) @@ -406,13 +406,14 @@ def configure_optimizers(self) -> List[optim.Optimizer]: list(self.embedder.parameters()) + list(self.recovery.parameters()), list(self.discriminator.parameters()), ] + # TODO: diff lr for each net if "adam" in self.hparams.optimizer.lower(): - for param_pair in param_pairs: - optimizer = torch.optim.AdamW(param_pair, lr=self.hparams.lr) + for param_pair, network in zip(param_pairs, self.hparams.lr.keys()): + optimizer = torch.optim.AdamW(param_pair, lr=self.hparams.lr[network]) optimizers.append(optimizer) elif "adabelief" in self.hparams.optimizer.lower(): - for param_pair in param_pairs: - optimizer = AdaBelief(param_pair, lr=self.hparams.lr) + for param_pair, network in zip(param_pairs, self.hparams.lr.keys()): + optimizer = AdaBelief(param_pair, lr=self.hparams.lr[network]) 
optimizers.append(optimizer) return optimizers diff --git a/src/dagobert/modelling/dl/tcn_args.py b/src/dagobert/modelling/dl/tcn_args.py index 21df5792..1a14e4f2 100644 --- a/src/dagobert/modelling/dl/tcn_args.py +++ b/src/dagobert/modelling/dl/tcn_args.py @@ -1,6 +1,7 @@ """ All custom arguments and hyper-parameters for the TCN Lightning module. """ +from typing import Union from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter @@ -79,9 +80,12 @@ def add_run_specific_args(parent_parser): ) parser.add_argument( "--lr", - type=float, + type=Union[float, dict], default=0.003, - help="Learning rate. If set to 'auto' we'll find it automatically.", + help=( + "Learning rate. If set to 'auto' we'll find it automatically. In TimeGAN" + "different learning rates can be used for the various networks" + ), ) parser.add_argument( "--max_lr", diff --git a/src/dagobert/naming.py b/src/dagobert/naming.py index 829c0eca..fd2da5a8 100644 --- a/src/dagobert/naming.py +++ b/src/dagobert/naming.py @@ -284,3 +284,12 @@ class NRL(object): """ rl_return = "rl_return" + + +class NGAN(object): + """ + Naming object for TimeGAN. 
+ """ + + gru = "gru" + lstm = "lstm" From cd94349ed6bb91358bd2ae80f146537472586cd8 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Tue, 9 Feb 2021 17:24:55 +0000 Subject: [PATCH 46/62] debug --- config/custom/tcn_config_m.yaml | 4 +- config/timegan_config.yaml | 21 +- notebooks/modelling/test_cryptodataset.ipynb | 221 +++++++++++++++++- .../modelling/augmentation/timegan.py | 23 +- src/dagobert/modelling/dl/data.py | 2 - 5 files changed, 247 insertions(+), 24 deletions(-) diff --git a/config/custom/tcn_config_m.yaml b/config/custom/tcn_config_m.yaml index 1dba8fa5..de51bdb2 100644 --- a/config/custom/tcn_config_m.yaml +++ b/config/custom/tcn_config_m.yaml @@ -15,7 +15,7 @@ auto_scale_batch_size: # -------------------------------------------------------------------------------------- log_dir: logs -num_workers: 4 +num_workers: 1 exp_name: TCN tags: - model1 @@ -146,7 +146,7 @@ simple_augment_prob: 0.5 # -------------------------------------------------------------------------------------- train_start_date: "2018-06-01" -train_days: 30 +train_days: 1 val_days: 1 val_train_offset_days: 1 val_puffer_days: 1 diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index ad09b3cb..33cb093a 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -11,7 +11,7 @@ gpus: 0 # -------------------------------------------------------------------------------------- log_dir: logs -num_workers: 4 +num_workers: 0 exp_name: TGAN-test tags: - time_gan_test @@ -39,8 +39,9 @@ binariser_method: # MODEL # -------------------------------------------------------------------------------------- +optimizer: "adamw" dropout: 0.2 -num_layers: 2 +num_layers: 1 hidden_size: 10 z_dim: 12 mini_series_length: 20 @@ -74,14 +75,14 @@ cols_to_model: - high - low - close -# - cum_ticks -# - cum_dollar -# - volume -# - cum_volume_buy -# - cum_volume_sell -# - cum_volume_quote -# - cum_volume_quote_buy -# - cum_volume_quote_sell + - cum_ticks + - cum_dollar + - volume + - 
cum_volume_buy + - cum_volume_sell + - cum_volume_quote + - cum_volume_quote_buy + - cum_volume_quote_sell # - sin_date # - cos_date # - sin_time diff --git a/notebooks/modelling/test_cryptodataset.ipynb b/notebooks/modelling/test_cryptodataset.ipynb index 3cf26b90..a4d4d903 100644 --- a/notebooks/modelling/test_cryptodataset.ipynb +++ b/notebooks/modelling/test_cryptodataset.ipynb @@ -1716,12 +1716,229 @@ "data_loaded[0].shape " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Misc" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import torchtext\n", + "import torch\n", + "from torchtext.data.utils import get_tokenizer\n", + "from collections import Counter\n", + "from torchtext.vocab import Vocab\n", + "from torchtext.utils import download_from_url, extract_archive\n", + "import io" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: en_core_web_sm==2.3.1 from https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz#egg=en_core_web_sm==2.3.1 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (2.3.1)\n", + "Requirement already satisfied: spacy<2.4.0,>=2.3.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from en_core_web_sm==2.3.1) (2.3.2)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.2)\n", + "Requirement already satisfied: setuptools in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from 
spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (45.2.0.post20200210)\n", + "Requirement already satisfied: numpy>=1.15.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.18.1)\n", + "Requirement already satisfied: blis<0.5.0,>=0.4.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.4.1)\n", + "Requirement already satisfied: thinc==7.4.1 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.1)\n", + "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.8.0)\n", + "Requirement already satisfied: plac<1.2.0,>=0.9.6 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.1.3)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.23.0)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.2)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.3)\n", + "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.2)\n", + "Requirement already 
satisfied: tqdm<5.0.0,>=4.38.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.45.0)\n", + "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2019.11.28)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.4)\n", + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.9)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.25.8)\n", + "Requirement already satisfied: importlib-metadata>=0.20; python_version < \"3.8\" in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from catalogue<1.1.0,>=0.0.7->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.7.0)\n", + "Requirement already satisfied: zipp>=0.5 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.2.0)\n", + "[+] Download and installation successful\n", + "You can now load the model via 
spacy.load('en_core_web_sm')\n", + "[x] Couldn't link model to 'en'\n", + "Creating a symlink in spacy/data failed. Make sure you have the required\n", + "permissions and try re-running the command as admin, or use a virtualenv. You\n", + "can still import the model as a module and call its load() method, or create the\n", + "symlink manually.\n", + "C:\\Users\\u164428\\AppData\\Local\\Continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages\\en_core_web_sm\n", + "-->\n", + "C:\\Users\\u164428\\AppData\\Local\\Continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages\\spacy\\data\\en\n", + "[!] Download successful but linking failed\n", + "Creating a shortcut link for 'en' didn't work (maybe you don't have admin\n", + "permissions?), but you can still load the model via its full package name: nlp =\n", + "spacy.load('en_core_web_sm')\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You do not have sufficient privilege to perform this operation.\n" + ] + } + ], + "source": [ + "! 
python -m spacy download en" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "en_tokenizer = get_tokenizer('spacy', language='en_core_web_sm')" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "inputs = [\n", + " 'hello, my name is david', \n", + " 'david likes to swim', \n", + " 'the only thing david needs is attention'\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Argument 'string' has incorrect type (expected str, got list)", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# We tokenize our input variables into numbers based on a loaded vocab\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mtokenized_inputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0men_tokenizer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# b x max_input_length\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages\\torchtext\\data\\utils.py\u001b[0m in \u001b[0;36m_spacy_tokenize\u001b[1;34m(x, spacy)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_spacy_tokenize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mspacy\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 14\u001b[1;33m \u001b[1;32mreturn\u001b[0m 
\u001b[1;33m[\u001b[0m\u001b[0mtok\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtext\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mtok\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mspacy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 15\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mTypeError\u001b[0m: Argument 'string' has incorrect type (expected str, got list)" + ] + } + ], + "source": [ + "# We tokenize our input variables into numbers based on a loaded vocab\n", + "tokenized_inputs = en_tokenizer(inputs) # b x max_input_length" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "def build_vocab(input_list, tokenizer):\n", + " counter = Counter()\n", + " for string_ in input_list:\n", + " counter.update(tokenizer(string_))\n", + " return Vocab(counter, specials=['', '', '', ''])" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "en_vocab = build_vocab(inputs, en_tokenizer)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(, 18)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "en_vocab, len(en_vocab)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "# We define our layers\n", + "hidden_size = 300\n", + "embedding = torch.nn.Embedding(len(en_vocab), hidden_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "# RNN encoded size must be half since bidirectional RNNs\n", + "# produce 2 hidden states: forwards and backwards\n", + "encoded_size = 
int(hidden_size / 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "rnn = torch.nn.GRU(hidden_size, encoded_size, bidirectional=True, batch_first=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'tokenized_inputs' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0membedded\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0membedding\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtokenized_inputs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# tensor of size b x 300\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mNameError\u001b[0m: name 'tokenized_inputs' is not defined" + ] + } + ], + "source": [ + "embedded = embedding(tokenized_inputs) # tensor of size b x 300" + ] } ], "metadata": { diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index a46861eb..c81ed767 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -32,12 +32,13 @@ from dagobert.naming import NStudy, NPreprocessingArgs as npa from dagobert.modelling.dl import ( - GeneratorCryptoDataset, TemporalConvNet, Preprocessing, AdaBelief, LogCoshLoss, FocalLoss, + CryptoDataset, + GeneratorCryptoDataset, ) from dagobert.modelling.augmentation.utils import get_noise, pca_analysis from dagobert.modelling.utils import ( @@ -182,15 +183,15 @@ def __init__(self, hparams: Namespace): # define main vars (other than model) super().__init__() - # TODO: pre sanity check, define hparams hparams = 
TimeGANLightning._pre_sanity_check(hparams) # lightning sets this to cuda too late for some of our setup to work self.tgan_device = "cuda" if hparams.gpus > 0 else "cpu" # prepare datafiles if necessary - hparams = Preprocessing().preprocess_train_dfs(hparams) + self.hparams = Preprocessing().preprocess_train_dfs(hparams) # TODO: any sanity checks on data, hypermparams self.real_logging = None + self.comet_logging = not self.hparams.no_comet_logger # get feature number of instruments @@ -407,7 +408,7 @@ def configure_optimizers(self) -> List[optim.Optimizer]: list(self.discriminator.parameters()), ] # TODO: diff lr for each net - if "adam" in self.hparams.optimizer.lower(): + if "adamw" in self.hparams.optimizer.lower(): for param_pair, network in zip(param_pairs, self.hparams.lr.keys()): optimizer = torch.optim.AdamW(param_pair, lr=self.hparams.lr[network]) optimizers.append(optimizer) @@ -434,8 +435,11 @@ def validation_step(self, batch, batch_idx): self.hparams.z_dim, device=self.tgan_device, ) - z = z.to(self.generator.model[0].weight.dtype) + # z = z.to(self.generator.model[0].weight.dtype) + from IPython import embed + + embed() # generate fake data and compare with validation set h = self.embedder(x) e_hat = self.generator(z) @@ -491,6 +495,7 @@ def _get_dataloader(self, dfs_to_load: dict, prefix: str) -> DataLoader: shuffle = True else: shuffle = False + dataset = GeneratorCryptoDataset( df_to_load=dfs_to_load, cols_to_model=self.hparams.cols_to_model, @@ -713,10 +718,12 @@ def _make_plots(self, y_real, y_fake, pca_x, pca_x_hat, prefix): # ---------------------------------------------------------------------------------- @staticmethod - def _pre_sanity_check(hparams: Namespace): - # ensure we have the rl specific target column in the config + def _pre_sanity_check(hparams: Namespace) -> Namespace: + """Certain sanity checks must happen before preprocessing takes place.""" + + # ensure we have the no specific target column in the config if 
hparams.target_col: - raise ValueError("target_col has to be None for GAn development.") + raise ValueError("target_col has to be None for GAN development.") # fill in the same cols for any df that doesn't have the cols_to_model defined if len(hparams.cols_to_model) > 1: diff --git a/src/dagobert/modelling/dl/data.py b/src/dagobert/modelling/dl/data.py index 5de98d3e..b3f043f6 100644 --- a/src/dagobert/modelling/dl/data.py +++ b/src/dagobert/modelling/dl/data.py @@ -651,7 +651,5 @@ def __getitem__(self, idx): batch_dfs, batch_indices, _ = self._get_batch_dfs_indices_target() from_idx, upto_idx = self._get_from_upto_idxs(idx, batch_indices) Xs = self._get_Xs(batch_dfs, from_idx, upto_idx) - # from IPython import embed - # embed() X = np.concatenate(Xs).T return X From 99cbea4da30b5c612f7e69681bff86c20a5052a2 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Tue, 9 Feb 2021 19:09:12 +0000 Subject: [PATCH 47/62] vaaaaa --- config/custom/tcn_config_m.yaml | 2 +- notebooks/modelling/test_cryptodataset.ipynb | 224 ------------------ .../modelling/augmentation/__init__.py | 1 - .../modelling/augmentation/tgan_args.py | 1 + .../modelling/augmentation/tgan_runner.py | 3 +- .../modelling/augmentation/timegan.py | 17 +- src/dagobert/modelling/dl/__init__.py | 8 +- src/dagobert/modelling/dl/tcn_args.py | 1 + 8 files changed, 12 insertions(+), 245 deletions(-) diff --git a/config/custom/tcn_config_m.yaml b/config/custom/tcn_config_m.yaml index de51bdb2..02858439 100644 --- a/config/custom/tcn_config_m.yaml +++ b/config/custom/tcn_config_m.yaml @@ -15,7 +15,7 @@ auto_scale_batch_size: # -------------------------------------------------------------------------------------- log_dir: logs -num_workers: 1 +num_workers: 4 exp_name: TCN tags: - model1 diff --git a/notebooks/modelling/test_cryptodataset.ipynb b/notebooks/modelling/test_cryptodataset.ipynb index a4d4d903..8e3399e3 100644 --- a/notebooks/modelling/test_cryptodataset.ipynb +++ b/notebooks/modelling/test_cryptodataset.ipynb 
@@ -1715,230 +1715,6 @@ "source": [ "data_loaded[0].shape " ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Misc" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "import torchtext\n", - "import torch\n", - "from torchtext.data.utils import get_tokenizer\n", - "from collections import Counter\n", - "from torchtext.vocab import Vocab\n", - "from torchtext.utils import download_from_url, extract_archive\n", - "import io" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: en_core_web_sm==2.3.1 from https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz#egg=en_core_web_sm==2.3.1 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (2.3.1)\n", - "Requirement already satisfied: spacy<2.4.0,>=2.3.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from en_core_web_sm==2.3.1) (2.3.2)\n", - "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.2)\n", - "Requirement already satisfied: setuptools in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (45.2.0.post20200210)\n", - "Requirement already satisfied: numpy>=1.15.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.18.1)\n", - "Requirement already satisfied: blis<0.5.0,>=0.4.0 in 
c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.4.1)\n", - "Requirement already satisfied: thinc==7.4.1 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.1)\n", - "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.8.0)\n", - "Requirement already satisfied: plac<1.2.0,>=0.9.6 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.1.3)\n", - "Requirement already satisfied: requests<3.0.0,>=2.13.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.23.0)\n", - "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.2)\n", - "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.3)\n", - "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.2)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.45.0)\n", - "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in 
c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2019.11.28)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.4)\n", - "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.9)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.25.8)\n", - "Requirement already satisfied: importlib-metadata>=0.20; python_version < \"3.8\" in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from catalogue<1.1.0,>=0.0.7->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.7.0)\n", - "Requirement already satisfied: zipp>=0.5 in c:\\users\\u164428\\appdata\\local\\continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.2.0)\n", - "[+] Download and installation successful\n", - "You can now load the model via spacy.load('en_core_web_sm')\n", - "[x] Couldn't link model to 'en'\n", - "Creating a symlink in spacy/data failed. Make sure you have the required\n", - "permissions and try re-running the command as admin, or use a virtualenv. 
You\n", - "can still import the model as a module and call its load() method, or create the\n", - "symlink manually.\n", - "C:\\Users\\u164428\\AppData\\Local\\Continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages\\en_core_web_sm\n", - "-->\n", - "C:\\Users\\u164428\\AppData\\Local\\Continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages\\spacy\\data\\en\n", - "[!] Download successful but linking failed\n", - "Creating a shortcut link for 'en' didn't work (maybe you don't have admin\n", - "permissions?), but you can still load the model via its full package name: nlp =\n", - "spacy.load('en_core_web_sm')\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "You do not have sufficient privilege to perform this operation.\n" - ] - } - ], - "source": [ - "! python -m spacy download en" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "en_tokenizer = get_tokenizer('spacy', language='en_core_web_sm')" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "inputs = [\n", - " 'hello, my name is david', \n", - " 'david likes to swim', \n", - " 'the only thing david needs is attention'\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "Argument 'string' has incorrect type (expected str, got list)", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# We tokenize our input variables into numbers based on a loaded vocab\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mtokenized_inputs\u001b[0m 
\u001b[1;33m=\u001b[0m \u001b[0men_tokenizer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# b x max_input_length\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\dagobert\\lib\\site-packages\\torchtext\\data\\utils.py\u001b[0m in \u001b[0;36m_spacy_tokenize\u001b[1;34m(x, spacy)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_spacy_tokenize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mspacy\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 14\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mtok\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtext\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mtok\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mspacy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 15\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mTypeError\u001b[0m: Argument 'string' has incorrect type (expected str, got list)" - ] - } - ], - "source": [ - "# We tokenize our input variables into numbers based on a loaded vocab\n", - "tokenized_inputs = en_tokenizer(inputs) # b x max_input_length" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "def build_vocab(input_list, tokenizer):\n", - " counter = Counter()\n", - " for string_ in input_list:\n", - " counter.update(tokenizer(string_))\n", - " return Vocab(counter, specials=['', '', '', ''])" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - 
"en_vocab = build_vocab(inputs, en_tokenizer)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(, 18)" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "en_vocab, len(en_vocab)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "# We define our layers\n", - "hidden_size = 300\n", - "embedding = torch.nn.Embedding(len(en_vocab), hidden_size)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "# RNN encoded size must be half since bidirectional RNNs\n", - "# produce 2 hidden states: forwards and backwards\n", - "encoded_size = int(hidden_size / 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "rnn = torch.nn.GRU(hidden_size, encoded_size, bidirectional=True, batch_first=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'tokenized_inputs' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0membedded\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0membedding\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtokenized_inputs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# tensor of size b x 300\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;31mNameError\u001b[0m: name 'tokenized_inputs' is not defined" - ] - } - ], - "source": [ - "embedded = embedding(tokenized_inputs) # tensor of size b x 300" - ] } ], "metadata": { diff --git 
a/src/dagobert/modelling/augmentation/__init__.py b/src/dagobert/modelling/augmentation/__init__.py index dbab2837..a4400910 100644 --- a/src/dagobert/modelling/augmentation/__init__.py +++ b/src/dagobert/modelling/augmentation/__init__.py @@ -1,2 +1 @@ from .augmentation import augment -from .timegan import RnnBlock, TimeGANLightning diff --git a/src/dagobert/modelling/augmentation/tgan_args.py b/src/dagobert/modelling/augmentation/tgan_args.py index ef0c4a6e..0491bbcb 100644 --- a/src/dagobert/modelling/augmentation/tgan_args.py +++ b/src/dagobert/modelling/augmentation/tgan_args.py @@ -6,6 +6,7 @@ from pytorch_lightning import Trainer +from dagobert.modelling.dl.tcn import TCNLightning from dagobert.modelling.dl.tcn_args import ( add_run_specific_args, add_model_specific_args, diff --git a/src/dagobert/modelling/augmentation/tgan_runner.py b/src/dagobert/modelling/augmentation/tgan_runner.py index 3ae6c027..90d4e8b6 100644 --- a/src/dagobert/modelling/augmentation/tgan_runner.py +++ b/src/dagobert/modelling/augmentation/tgan_runner.py @@ -5,6 +5,8 @@ command line arguments, but it's much more convenient to use YAML configs for this, see the `tcn_args.py` and `tgan_args.py` for more detail. 
""" +import os +import sys import logging from pathlib import Path @@ -13,7 +15,6 @@ from dagobert.modelling.augmentation.tgan_args import get_all_args from dagobert.modelling.augmentation.timegan import run_tgan - logger = logging.getLogger(__name__) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index c81ed767..8877b1af 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -31,15 +31,11 @@ from pytorch_lightning.metrics import functional as plm from dagobert.naming import NStudy, NPreprocessingArgs as npa -from dagobert.modelling.dl import ( - TemporalConvNet, - Preprocessing, - AdaBelief, - LogCoshLoss, - FocalLoss, - CryptoDataset, - GeneratorCryptoDataset, -) + +from dagobert.modelling.dl import Preprocessing +from dagobert.modelling.dl.data import GeneratorCryptoDataset +from dagobert.modelling.dl import AdaBelief + from dagobert.modelling.augmentation.utils import get_noise, pca_analysis from dagobert.modelling.utils import ( triple_barrier_error, @@ -145,6 +141,7 @@ def __init__( self.linear_activation = linear_activation def forward(self, x): + rnn_out, _hidden = self.rnn(x) rnn_out = self.tanh(rnn_out) # todo: is there reshaping needed? 
@@ -437,9 +434,7 @@ def validation_step(self, batch, batch_idx): ) # z = z.to(self.generator.model[0].weight.dtype) - from IPython import embed - embed() # generate fake data and compare with validation set h = self.embedder(x) e_hat = self.generator(z) diff --git a/src/dagobert/modelling/dl/__init__.py b/src/dagobert/modelling/dl/__init__.py index a2849ec8..52b1e0bb 100644 --- a/src/dagobert/modelling/dl/__init__.py +++ b/src/dagobert/modelling/dl/__init__.py @@ -1,10 +1,4 @@ -from .data import ( - CryptoDataset, - PortfolioCryptoDataset, - ExperienceSourceDataset, - GeneratorCryptoDataset, -) - +from .data import PortfolioCryptoDataset, ExperienceSourceDataset, CryptoDataset from .tcn_net import TemporalConvNet from .utils import LogCoshLoss, FocalLoss, MixedNormalPDFLoss from .adabelief import AdaBelief diff --git a/src/dagobert/modelling/dl/tcn_args.py b/src/dagobert/modelling/dl/tcn_args.py index acaa4198..48370ef3 100644 --- a/src/dagobert/modelling/dl/tcn_args.py +++ b/src/dagobert/modelling/dl/tcn_args.py @@ -5,6 +5,7 @@ from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + from pytorch_lightning import Trainer from dagobert.modelling.dl.tcn import TCNLightning From d646bf61242ea9a097f9a5235a5fa452b584668b Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Thu, 18 Feb 2021 11:44:46 +0000 Subject: [PATCH 48/62] bugz --- config/custom/tcn_config_m.yaml | 41 ++++++++++--------- config/timegan_config.yaml | 9 ++-- .../modelling/augmentation/timegan.py | 32 +++++++++------ src/dagobert/modelling/dl/tcn.py | 4 +- src/dagobert/modelling/utils.py | 6 +-- 5 files changed, 51 insertions(+), 41 deletions(-) diff --git a/config/custom/tcn_config_m.yaml b/config/custom/tcn_config_m.yaml index 02858439..64e60696 100644 --- a/config/custom/tcn_config_m.yaml +++ b/config/custom/tcn_config_m.yaml @@ -22,11 +22,11 @@ tags: - ethusdt_volume500 - simple_lookahead_y no_comet_logger: True -seed: 40 +seed: 42 batch_size: 256 early_stopping_rounds: 15 optimizer: 
adabelief -lr: 0.001 +lr: 'auto' max_lr: 0.1 max_lr_multiplier: 10 one_cycle_length: 60 @@ -40,7 +40,7 @@ output_size: 1 num_channels: [20, 20, 20, 20] kernel_size: 3 dropout: 0.5 -use_last_timepoint: False +use_last_timepoint: True last_y: True non_last_y_frac: 0.5 regression: False @@ -58,14 +58,14 @@ no_sample_weights: True data_dir: "C:/Users/u164428/Desktop/Dagobert/data/modelling" lookback: auto -mini_series_length: 20 +mini_series_length: auto # If this is set to a number, then simple lookahead labelling is in place simple_lookahead_y: 15 simple_lookahead_reg: False # If this is True, anchor is labelled before preprocessing. to_label and simple_lookahead_y cannot be used together. -to_label: False +to_label: True label_sl: 1 label_pt: 1 label_first_or_max: "first" @@ -84,6 +84,12 @@ cols_to_model: - high - low - close + - volume + - cum_volume_buy + - cum_volume_sell + - cum_volume_quote + - cum_volume_quote_buy + - cum_volume_quote_sell # - open_fd_0.0 # - high_fd_0.0 # - low_fd_0.0 @@ -94,12 +100,6 @@ cols_to_model: # - close_fd_tuned # - cum_ticks # - cum_dollar -# - volume -# - cum_volume_buy -# - cum_volume_sell -# - cum_volume_quote -# - cum_volume_quote_buy -# - cum_volume_quote_sell # - sin_date # - cos_date # - sin_time @@ -131,22 +131,23 @@ augment_method: random_fast augment_prob: 0.25 simple_augment_dfs: - std_bar_BTCUSDT_tick_1.feather - - std_bar_LTCUSDT_tick_1.feather - - std_bar_XRPUSDT_tick_1.feather - - std_bar_BTCUSDT_volume_100.feather - - std_bar_LTCUSDT_volume_1000.feather - - std_bar_XRPUSDT_volume_125000.feather - - std_bar_BTCUSDT_dollar_1000000.feather - - std_bar_LTCUSDT_dollar_40000.feather - - std_bar_XRPUSDT_dollar_20000.feather +# - std_bar_LTCUSDT_tick_1.feather +# - std_bar_XRPUSDT_tick_1.feather +# - std_bar_BTCUSDT_volume_100.feather +# - std_bar_LTCUSDT_volume_1000.feather +# - std_bar_XRPUSDT_volume_125000.feather +# - std_bar_BTCUSDT_dollar_1000000.feather +# - std_bar_LTCUSDT_dollar_40000.feather +# - 
std_bar_XRPUSDT_dollar_20000.feather simple_augment_prob: 0.5 +augment_dfs_mix: 0.33 # -------------------------------------------------------------------------------------- # PREPROCESSING # -------------------------------------------------------------------------------------- train_start_date: "2018-06-01" -train_days: 1 +train_days: 2 val_days: 1 val_train_offset_days: 1 val_puffer_days: 1 diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index 33cb093a..526c4625 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -4,7 +4,9 @@ # -------------------------------------------------------------------------------------- gpus: 0 - +pin_memory: True +val_check_interval: 1 +print_nan_grads: True # -------------------------------------------------------------------------------------- # RUN @@ -17,15 +19,14 @@ tags: - time_gan_test no_comet_logger: True seed: 42 -batch_size: 256 - +batch_size: 64 # -------------------------------------------------------------------------------------- # GAN # -------------------------------------------------------------------------------------- # gru or lstm -rnn: gru +rnn: "gru" # embedding weight in cost of generator loss emb_weight: 1 diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 8877b1af..5ca35dc6 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -30,7 +30,7 @@ from pytorch_lightning import Trainer, Callback, loggers from pytorch_lightning.metrics import functional as plm -from dagobert.naming import NStudy, NPreprocessingArgs as npa +from dagobert.naming import NStudy, NGAN, NPreprocessingArgs as npa from dagobert.modelling.dl import Preprocessing from dagobert.modelling.dl.data import GeneratorCryptoDataset @@ -118,7 +118,7 @@ def __init__( super(RnnBlock, self).__init__() # input/output: (batch, seq, feature) - if rnn == "lstm": + if rnn == NGAN.lstm: 
self.rnn = nn.LSTM( input_size=input_size, hidden_size=hidden_size, @@ -126,7 +126,7 @@ def __init__( dropout=dropout, batch_first=batch_first, ) - elif rnn == "gru": + elif rnn == NGAN.gru: self.rnn = nn.GRU( input_size=input_size, hidden_size=hidden_size, @@ -141,11 +141,10 @@ def __init__( self.linear_activation = linear_activation def forward(self, x): - rnn_out, _hidden = self.rnn(x) rnn_out = self.tanh(rnn_out) # todo: is there reshaping needed? - rnn_out = rnn_out.reshape(-1, self.linear_input_size) + # rnn_out = rnn_out.reshape(-1, self.linear_input_size) output = self.linear(rnn_out) if self.linear_activation: output = self.sigmoid(output) @@ -241,6 +240,7 @@ def __init__(self, hparams: Namespace): rnn=self.hparams.rnn, linear_activation=True, ) + # TODO: shape of disc (batch, time, 1) or more rather (batch, 1) self.discriminator = RnnBlock( input_size=self.hparams.hidden_size, hidden_size=self.hparams.hidden_size, @@ -269,7 +269,8 @@ def training_step(self, batch, batch_idx, optimizer_idx): Returns: Loss """ - x = batch + print(f"train_step optimizer_idx: {optimizer_idx}") + x = batch.float() batch_len = len(x) h = self.embedder(x) @@ -288,6 +289,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): prog_bar=True, logger=True, ) + return loss_e elif optimizer_idx == 3: @@ -341,6 +343,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): # synthetic data x_hat = self.recovery(h_hat) + # no_grad to leave discriminator unchanged with torch.no_grad(): y_fake = self.discriminator(h_hat) @@ -364,10 +367,11 @@ def training_step(self, batch, batch_idx, optimizer_idx): ) return loss_gen + # TODO: if (check_d_loss > 0.15): # update discriminator elif optimizer_idx == 4: e_hat = self.generator(z) - h_hat = self.supervisor(e_hat) + h_hat = self.supervisor(e_hat.detach()) # detach to update only discriminator y_fake = self.discriminator(h_hat.detach()) y_fake_e = self.discriminator(e_hat.detach()) @@ -386,6 +390,7 @@ def training_step(self, batch, 
batch_idx, optimizer_idx): ) # pytorch lightning needs to have "loss" in the return dict return { + "loss": loss_disc, "loss_disc/train": loss_disc, "y_fake/train": y_fake, "y_fake_e/train": y_fake_e, @@ -422,7 +427,9 @@ def training_epoch_end(self, outputs): return self._epoch_end(outputs, "train") def validation_step(self, batch, batch_idx): - x = batch + # change float64 to float32 + print(f"val_step batch idx: {batch_idx} LOFASZJOSKA") + x = batch.float() batch_len = len(x) # noise @@ -433,10 +440,9 @@ def validation_step(self, batch, batch_idx): device=self.tgan_device, ) - # z = z.to(self.generator.model[0].weight.dtype) - # generate fake data and compare with validation set h = self.embedder(x) + e_hat = self.generator(z) h_hat = self.supervisor(e_hat) x_hat = self.recovery(h_hat) @@ -648,8 +654,8 @@ def _epoch_end(self, outputs, prefix="val"): y_fake_e.append(x[f"y_fake_e/{prefix}"]) pca_x.append(x[f"pca_x/{prefix}"]) pca_x_hat.append(x[f"pca_x_hat/{prefix}"]) - # log sampled images - self._make_plots(y_real, y_fake, pca_x, pca_x_hat, prefix) + # log sampled images, only first batch (2 validation rounds @ start) + self._make_plots(y_real[0], y_fake[0], pca_x[0], pca_x_hat[0], prefix) # ---------------------------------------------------------------------------------- # PLOTTING AND LOGGING FUNCTIONS @@ -727,4 +733,6 @@ def _pre_sanity_check(hparams: Namespace) -> Namespace: hparams.cols_to_model[df_name] = deepcopy( hparams.cols_to_model[npa.anchor] ) + if hparams.rnn not in [NGAN.gru, NGAN.lstm]: + raise ValueError("rnn has to be either 'gru' or 'lstm'.") return hparams diff --git a/src/dagobert/modelling/dl/tcn.py b/src/dagobert/modelling/dl/tcn.py index 79dd6457..e17790a6 100644 --- a/src/dagobert/modelling/dl/tcn.py +++ b/src/dagobert/modelling/dl/tcn.py @@ -737,8 +737,8 @@ def _check_mini_series_lookback(hparams: Namespace) -> Namespace: num_channels = f"{case}num_channels" k_size = f"{case}kernel_size" if num_channels in hparams: - net_depth = 
len(hparams.__getattribute__(num_channels)) - k_size = hparams.__getattribute__(k_size) + net_depth = len(hparams[num_channels]) + k_size = hparams[k_size] max_seq_len = TemporalConvNet.get_tcn_receptive_field_size( k_size, net_depth ) diff --git a/src/dagobert/modelling/utils.py b/src/dagobert/modelling/utils.py index 90bf465c..69a3d876 100644 --- a/src/dagobert/modelling/utils.py +++ b/src/dagobert/modelling/utils.py @@ -423,7 +423,7 @@ def plot_anchor_sample(i, obj, x): df.plot(subplots=True, layout=(int(np.ceil((len(cols) / 4))), 4)) -def plot_pca(pca_x, pca_x_hat): +def plot_pca(pca_x, pca_x_hat) -> Figure: """ Plot PCA-reduced x and x_hat to visualise similarity. Overlap suggests similarity. Args: @@ -431,10 +431,10 @@ def plot_pca(pca_x, pca_x_hat): pca_x_hat: 2-component-PCA of x_hat Returns: - Scatter plot showing 2-component-PCA of x & x_hat. + Scatter plot showing 2-component-PCA of x & x_hat. """ - f, ax = plt.subplots(1) + length = pca_x.shape[0] colors = ["red" for i in range(length)] + ["blue" for i in range(length)] plt.scatter(pca_x[:, 0], pca_x[:, 1], c=colors[:length], alpha=0.2, label="Real") From 26f41b06b41a2d3c0398309cd46f036991029a0f Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Mon, 22 Feb 2021 17:36:15 +0000 Subject: [PATCH 49/62] just do it --- .../modelling/augmentation/timegan.py | 69 ++++++++++--------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 5ca35dc6..2059e549 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -79,11 +79,11 @@ def run_tgan(args): # setup callbacks checkpoint_callback = ModelCheckpoint( - monitor="loss_gen", + monitor="loss_disc/val", filename="_{epoch:02d}_{avg_reward:.10f}", dirpath=f"{args.log_dir}/models/{args.exp_name}_{tb_logger.version}", save_top_k=3, - mode="min", + mode="max", ) # define trainer and and lightning module 
@@ -134,7 +134,7 @@ def __init__( dropout=dropout, batch_first=batch_first, ) - self.linear_input_size = linear_input_size + self.linear_output_size = linear_output_size self.tanh = nn.Tanh() self.linear = nn.Linear(linear_input_size, linear_output_size) self.sigmoid = nn.Sigmoid() @@ -143,11 +143,11 @@ def __init__( def forward(self, x): rnn_out, _hidden = self.rnn(x) rnn_out = self.tanh(rnn_out) - # todo: is there reshaping needed? - # rnn_out = rnn_out.reshape(-1, self.linear_input_size) + # reshape if net is the discirminator + if self.linear_output_size == 1: + rnn_out = rnn_out.reshape(rnn_out.shape[0], -1) output = self.linear(rnn_out) - if self.linear_activation: - output = self.sigmoid(output) + output = self.sigmoid(output) return output @@ -245,7 +245,8 @@ def __init__(self, hparams: Namespace): input_size=self.hparams.hidden_size, hidden_size=self.hparams.hidden_size, num_layers=self.hparams.num_layers, - linear_input_size=self.hparams.hidden_size, + linear_input_size=self.hparams.hidden_size + * self.hparams.mini_series_length, linear_output_size=1, dropout=self.hparams.dropout, batch_first=True, @@ -282,7 +283,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): if optimizer_idx == 0: loss_e = TimeGANLightning.embed_loss0(x_tilde, x) self.log( - "loss_e", + "loss_e/train", loss_e, on_step=False, on_epoch=True, @@ -301,7 +302,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): h, ) self.log( - "loss_embed", + "loss_embed/train", loss_embed, on_step=False, on_epoch=True, @@ -315,7 +316,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): h_hat_supervise = self.supervisor(h) loss_supervisor = TimeGANLightning.supervisor_loss(h_hat_supervise, h) self.log( - "loss_supervisor", + "loss_supervisor/train", loss_supervisor, on_step=False, on_epoch=True, @@ -358,7 +359,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): self.hparams.emb_weight, ) self.log( - "loss_gen", + "loss_gen/train", loss_gen, on_step=False, 
on_epoch=True, @@ -423,12 +424,8 @@ def configure_optimizers(self) -> List[optim.Optimizer]: def train_dataloader(self): return self._get_dataloader(self.hparams.df_train, "train") - def training_epoch_end(self, outputs): - return self._epoch_end(outputs, "train") - def validation_step(self, batch, batch_idx): # change float64 to float32 - print(f"val_step batch idx: {batch_idx} LOFASZJOSKA") x = batch.float() batch_len = len(x) @@ -442,7 +439,6 @@ def validation_step(self, batch, batch_idx): # generate fake data and compare with validation set h = self.embedder(x) - e_hat = self.generator(z) h_hat = self.supervisor(e_hat) x_hat = self.recovery(h_hat) @@ -476,6 +472,9 @@ def validation_step(self, batch, batch_idx): def validation_epoch_end(self, outputs): return self._epoch_end(outputs, "val") + def training_epoch_end(self, outputs): + return self._epoch_end(outputs, "train") + def val_dataloader(self): return self._get_dataloader(self.hparams.df_val, "val") @@ -536,7 +535,8 @@ def embed_loss0(x_tilde, x): def supervisor_loss(h_hat_supervise, h): """ This loss further ensures that generator produces similar stepwise transitions - (evaluated by ground-truth targets). + (evaluated by ground-truth targets). Responsible to capture how well the + generator approximates the next time step in the latent space. 
Args: h_hat_supervise: supervisors output from feeding h (real embedding) through h: real embedding defined by embedder net @@ -641,21 +641,24 @@ def _epoch_end(self, outputs, prefix="val"): Returns: """ - avg_loss = [] - y_real = [] - y_fake = [] - y_fake_e = [] - pca_x = [] - pca_x_hat = [] - for x in outputs: - avg_loss.append(x[f"loss_disc/{prefix}"]) - y_real.append(x[f"y_real/{prefix}"]) - y_fake.append(x[f"y_fake/{prefix}"]) - y_fake_e.append(x[f"y_fake_e/{prefix}"]) - pca_x.append(x[f"pca_x/{prefix}"]) - pca_x_hat.append(x[f"pca_x_hat/{prefix}"]) - # log sampled images, only first batch (2 validation rounds @ start) - self._make_plots(y_real[0], y_fake[0], pca_x[0], pca_x_hat[0], prefix) + if prefix == "train": + pass + elif prefix == "val": + avg_loss = [] + y_real = [] + y_fake = [] + y_fake_e = [] + pca_x = [] + pca_x_hat = [] + for x in outputs: + avg_loss.append(x[f"loss_disc/{prefix}"]) + y_real.append(x[f"y_real/{prefix}"]) + y_fake.append(x[f"y_fake/{prefix}"]) + y_fake_e.append(x[f"y_fake_e/{prefix}"]) + pca_x.append(x[f"pca_x/{prefix}"]) + pca_x_hat.append(x[f"pca_x_hat/{prefix}"]) + # log sampled images, only first batch (2 validation rounds @ start) + self._make_plots(y_real[0], y_fake[0], pca_x[0], pca_x_hat[0], prefix) # ---------------------------------------------------------------------------------- # PLOTTING AND LOGGING FUNCTIONS From de5339976ac836bb7829a1932154a0788675d48f Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Tue, 23 Feb 2021 14:35:03 +0000 Subject: [PATCH 50/62] corrections --- config/timegan_config.yaml | 6 ++-- .../modelling/augmentation/timegan.py | 29 ++++++++++++------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index 526c4625..78befdae 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -5,7 +5,7 @@ gpus: 0 pin_memory: True -val_check_interval: 1 +val_check_interval: 0.5 print_nan_grads: True # 
-------------------------------------------------------------------------------------- @@ -44,7 +44,7 @@ optimizer: "adamw" dropout: 0.2 num_layers: 1 hidden_size: 10 -z_dim: 12 +z_dim: 32 mini_series_length: 20 # don't change order with lr dict lr: @@ -98,7 +98,7 @@ augment_dfs_mix: 0 # -------------------------------------------------------------------------------------- train_start_date: "2019-01-01" -train_days: 1 +train_days: 10 val_days: 1 val_train_offset_days: 1 val_puffer_days: 1 diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 2059e549..0f6992ef 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -143,11 +143,13 @@ def __init__( def forward(self, x): rnn_out, _hidden = self.rnn(x) rnn_out = self.tanh(rnn_out) - # reshape if net is the discirminator + # reshape if net is the discriminator, and no activation if self.linear_output_size == 1: rnn_out = rnn_out.reshape(rnn_out.shape[0], -1) - output = self.linear(rnn_out) - output = self.sigmoid(output) + output = self.linear(rnn_out) + else: + output = self.linear(rnn_out) + output = self.sigmoid(output) return output @@ -270,7 +272,6 @@ def training_step(self, batch, batch_idx, optimizer_idx): Returns: Loss """ - print(f"train_step optimizer_idx: {optimizer_idx}") x = batch.float() batch_len = len(x) @@ -573,8 +574,9 @@ def generator_loss( Loss """ # adversarial - g_loss_u = nn.BCELoss()(y_fake, torch.ones_like(y_fake)) - g_loss_u_e = nn.BCELoss()(y_fake_e, torch.ones_like(y_fake_e)) + criterion = nn.BCEWithLogitsLoss() + g_loss_u = criterion(y_fake, torch.ones_like(y_fake)) + g_loss_u_e = criterion(y_fake_e, torch.ones_like(y_fake_e)) w_g_loss_u_e = emb_weight * g_loss_u_e # supervisor g_loss_s = TimeGANLightning.supervisor_loss(h_hat_supervise, h) @@ -608,7 +610,8 @@ def embedder_loss(x_tilde, x, h_hat_supervise, h): @staticmethod def discriminator_loss(y_fake, y_fake_e, y_real, 
emb_weight): """ - Discriminator’s binary adversarial feedback, both on fake and real data. + Discriminator’s binary adversarial feedback, both on fake and real data. Real + data is labelled as 1, fake as 0. Args: y_fake: logits for classification of fakes (from h_hat) y_fake_e: logits for classification of fake embeddings (from e_hat) @@ -618,13 +621,11 @@ def discriminator_loss(y_fake, y_fake_e, y_real, emb_weight): Returns: Loss """ - # TODO: is this the correct loss? discriminator returns logits w/out activation. - # changed compared to original TF implementation criterion = nn.BCEWithLogitsLoss() d_loss_fake_e = criterion(y_fake_e, torch.zeros_like(y_fake_e)) d_loss_fake = criterion(y_fake, torch.zeros_like(y_fake)) d_loss_real = criterion(y_real, torch.ones_like(y_real)) - # TODO: any use of dividing loss by (2 + emb_weight)? + # TODO: any use of dividing loss by (2 + emb_weight)? probably not return emb_weight * d_loss_fake_e + d_loss_fake + d_loss_real # ---------------------------------------------------------------------------------- @@ -711,9 +712,15 @@ def _make_plots(self, y_real, y_fake, pca_x, pca_x_hat, prefix): ) # HISTOGRAM + # discirminator's take on real data + y_real_class = (torch.sigmoid(y_real) > 0.5).int() + y_real_for_hist = torch.sigmoid(y_real) + # discirminator's take on fake data + y_fake_class = (torch.sigmoid(y_fake) > 0.5).int() + y_fake_for_hist = torch.sigmoid(y_fake) self._log_image( f"real v fake hist/{prefix}", - hist_from_tensor(y_real, y_fake), + hist_from_tensor(y_real, y_real_for_hist), self.current_epoch, ) From 4f055d3cb0aac09f311a7cd84d3be385b06b0acf Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Wed, 24 Feb 2021 20:02:31 +0000 Subject: [PATCH 51/62] TGAN --- .../modelling/augmentation/timegan.py | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 0f6992ef..7c691182 100644 --- 
a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -100,7 +100,7 @@ def run_tgan(args): class RnnBlock(nn.Module): """ - Class for creating 5 components of TimeGAN. + Class for creating 5 different rnn-based nets as components of TimeGAN. """ def __init__( @@ -265,8 +265,8 @@ def training_step(self, batch, batch_idx, optimizer_idx): """ Carries out updates to networks from a batch of real samples. Args: - batch: batch of - batch_idx: + batch: batch of X. + batch_idx: idx of batch optimizer_idx: idx that controls optimizing the 5 networks Returns: @@ -390,14 +390,16 @@ def training_step(self, batch, batch_idx, optimizer_idx): prog_bar=True, logger=True, ) - # pytorch lightning needs to have "loss" in the return dict - return { - "loss": loss_disc, - "loss_disc/train": loss_disc, - "y_fake/train": y_fake, - "y_fake_e/train": y_fake_e, - "y_real/train": y_real, - } + # limit discriminator from being "too good" + if loss_disc > 0.15: + # pytorch lightning needs to have "loss" in the return dict + return { + "loss": loss_disc, + "loss_disc/train": loss_disc, + "y_fake/train": y_fake, + "y_fake_e/train": y_fake_e, + "y_real/train": y_real, + } def configure_optimizers(self) -> List[optim.Optimizer]: """ @@ -625,7 +627,7 @@ def discriminator_loss(y_fake, y_fake_e, y_real, emb_weight): d_loss_fake_e = criterion(y_fake_e, torch.zeros_like(y_fake_e)) d_loss_fake = criterion(y_fake, torch.zeros_like(y_fake)) d_loss_real = criterion(y_real, torch.ones_like(y_real)) - # TODO: any use of dividing loss by (2 + emb_weight)? probably not + # TODO: any use of dividing loss by (2 + emb_weight)? probably readability return emb_weight * d_loss_fake_e + d_loss_fake + d_loss_real # ---------------------------------------------------------------------------------- @@ -637,7 +639,7 @@ def _epoch_end(self, outputs, prefix="val"): and log them. Finally, we make plots using all the y_true and y_preds. 
Args: outputs: - prefix: + prefix: indicates train or val epoch end Returns: From 9c4913b62d0de07b27fbe9ef79171a7a2f17d931 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Sun, 28 Feb 2021 21:33:40 +0000 Subject: [PATCH 52/62] fixed optimizers as per TGAN paper. not sure if/why images log or not or what the HEKK --- .../modelling/augmentation/timegan.py | 97 +++++++++---------- src/dagobert/modelling/utils.py | 8 +- 2 files changed, 49 insertions(+), 56 deletions(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 7c691182..5365b70c 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -186,9 +186,7 @@ def __init__(self, hparams: Namespace): self.tgan_device = "cuda" if hparams.gpus > 0 else "cpu" # prepare datafiles if necessary self.hparams = Preprocessing().preprocess_train_dfs(hparams) - # TODO: any sanity checks on data, hypermparams - self.real_logging = None self.comet_logging = not self.hparams.no_comet_logger @@ -277,8 +275,8 @@ def training_step(self, batch, batch_idx, optimizer_idx): h = self.embedder(x) - # optimizers #0 & #3 update embedder nets - if optimizer_idx in [0, 3]: + # optimizers #0, #3 & #5 update embedder nets + if optimizer_idx in [0, 3, 5]: x_tilde = self.recovery(h) # optimize embedding via embedder and recovery nets if optimizer_idx == 0: @@ -294,7 +292,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): return loss_e - elif optimizer_idx == 3: + elif optimizer_idx in [3, 5]: h_hat_supervise = self.supervisor(h) loss_embed = TimeGANLightning.embedder_loss( x_tilde, @@ -326,9 +324,8 @@ def training_step(self, batch, batch_idx, optimizer_idx): ) return loss_supervisor - # TODO: If you need to control how often those optimizers step or override - # the default .step() schedule, override the optimizer_step() hook. 
- elif optimizer_idx in [2, 4]: + # TODO: can we streamline 7 optimizers to 5 with optimizer_step() hook? + elif optimizer_idx in [2, 4, 6]: # random input to generator z = get_noise( batch_len, @@ -337,41 +334,39 @@ def training_step(self, batch, batch_idx, optimizer_idx): device=self.tgan_device, ) # update generator - if optimizer_idx == 2: - for i in range(2): - e_hat = self.generator(z) - h_hat = self.supervisor(e_hat) - h_hat_supervise = self.supervisor(h) - - # synthetic data - x_hat = self.recovery(h_hat) - - # no_grad to leave discriminator unchanged - with torch.no_grad(): - y_fake = self.discriminator(h_hat) - y_fake_e = self.discriminator(e_hat) - loss_gen = TimeGANLightning.generator_loss( - y_fake, - y_fake_e, - h, - h_hat_supervise, - x, - x_hat, - self.hparams.emb_weight, - ) - self.log( - "loss_gen/train", - loss_gen, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, - ) - return loss_gen + if optimizer_idx in [2, 4]: + e_hat = self.generator(z) + h_hat = self.supervisor(e_hat) + h_hat_supervise = self.supervisor(h) + + # synthetic data + x_hat = self.recovery(h_hat) + + # no_grad to leave discriminator unchanged + with torch.no_grad(): + y_fake = self.discriminator(h_hat) + y_fake_e = self.discriminator(e_hat) + loss_gen = TimeGANLightning.generator_loss( + y_fake, + y_fake_e, + h, + h_hat_supervise, + x, + x_hat, + self.hparams.emb_weight, + ) + self.log( + "loss_gen/train", + loss_gen, + on_step=False, + on_epoch=True, + prog_bar=True, + logger=True, + ) + return loss_gen - # TODO: if (check_d_loss > 0.15): # update discriminator - elif optimizer_idx == 4: + elif optimizer_idx == 6: e_hat = self.generator(z) h_hat = self.supervisor(e_hat.detach()) # detach to update only discriminator @@ -411,6 +406,8 @@ def configure_optimizers(self) -> List[optim.Optimizer]: list(self.generator.parameters()) + list(self.supervisor.parameters()), list(self.generator.parameters()) + list(self.supervisor.parameters()), 
list(self.embedder.parameters()) + list(self.recovery.parameters()), + list(self.generator.parameters()) + list(self.supervisor.parameters()), + list(self.embedder.parameters()) + list(self.recovery.parameters()), list(self.discriminator.parameters()), ] # TODO: diff lr for each net @@ -683,12 +680,11 @@ def _log_image(self, image_name, image_data, i): """ Logs any generated image to both tensorboard and comet. """ - if self.real_logging: - self.logger.experiment[0].add_image(image_name, fig_to_tb(image_data), i) - if self.comet_logging: - self.logger.experiment[1].log_image( - fig_to_comet(image_data), name=image_name, step=i - ) + self.logger.experiment[0].add_image(image_name, fig_to_tb(image_data), i) + if self.comet_logging: + self.logger.experiment[1].log_image( + fig_to_comet(image_data), name=image_name, step=i + ) def _log_graph(self, datasets: GeneratorCryptoDataset): """ @@ -697,8 +693,7 @@ def _log_graph(self, datasets: GeneratorCryptoDataset): examples_dataloader = DataLoader(datasets, batch_size=32) example_shapes = [xi.shape for xi in next(iter(examples_dataloader))[0]] examples = [torch.rand(*s).float().to(self.tgan_device) for s in example_shapes] - if self.real_logging: - self.logger.experiment[0].add_graph(self, examples) + self.logger.experiment[0].add_graph(self, examples) def _make_plots(self, y_real, y_fake, pca_x, pca_x_hat, prefix): """ @@ -714,10 +709,10 @@ def _make_plots(self, y_real, y_fake, pca_x, pca_x_hat, prefix): ) # HISTOGRAM - # discirminator's take on real data + # discriminator's take on real data y_real_class = (torch.sigmoid(y_real) > 0.5).int() y_real_for_hist = torch.sigmoid(y_real) - # discirminator's take on fake data + # discriminator's take on fake data y_fake_class = (torch.sigmoid(y_fake) > 0.5).int() y_fake_for_hist = torch.sigmoid(y_fake) self._log_image( diff --git a/src/dagobert/modelling/utils.py b/src/dagobert/modelling/utils.py index 69a3d876..0de5e3ff 100644 --- a/src/dagobert/modelling/utils.py +++ 
b/src/dagobert/modelling/utils.py @@ -435,19 +435,17 @@ def plot_pca(pca_x, pca_x_hat) -> Figure: """ f, ax = plt.subplots(1) - length = pca_x.shape[0] - colors = ["red" for i in range(length)] + ["blue" for i in range(length)] - plt.scatter(pca_x[:, 0], pca_x[:, 1], c=colors[:length], alpha=0.2, label="Real") + plt.scatter(pca_x[:, 0], pca_x[:, 1], c="black", alpha=0.2, label="Real") plt.scatter( pca_x_hat[:, 0], pca_x_hat[:, 1], - c=colors[length:], + c="red", alpha=0.2, label="Synthetic", ) ax.legend() plt.title("PCA plot") - plt.xlabel("x-pca") + plt.xlabel("x_pca") plt.ylabel("y_pca") plt.close() return f From efc0556abacc16373df3956f81eb9c0bdeb430a1 Mon Sep 17 00:00:00 2001 From: MateMarcell Date: Sun, 28 Feb 2021 21:42:42 +0000 Subject: [PATCH 53/62] fix fix --- config/timegan_config.yaml | 2 ++ src/dagobert/modelling/augmentation/timegan.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index 78befdae..59bf1bc3 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -52,6 +52,8 @@ lr: supervisor: 0.001 generator: 0.001 embedder1: 0.001 + generator_: 0.001 + embedder1_: 0.001 discriminator: 0.001 # -------------------------------------------------------------------------------------- diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 5365b70c..99263636 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -410,7 +410,6 @@ def configure_optimizers(self) -> List[optim.Optimizer]: list(self.embedder.parameters()) + list(self.recovery.parameters()), list(self.discriminator.parameters()), ] - # TODO: diff lr for each net if "adamw" in self.hparams.optimizer.lower(): for param_pair, network in zip(param_pairs, self.hparams.lr.keys()): optimizer = torch.optim.AdamW(param_pair, lr=self.hparams.lr[network]) From 
cc8f9b80d551e5be36717998dd24433d7340f9d0 Mon Sep 17 00:00:00 2001 From: marcell_mate Date: Wed, 17 Mar 2021 19:55:59 +0000 Subject: [PATCH 54/62] testing on local and adding more flexibility --- config/timegan_config.yaml | 48 +++++++++++-------- .../modelling/augmentation/timegan.py | 34 +++++++++---- src/dagobert/modelling/dl/preprocessing.py | 1 + src/dagobert/naming.py | 5 ++ 4 files changed, 61 insertions(+), 27 deletions(-) diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index 59bf1bc3..eb5de9e2 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -3,7 +3,7 @@ # LIGHTNING # -------------------------------------------------------------------------------------- -gpus: 0 +gpus: 1 pin_memory: True val_check_interval: 0.5 print_nan_grads: True @@ -19,14 +19,14 @@ tags: - time_gan_test no_comet_logger: True seed: 42 -batch_size: 64 +batch_size: 128 # -------------------------------------------------------------------------------------- # GAN # -------------------------------------------------------------------------------------- # gru or lstm -rnn: "gru" +rnn: "lstm" # embedding weight in cost of generator loss emb_weight: 1 @@ -41,20 +41,28 @@ binariser_method: # -------------------------------------------------------------------------------------- optimizer: "adamw" -dropout: 0.2 -num_layers: 1 -hidden_size: 10 +dropout: + recovery: 0.2 + embedder: 0.2 + supervisor: 0.2 + generator: 0.2 + discriminator: 0.2 + +num_layers: 2 +hidden_size: 32 z_dim: 32 -mini_series_length: 20 -# don't change order with lr dict +mini_series_length: 256 + +# don't change order with lr dict. +# generator_, embedder1_ separated out for ease of code for now. 
keep lr constant lr: - embedder0: 0.001 - supervisor: 0.001 - generator: 0.001 - embedder1: 0.001 - generator_: 0.001 - embedder1_: 0.001 - discriminator: 0.001 + embedder0: 0.0005 + supervisor: 0.0005 + generator: 0.0005 + embedder1: 0.0005 + generator_: 0.0005 + embedder1_: 0.0005 + discriminator: 0.0005 # -------------------------------------------------------------------------------------- # DATA @@ -62,11 +70,13 @@ lr: #data_dir: "C:/Work/dagobert/data/modelling" #data_dir: "/home/daniel/dagobert_data/modelling" -data_dir: "C:/Users/u164428/Desktop/Dagobert/data/modelling" +data_dir: "C:/Users/marcell/d/data/modelling" df_train: anchor: std_bar_ETHUSDT_tick_1.feather +# anchor: std_bar_ETHUSDT_tick_1.feather + df_val: df_test: @@ -78,8 +88,8 @@ cols_to_model: - high - low - close - - cum_ticks - - cum_dollar +# - cum_ticks +# - cum_dollar - volume - cum_volume_buy - cum_volume_sell @@ -100,7 +110,7 @@ augment_dfs_mix: 0 # -------------------------------------------------------------------------------------- train_start_date: "2019-01-01" -train_days: 10 +train_days: 300 val_days: 1 val_train_offset_days: 1 val_puffer_days: 1 diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 99263636..b429e3cc 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -79,11 +79,11 @@ def run_tgan(args): # setup callbacks checkpoint_callback = ModelCheckpoint( - monitor="loss_disc/val", + monitor="loss_gen/val", filename="_{epoch:02d}_{avg_reward:.10f}", dirpath=f"{args.log_dir}/models/{args.exp_name}_{tb_logger.version}", - save_top_k=3, - mode="max", + save_top_k=5, + mode="min", ) # define trainer and and lightning module @@ -201,7 +201,7 @@ def __init__(self, hparams: Namespace): num_layers=self.hparams.num_layers, linear_input_size=self.hparams.hidden_size, linear_output_size=self.hparams.hidden_size, - dropout=self.hparams.dropout, + 
dropout=self.hparams.dropout[NGAN.generator], batch_first=True, rnn=self.hparams.rnn, linear_activation=True, @@ -212,7 +212,7 @@ def __init__(self, hparams: Namespace): num_layers=self.hparams.num_layers, linear_input_size=self.hparams.hidden_size, linear_output_size=self.hparams.hidden_size, - dropout=self.hparams.dropout, + dropout=self.hparams.dropout[NGAN.embedder], batch_first=True, rnn=self.hparams.rnn, linear_activation=True, @@ -224,7 +224,7 @@ def __init__(self, hparams: Namespace): num_layers=self.hparams.num_layers, linear_input_size=self.hparams.hidden_size, linear_output_size=self.hparams.hidden_size, - dropout=self.hparams.dropout, + dropout=self.hparams.dropout[NGAN.supervisor], batch_first=True, rnn=self.hparams.rnn, linear_activation=True, @@ -235,7 +235,7 @@ def __init__(self, hparams: Namespace): num_layers=self.hparams.num_layers, linear_input_size=self.hparams.hidden_size, linear_output_size=all_inputs, - dropout=self.hparams.dropout, + dropout=self.hparams.dropout[NGAN.recovery], batch_first=True, rnn=self.hparams.rnn, linear_activation=True, @@ -248,7 +248,7 @@ def __init__(self, hparams: Namespace): linear_input_size=self.hparams.hidden_size * self.hparams.mini_series_length, linear_output_size=1, - dropout=self.hparams.dropout, + dropout=self.hparams.dropout[NGAN.discriminator], batch_first=True, rnn=self.hparams.rnn, linear_activation=False, @@ -441,6 +441,7 @@ def validation_step(self, batch, batch_idx): e_hat = self.generator(z) h_hat = self.supervisor(e_hat) x_hat = self.recovery(h_hat) + h_hat_supervise = self.supervisor(h) y_fake = self.discriminator(h_hat.detach()) y_fake_e = self.discriminator(e_hat.detach()) @@ -448,6 +449,23 @@ def validation_step(self, batch, batch_idx): pca_x, pca_x_hat = pca_analysis(t2n(x), t2n(x_hat)) + loss_gen = TimeGANLightning.generator_loss( + y_fake, + y_fake_e, + h, + h_hat_supervise, + x, + x_hat, + self.hparams.emb_weight, + ) + self.log( + "loss_gen/val", + loss_gen, + on_step=False, + 
on_epoch=True, + prog_bar=True, + logger=True, + ) loss_disc = TimeGANLightning.discriminator_loss( y_fake, y_fake_e, y_real, self.hparams.emb_weight ) diff --git a/src/dagobert/modelling/dl/preprocessing.py b/src/dagobert/modelling/dl/preprocessing.py index 5654064b..dddf7c82 100644 --- a/src/dagobert/modelling/dl/preprocessing.py +++ b/src/dagobert/modelling/dl/preprocessing.py @@ -345,6 +345,7 @@ def _preprocess_train_dfs( df_to_scale = df_split[cols_to_scale].values.reshape(-1, 1) log_msg = f"{cols_to_scale} of {df_name} {df_split_name} with {id(sc)}." try: + #todo: scikit 0.24+ needs values.reshape(-1,1) to transform check_is_fitted(sc) df_split[cols_to_scale] = sc.transform(df_to_scale) logger.info(f"Transformed {log_msg}") diff --git a/src/dagobert/naming.py b/src/dagobert/naming.py index fd2da5a8..f5af7438 100644 --- a/src/dagobert/naming.py +++ b/src/dagobert/naming.py @@ -293,3 +293,8 @@ class NGAN(object): gru = "gru" lstm = "lstm" + embedder = "embedder" + supervisor = "supervisor" + generator = "generator" + recovery = "recovery" + discriminator = "discriminator" From 79b9ee22fecf0f0ee55cb654fc62ca60e184292f Mon Sep 17 00:00:00 2001 From: marcell_mate Date: Wed, 17 Mar 2021 20:12:38 +0000 Subject: [PATCH 55/62] black. run! --- src/dagobert/modelling/augmentation/timegan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index b429e3cc..32bc7a38 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -263,7 +263,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): """ Carries out updates to networks from a batch of real samples. Args: - batch: batch of X. 
+ batch: batch of X batch_idx: idx of batch optimizer_idx: idx that controls optimizing the 5 networks From f7c93d06d9e5002835782faca83abf54d241f0d9 Mon Sep 17 00:00:00 2001 From: marcell_mate Date: Wed, 24 Mar 2021 22:47:32 +0000 Subject: [PATCH 56/62] more graphs --- config/timegan_config.yaml | 13 +++---- .../modelling/augmentation/timegan.py | 36 +++++++++++++++---- src/dagobert/modelling/augmentation/utils.py | 23 ++++++++++++ src/dagobert/modelling/utils.py | 28 +++++++++++++++ 4 files changed, 88 insertions(+), 12 deletions(-) diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index eb5de9e2..cec6b6de 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -26,7 +26,7 @@ batch_size: 128 # -------------------------------------------------------------------------------------- # gru or lstm -rnn: "lstm" +rnn: "gru" # embedding weight in cost of generator loss emb_weight: 1 @@ -48,10 +48,10 @@ dropout: generator: 0.2 discriminator: 0.2 -num_layers: 2 -hidden_size: 32 +num_layers: 1 +hidden_size: 24 z_dim: 32 -mini_series_length: 256 +mini_series_length: 128 # don't change order with lr dict. # generator_, embedder1_ separated out for ease of code for now. 
keep lr constant @@ -75,7 +75,7 @@ data_dir: "C:/Users/marcell/d/data/modelling" df_train: anchor: std_bar_ETHUSDT_tick_1.feather -# anchor: std_bar_ETHUSDT_tick_1.feather +# df2: std_bar_BTCUSDT_tick_1.feather df_val: df_test: @@ -100,6 +100,7 @@ cols_to_model: # - cos_date # - sin_time # - cos_time +# df2: augment_method: augment_dfs: @@ -110,7 +111,7 @@ augment_dfs_mix: 0 # -------------------------------------------------------------------------------------- train_start_date: "2019-01-01" -train_days: 300 +train_days: 100 val_days: 1 val_train_offset_days: 1 val_puffer_days: 1 diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index 32bc7a38..c1197da8 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -36,7 +36,11 @@ from dagobert.modelling.dl.data import GeneratorCryptoDataset from dagobert.modelling.dl import AdaBelief -from dagobert.modelling.augmentation.utils import get_noise, pca_analysis +from dagobert.modelling.augmentation.utils import ( + get_noise, + pca_analysis, + tsne_analysis, +) from dagobert.modelling.utils import ( triple_barrier_error, non_vertical_error, @@ -48,6 +52,7 @@ fig_to_tb, fig_to_comet, plot_pca, + plot_tsne, update_lookback, plot_anchor_sample, ) @@ -448,6 +453,7 @@ def validation_step(self, batch, batch_idx): y_real = self.discriminator(h.detach()) pca_x, pca_x_hat = pca_analysis(t2n(x), t2n(x_hat)) + tsne_x, tsne_x_hat = tsne_analysis(t2n(x), t2n(x_hat)) loss_gen = TimeGANLightning.generator_loss( y_fake, @@ -484,6 +490,8 @@ def validation_step(self, batch, batch_idx): "y_real/val": y_real, "pca_x/val": pca_x, "pca_x_hat/val": pca_x_hat, + "tsne_x/val": tsne_x, + "tsne_x_hat/val": tsne_x_hat, } def validation_epoch_end(self, outputs): @@ -654,9 +662,6 @@ def _epoch_end(self, outputs, prefix="val"): Args: outputs: prefix: indicates train or val epoch end - - Returns: - """ if prefix == "train": pass @@ -667,6 
+672,8 @@ def _epoch_end(self, outputs, prefix="val"): y_fake_e = [] pca_x = [] pca_x_hat = [] + tsne_x = [] + tsne_x_hat = [] for x in outputs: avg_loss.append(x[f"loss_disc/{prefix}"]) y_real.append(x[f"y_real/{prefix}"]) @@ -674,8 +681,18 @@ def _epoch_end(self, outputs, prefix="val"): y_fake_e.append(x[f"y_fake_e/{prefix}"]) pca_x.append(x[f"pca_x/{prefix}"]) pca_x_hat.append(x[f"pca_x_hat/{prefix}"]) + tsne_x.append(x[f"tsne_x/{prefix}"]) + tsne_x_hat.append(x[f"tsne_x_hat/{prefix}"]) # log sampled images, only first batch (2 validation rounds @ start) - self._make_plots(y_real[0], y_fake[0], pca_x[0], pca_x_hat[0], prefix) + self._make_plots( + y_real[0], + y_fake[0], + pca_x[0], + pca_x_hat[0], + tsne_x[0], + tsne_x_hat[0], + prefix, + ) # ---------------------------------------------------------------------------------- # PLOTTING AND LOGGING FUNCTIONS @@ -712,7 +729,7 @@ def _log_graph(self, datasets: GeneratorCryptoDataset): examples = [torch.rand(*s).float().to(self.tgan_device) for s in example_shapes] self.logger.experiment[0].add_graph(self, examples) - def _make_plots(self, y_real, y_fake, pca_x, pca_x_hat, prefix): + def _make_plots(self, y_real, y_fake, pca_x, pca_x_hat, tsne_x, tsne_x_hat, prefix): """ Makes following useful summary plots: - plotting 2-dim PCA for visualising diversity learned @@ -725,6 +742,13 @@ def _make_plots(self, y_real, y_fake, pca_x, pca_x_hat, prefix): self.current_epoch, ) + # TSNE SCATTER + self._log_image( + f"real v fake TSNE-scatter/{prefix}", + plot_tsne(tsne_x, tsne_x_hat), + self.current_epoch, + ) + # HISTOGRAM # discriminator's take on real data y_real_class = (torch.sigmoid(y_real) > 0.5).int() diff --git a/src/dagobert/modelling/augmentation/utils.py b/src/dagobert/modelling/augmentation/utils.py index 11351f66..72fde853 100644 --- a/src/dagobert/modelling/augmentation/utils.py +++ b/src/dagobert/modelling/augmentation/utils.py @@ -3,6 +3,7 @@ import torch import numpy as np from sklearn.decomposition import 
PCA +from sklearn.manifold import TSNE def get_noise(n_samples: int, mini_series_length: int, z_dim: int, device: str = "cpu"): @@ -42,3 +43,25 @@ def pca_analysis(x, x_hat, components: int = 2): pca_results = pca.transform(x) pca_hat_results = pca.transform(x_hat) return pca_results, pca_hat_results + + +def tsne_analysis(x, x_hat, components: int = 2, n_iter=300): + """ + TSNE on 2 (real and synthetic) datasets + Args: + x: real data of shape (batch, time, feature) + x_hat: synthetic data of the same shape + components: number of components to keep + + Returns: + 2 arrays of TSNE-reduced real and synthetic data + """ + x = np.mean(x, 2) + x_hat = np.mean(x_hat, 2) + batch_len = x.shape[0] + + tsne = TSNE(n_components=components, n_iter=n_iter) + tsne_all = tsne.fit_transform(np.concatenate((x, x_hat), axis=0)) + tsne_results = tsne_all[:batch_len] + tsne_hat_results = tsne_all[batch_len:] + return tsne_results, tsne_hat_results diff --git a/src/dagobert/modelling/utils.py b/src/dagobert/modelling/utils.py index 0de5e3ff..c84612bb 100644 --- a/src/dagobert/modelling/utils.py +++ b/src/dagobert/modelling/utils.py @@ -449,3 +449,31 @@ def plot_pca(pca_x, pca_x_hat) -> Figure: plt.ylabel("y_pca") plt.close() return f + + +def plot_tsne(tsne_x, tsne_x_hat) -> Figure: + """ + Plot TSNE-reduced x and x_hat to visualise similarity. Overlap suggests similarity. + Args: + tsne_x: 2-component-PCA of x + tsne_x_hat: 2-component-PCA of x_hat + + Returns: + Scatter plot showing 2-component-TSNE of x & x_hat. 
+ """ + f, ax = plt.subplots(1) + + plt.scatter(tsne_x[:, 0], tsne_x[:, 1], c="black", alpha=0.2, label="Real") + plt.scatter( + tsne_x_hat[:, 0], + tsne_x_hat[:, 1], + c="red", + alpha=0.2, + label="Synthetic", + ) + ax.legend() + plt.title("TSNE plot") + plt.xlabel("x_tsne") + plt.ylabel("y_tsne") + plt.close() + return f From 337420dd6d6a5bb196a026e560ec57472aaba4bc Mon Sep 17 00:00:00 2001 From: marcell_mate Date: Thu, 25 Mar 2021 11:50:57 +0000 Subject: [PATCH 57/62] keep models based on training loss --- src/dagobert/modelling/augmentation/timegan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index c1197da8..59c6b2b5 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -84,7 +84,7 @@ def run_tgan(args): # setup callbacks checkpoint_callback = ModelCheckpoint( - monitor="loss_gen/val", + monitor="loss_gen/train", filename="_{epoch:02d}_{avg_reward:.10f}", dirpath=f"{args.log_dir}/models/{args.exp_name}_{tb_logger.version}", save_top_k=5, From 951ffcdb60db66c67454fb5e84fb227d9c72673e Mon Sep 17 00:00:00 2001 From: marcell_mate Date: Thu, 25 Mar 2021 11:59:11 +0000 Subject: [PATCH 58/62] changing config --- config/timegan_config.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index cec6b6de..034d5395 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -19,14 +19,14 @@ tags: - time_gan_test no_comet_logger: True seed: 42 -batch_size: 128 +batch_size: 256 # -------------------------------------------------------------------------------------- # GAN # -------------------------------------------------------------------------------------- # gru or lstm -rnn: "gru" +rnn: "lstm" # embedding weight in cost of generator loss emb_weight: 1 @@ -48,10 +48,10 @@ dropout: 
generator: 0.2 discriminator: 0.2 -num_layers: 1 +num_layers: 3 hidden_size: 24 z_dim: 32 -mini_series_length: 128 +mini_series_length: 256 # don't change order with lr dict. # generator_, embedder1_ separated out for ease of code for now. keep lr constant @@ -75,7 +75,7 @@ data_dir: "C:/Users/marcell/d/data/modelling" df_train: anchor: std_bar_ETHUSDT_tick_1.feather -# df2: std_bar_BTCUSDT_tick_1.feather + df2: std_bar_BTCUSDT_tick_1.feather df_val: df_test: @@ -88,7 +88,7 @@ cols_to_model: - high - low - close -# - cum_ticks + - cum_ticks # - cum_dollar - volume - cum_volume_buy @@ -100,7 +100,7 @@ cols_to_model: # - cos_date # - sin_time # - cos_time -# df2: + df2: augment_method: augment_dfs: @@ -111,7 +111,7 @@ augment_dfs_mix: 0 # -------------------------------------------------------------------------------------- train_start_date: "2019-01-01" -train_days: 100 +train_days: 500 val_days: 1 val_train_offset_days: 1 val_puffer_days: 1 From eec0ee7b7da66f1151f199fb00a4023cdf9163f8 Mon Sep 17 00:00:00 2001 From: marcell_mate Date: Thu, 25 Mar 2021 13:16:13 +0000 Subject: [PATCH 59/62] changing config --- config/timegan_config.yaml | 4 ++-- src/dagobert/modelling/augmentation/timegan.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index 034d5395..c6743087 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -13,7 +13,7 @@ print_nan_grads: True # -------------------------------------------------------------------------------------- log_dir: logs -num_workers: 0 +num_workers: 8 exp_name: TGAN-test tags: - time_gan_test @@ -71,7 +71,7 @@ lr: #data_dir: "C:/Work/dagobert/data/modelling" #data_dir: "/home/daniel/dagobert_data/modelling" data_dir: "C:/Users/marcell/d/data/modelling" - +# data_dir: "home/ubuntu/dagobert/data/modelling" df_train: anchor: std_bar_ETHUSDT_tick_1.feather diff --git a/src/dagobert/modelling/augmentation/timegan.py 
b/src/dagobert/modelling/augmentation/timegan.py index 59c6b2b5..b1d62d29 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -84,7 +84,7 @@ def run_tgan(args): # setup callbacks checkpoint_callback = ModelCheckpoint( - monitor="loss_gen/train", + monitor="loss_gen/val", filename="_{epoch:02d}_{avg_reward:.10f}", dirpath=f"{args.log_dir}/models/{args.exp_name}_{tb_logger.version}", save_top_k=5, @@ -395,6 +395,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): # pytorch lightning needs to have "loss" in the return dict return { "loss": loss_disc, + "loss_gen/train": loss_gen, "loss_disc/train": loss_disc, "y_fake/train": y_fake, "y_fake_e/train": y_fake_e, @@ -484,6 +485,7 @@ def validation_step(self, batch, batch_idx): logger=True, ) return { + "loss_gen/val": loss_gen, "loss_disc/val": loss_disc, "y_fake/val": y_fake, "y_fake_e/val": y_fake_e, From fa5ab6fa2d89acdbd872b7972a722028266a89e6 Mon Sep 17 00:00:00 2001 From: marcell_mate Date: Thu, 25 Mar 2021 13:26:31 +0000 Subject: [PATCH 60/62] changing config --- config/timegan_config.yaml | 2 +- src/dagobert/modelling/augmentation/timegan.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index c6743087..ffe7ebc4 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -71,7 +71,7 @@ lr: #data_dir: "C:/Work/dagobert/data/modelling" #data_dir: "/home/daniel/dagobert_data/modelling" data_dir: "C:/Users/marcell/d/data/modelling" -# data_dir: "home/ubuntu/dagobert/data/modelling" +# data_dir: "/home/ubuntu/dagobert/data/modelling" df_train: anchor: std_bar_ETHUSDT_tick_1.feather diff --git a/src/dagobert/modelling/augmentation/timegan.py b/src/dagobert/modelling/augmentation/timegan.py index b1d62d29..bdccfd7e 100644 --- a/src/dagobert/modelling/augmentation/timegan.py +++ b/src/dagobert/modelling/augmentation/timegan.py @@ -395,7 +395,6 @@ def 
training_step(self, batch, batch_idx, optimizer_idx): # pytorch lightning needs to have "loss" in the return dict return { "loss": loss_disc, - "loss_gen/train": loss_gen, "loss_disc/train": loss_disc, "y_fake/train": y_fake, "y_fake_e/train": y_fake_e, From 96570be6abd5841b54ef485e70f8224bac7612e8 Mon Sep 17 00:00:00 2001 From: marcell_mate Date: Wed, 14 Jul 2021 12:21:32 +0100 Subject: [PATCH 61/62] config and accessing node nb --- config/timegan_config.yaml | 4 +- config/timegan_config_local.yaml | 122 ++++++++ .../experiments/interact_with_nodes.ipynb | 2 +- .../interact_with_nodes_tgan.ipynb | 268 ++++++++++++++++++ 4 files changed, 393 insertions(+), 3 deletions(-) create mode 100644 config/timegan_config_local.yaml create mode 100644 notebooks/experiments/interact_with_nodes_tgan.ipynb diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index ffe7ebc4..1c76881d 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -70,8 +70,8 @@ lr: #data_dir: "C:/Work/dagobert/data/modelling" #data_dir: "/home/daniel/dagobert_data/modelling" -data_dir: "C:/Users/marcell/d/data/modelling" -# data_dir: "/home/ubuntu/dagobert/data/modelling" +# data_dir: "C:/Users/marcell/d/data/modelling" +data_dir: "/home/ubuntu/dagobert/data/modelling" df_train: anchor: std_bar_ETHUSDT_tick_1.feather diff --git a/config/timegan_config_local.yaml b/config/timegan_config_local.yaml new file mode 100644 index 00000000..ffe7ebc4 --- /dev/null +++ b/config/timegan_config_local.yaml @@ -0,0 +1,122 @@ + +# -------------------------------------------------------------------------------------- +# LIGHTNING +# -------------------------------------------------------------------------------------- + +gpus: 1 +pin_memory: True +val_check_interval: 0.5 +print_nan_grads: True + +# -------------------------------------------------------------------------------------- +# RUN +# -------------------------------------------------------------------------------------- + 
+log_dir: logs +num_workers: 8 +exp_name: TGAN-test +tags: + - time_gan_test +no_comet_logger: True +seed: 42 +batch_size: 256 + +# -------------------------------------------------------------------------------------- +# GAN +# -------------------------------------------------------------------------------------- + +# gru or lstm +rnn: "lstm" +# embedding weight in cost of generator loss +emb_weight: 1 + +# don't change these, or preprocessing won't work +target_col: +to_label: False +no_sample_weights: True +binariser_method: + +# -------------------------------------------------------------------------------------- +# MODEL +# -------------------------------------------------------------------------------------- + +optimizer: "adamw" +dropout: + recovery: 0.2 + embedder: 0.2 + supervisor: 0.2 + generator: 0.2 + discriminator: 0.2 + +num_layers: 3 +hidden_size: 24 +z_dim: 32 +mini_series_length: 256 + +# don't change order with lr dict. +# generator_, embedder1_ separated out for ease of code for now. 
keep lr constant +lr: + embedder0: 0.0005 + supervisor: 0.0005 + generator: 0.0005 + embedder1: 0.0005 + generator_: 0.0005 + embedder1_: 0.0005 + discriminator: 0.0005 + +# -------------------------------------------------------------------------------------- +# DATA +# -------------------------------------------------------------------------------------- + +#data_dir: "C:/Work/dagobert/data/modelling" +#data_dir: "/home/daniel/dagobert_data/modelling" +data_dir: "C:/Users/marcell/d/data/modelling" +# data_dir: "/home/ubuntu/dagobert/data/modelling" + +df_train: + anchor: std_bar_ETHUSDT_tick_1.feather + df2: std_bar_BTCUSDT_tick_1.feather + +df_val: +df_test: + +# the cols of the secondary DFs will automatically be set to anchor's if not defined +cols_to_model: + anchor: + - date_diff + - open + - high + - low + - close + - cum_ticks +# - cum_dollar + - volume + - cum_volume_buy + - cum_volume_sell + - cum_volume_quote + - cum_volume_quote_buy + - cum_volume_quote_sell +# - sin_date +# - cos_date +# - sin_time +# - cos_time + df2: + +augment_method: +augment_dfs: +augment_dfs_mix: 0 + +# -------------------------------------------------------------------------------------- +# PREPROCESSING +# -------------------------------------------------------------------------------------- + +train_start_date: "2019-01-01" +train_days: 500 +val_days: 1 +val_train_offset_days: 1 +val_puffer_days: 1 +test_days: 1 +test_train_offset_days: 62 +test_puffer_days: 1 + +scaling_method: minmax \ No newline at end of file diff --git a/notebooks/experiments/interact_with_nodes.ipynb b/notebooks/experiments/interact_with_nodes.ipynb index f6e63954..6aa7b0a9 100644 --- a/notebooks/experiments/interact_with_nodes.ipynb +++ b/notebooks/experiments/interact_with_nodes.ipynb @@ -228,7 +228,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5" } }, "nbformat": 4, diff --git 
a/notebooks/experiments/interact_with_nodes_tgan.ipynb b/notebooks/experiments/interact_with_nodes_tgan.ipynb new file mode 100644 index 00000000..3692f08f --- /dev/null +++ b/notebooks/experiments/interact_with_nodes_tgan.ipynb @@ -0,0 +1,268 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'paramiko'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpathlib\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mPath\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mparamiko\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'paramiko'" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "import time\n", + "import paramiko " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'C:\\\\Users\\\\marcell\\\\d\\\\dagobert\\\\notebooks\\\\experiments'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pwd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## London GPUs\n", + "\n", + "- log in to all 10\n", + "- pull latest branch\n", + "- delete prev data files" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "london_gpus = [\n", + " 
\"ec2-3-8-198-113.eu-west-2.compute.amazonaws.com\",\n", + " \"ec2-18-132-49-7.eu-west-2.compute.amazonaws.com\",\n", + " \"ec2-35-178-168-24.eu-west-2.compute.amazonaws.com\",\n", + " \"ec2-18-130-246-221.eu-west-2.compute.amazonaws.com\",\n", + " \"ec2-52-56-202-156.eu-west-2.compute.amazonaws.com\",\n", + " \"ec2-18-132-17-125.eu-west-2.compute.amazonaws.com\",\n", + " \"ec2-35-178-170-162.eu-west-2.compute.amazonaws.com\",\n", + " \"ec2-3-8-155-239.eu-west-2.compute.amazonaws.com\",\n", + " \"ec2-18-130-180-205.eu-west-2.compute.amazonaws.com\",\n", + " \"ec2-3-8-194-52.eu-west-2.compute.amazonaws.com\",\n", + "]\n", + "username = \"ubuntu\"\n", + "london_k = paramiko.RSAKey.from_private_key_file(\"../../../sec/dagobert.pem\")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 ec2-3-8-198-113.eu-west-2.compute.amazonaws.com\n", + "0 b''\n", + "1 ec2-18-132-49-7.eu-west-2.compute.amazonaws.com\n", + "1 b''\n", + "2 ec2-35-178-168-24.eu-west-2.compute.amazonaws.com\n", + "2 b''\n", + "3 ec2-18-130-246-221.eu-west-2.compute.amazonaws.com\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 16\u001b[0m \"\"\"\n\u001b[0;32m 17\u001b[0m \u001b[0mssh_stdin\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mssh_stdout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mssh_stderr\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mssh\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexec_command\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcmd2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 18\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mssh_stderr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 19\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\envs\\dagobert\\lib\\site-packages\\paramiko\\file.py\u001b[0m in \u001b[0;36mread\u001b[1;34m(self, size)\u001b[0m\n\u001b[0;32m 198\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 199\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 200\u001b[1;33m \u001b[0mnew_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_DEFAULT_BUFSIZE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 201\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mEOFError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 202\u001b[0m \u001b[0mnew_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\envs\\dagobert\\lib\\site-packages\\paramiko\\channel.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(self, size)\u001b[0m\n\u001b[0;32m 1374\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1375\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0msize\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1376\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchannel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrecv_stderr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msize\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1377\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1378\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_write\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\envs\\dagobert\\lib\\site-packages\\paramiko\\channel.py\u001b[0m in \u001b[0;36mrecv_stderr\u001b[1;34m(self, nbytes)\u001b[0m\n\u001b[0;32m 745\u001b[0m \"\"\"\n\u001b[0;32m 746\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 747\u001b[1;33m \u001b[0mout\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0min_stderr_buffer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnbytes\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 748\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mPipeTimeout\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 749\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0msocket\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\envs\\dagobert\\lib\\site-packages\\paramiko\\buffered_pipe.py\u001b[0m in 
\u001b[0;36mread\u001b[1;34m(self, nbytes, timeout)\u001b[0m\n\u001b[0;32m 158\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_buffer\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_closed\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 159\u001b[0m \u001b[0mthen\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 160\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_cv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 161\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 162\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[1;33m-=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mthen\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\envs\\dagobert\\lib\\threading.py\u001b[0m in \u001b[0;36mwait\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 294\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 295\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[1;32mis\u001b[0m 
\u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 296\u001b[1;33m \u001b[0mwaiter\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 297\u001b[0m \u001b[0mgotit\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 298\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "for i, hostname in enumerate(london_gpus):\n", + " ssh = paramiko.SSHClient()\n", + " ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())\n", + " ssh.connect(hostname=hostname, username=username, pkey=london_k)\n", + " print(i, hostname)\n", + " cmd = \"\"\"\n", + " cd dagobert/dagobert;\n", + " rm ../data/modelling/*;\n", + " git pull https://danielhomola:4frvgh%GTB@github.com/danielhomola/dagobert hparams_labelling;\n", + " \"\"\"\n", + " ssh_stdin, ssh_stdout, ssh_stderr = ssh.exec_command(cmd)\n", + " print(i, ssh_stderr.read())\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Zip all log folders\n", + "\n", + "- log in to all gpus and zip all folders that start with log and log the models too\n", + "- uplaod them to s3\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import time\n", + "import paramiko " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "local_path = Path('/home/daniel/dagobert_data/')\n", + "username = \"ubuntu\"\n", + "ohio_k = paramiko.RSAKey.from_private_key_file(\"../../../sec/dagobert_preprocessing_node.pem\")\n", + "london_k = 
paramiko.RSAKey.from_private_key_file(\"../../../sec/dagobert.pem\")\n", + "nodes = {\n", + " \"gpu1\": {\"hostname\": \"ec2-52-20-7-61.compute-1.amazonaws.com\", \"key\": ohio_k},\n", + " \"gpu2\": {\"hostname\": \"ec2-52-22-178-27.compute-1.amazonaws.com\", \"key\": ohio_k},\n", + " \"gpu3\": {\"hostname\": \"ec2-54-147-237-118.compute-1.amazonaws.com\", \"key\": ohio_k},\n", + " \"gpu4\": {\"hostname\": \"ec2-54-152-39-74.compute-1.amazonaws.com\", \"key\": ohio_k},\n", + " \"gpu5\": {\"hostname\": \"ec2-54-225-32-4.compute-1.amazonaws.com\", \"key\": ohio_k},\n", + " \"gpu6\": {\"hostname\": \"ec2-54-90-219-179.compute-1.amazonaws.com\", \"key\": ohio_k},\n", + " \"gpu7\": {\"hostname\": \"ec2-100-24-115-15.compute-1.amazonaws.com\", \"key\": ohio_k},\n", + " \"gpu8\": {\"hostname\": \"ec2-3-236-251-175.compute-1.amazonaws.com\", \"key\": ohio_k},\n", + " \"gpu9\": {\"hostname\": \"ec2-34-237-76-111.compute-1.amazonaws.com\", \"key\": ohio_k},\n", + " \"gpu10\": {\"hostname\": \"ec2-3-10-228-3.eu-west-2.compute.amazonaws.com\",\"key\": london_k},\n", + " \"gpu11\": {\"hostname\": \"ec2-18-130-191-126.eu-west-2.compute.amazonaws.com\",\"key\": london_k},\n", + " \"gpu12\": {\"hostname\": \"ec2-3-10-150-229.eu-west-2.compute.amazonaws.com\",\"key\": london_k},\n", + " \"gpu13\": {\"hostname\": \"ec2-3-8-28-118.eu-west-2.compute.amazonaws.com\",\"key\": london_k},\n", + " \"gpu14\": {\"hostname\": \"ec2-35-176-172-205.eu-west-2.compute.amazonaws.com\",\"key\": london_k},\n", + " \"gpu15\": {\"hostname\": \"ec2-18-133-29-17.eu-west-2.compute.amazonaws.com\",\"key\": london_k},\n", + " \"gpu16\": {\"hostname\": \"ec2-18-133-64-254.eu-west-2.compute.amazonaws.com\",\"key\": london_k},\n", + " \"gpu17\": {\"hostname\": \"ec2-3-8-197-96.eu-west-2.compute.amazonaws.com\",\"key\": london_k},\n", + " \"gpu18\": {\"hostname\": \"ec2-35-178-66-77.eu-west-2.compute.amazonaws.com\",\"key\": london_k},\n", + " \"gpu19\": {\"hostname\": 
\"ec2-3-8-181-180.eu-west-2.compute.amazonaws.com\",\"key\": london_k},\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------------------------\n", + "gpu6\n", + "-------------------------------------------\n", + "Uploaded everything successfully for gpu6\n", + "-------------------------------------------\n", + "gpu7\n", + "-------------------------------------------\n", + "Uploaded everything successfully for gpu7\n", + "-------------------------------------------\n", + "gpu8\n", + "-------------------------------------------\n", + "Uploaded everything successfully for gpu8\n", + "-------------------------------------------\n", + "gpu9\n", + "-------------------------------------------\n", + "Uploaded everything successfully for gpu9\n" + ] + } + ], + "source": [ + "for name, node in nodes.items():\n", + " ssh = paramiko.SSHClient()\n", + " ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())\n", + " ssh.connect(hostname=node['hostname'], username=username, pkey=node['key'])\n", + " \n", + " print('-------------------------------------------')\n", + " print(name)\n", + " print('-------------------------------------------')\n", + " \n", + " cmd = (\n", + " f\"cd dagobert/dagobert;\"\n", + " f\"sudo apt install zip;\"\n", + " f\"rm -rf logs_run1;\"\n", + " f\"ls | grep logs | xargs zip {name}_all_logs.zip -r;\"\n", + " f\"zip {name}_models.zip -r TCN;\"\n", + " f\"aws s3 cp {name}_all_logs.zip s3://dagobert/;\"\n", + " f\"aws s3 cp {name}_models.zip s3://dagobert/;\"\n", + " )\n", + " ssh_stdin, ssh_stdout, ssh_stderr = ssh.exec_command(cmd)\n", + " print(f'Uploaded everything successfully for {name}')\n", + " \n", + " # download zip - not used in the end because we have better ways\n", + " # ftp_client=ssh.open_sftp()\n", + " # ftp_client.get(\"/home/ubuntu/dagobert/dagobert/all_logs.zip\", local_path / 
f\"{name}_all_logs.zip\")\n", + " # print (f\"Downloaded all_zips from {name}.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 11296f7e75ad19f6ea558e7dea3d5b72acdad14a Mon Sep 17 00:00:00 2001 From: marcell_mate Date: Thu, 15 Jul 2021 12:41:22 +0100 Subject: [PATCH 62/62] tgan readme --- config/timegan_config.yaml | 16 +++--- src/dagobert/modelling/augmentation/README.md | 54 +++++++++++++++++++ 2 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 src/dagobert/modelling/augmentation/README.md diff --git a/config/timegan_config.yaml b/config/timegan_config.yaml index 1c76881d..10a9f0bc 100644 --- a/config/timegan_config.yaml +++ b/config/timegan_config.yaml @@ -22,14 +22,9 @@ seed: 42 batch_size: 256 # -------------------------------------------------------------------------------------- -# GAN +# PREPROCESSING # -------------------------------------------------------------------------------------- -# gru or lstm -rnn: "lstm" -# embedding weight in cost of generator loss -emb_weight: 1 - # don't change these, or preprocessing won't work target_col: to_label: False @@ -40,6 +35,13 @@ binariser_method: # MODEL # -------------------------------------------------------------------------------------- + +# gru or lstm +rnn: "lstm" + +# embedding weight in cost of generator loss +emb_weight: 1 + optimizer: "adamw" dropout: recovery: 0.2 @@ -107,7 +109,7 @@ augment_dfs: augment_dfs_mix: 0 # -------------------------------------------------------------------------------------- -# PREPROCESSING +# PREPROCESSING DATES # 
--------------------------------------------------------------------------------------

train_start_date: "2019-01-01"

diff --git a/src/dagobert/modelling/augmentation/README.md b/src/dagobert/modelling/augmentation/README.md
new file mode 100644
index 00000000..fc4ee69f
--- /dev/null
+++ b/src/dagobert/modelling/augmentation/README.md
@@ -0,0 +1,54 @@
+# Dagobert augmentation / TGAN module
+
+This module holds the implementation of TimeGAN. It is adapted to fit into PyTorch Lightning so we get benefits of easy
+set up, checkpointing etc. The network is based on
+[this paper](https://papers.nips.cc/paper/2019/file/c9efe5f26cd17ba6216bbe2a7d26d490-Paper.pdf).
+
+A couple of other useful links can be found in [this issue](https://github.com/danielhomola/dagobert/issues/63)
+
+## Running it
+Like the `dl` module, this module is to be driven from the cmd line via an entry-point and a config file.
+```
+dagobert-tgan -c config/timegan_config.yaml
+```
+
+## Config params
+There are some config params which we can tinker with for optimal training. The example one
+(`config/timegan_config.yaml`) is nicely structured in blocks so it should be easy to understand which relate to the
+TimeGAN model structure.
+
+The params are nicely documented in `augmentation/tgan_args.py` so make sure to check there before trying to find out
+from the code what each of these do.
+
+## The TimeGAN
+Much of the structure was implemented as quoted from the original paper (above). Five RNNs work together to create a
+learned embedding space optimized with both supervised and adversarial objectives, encouraging the network to adhere to
+the temporal dynamics of the training data. 
+
+GAN convergence is notoriously tricky, and there are a bunch of handles and hyperparameters we can toggle
+(some of this is inspired by various literature about training GANs more widely):
+- in order for the discriminator not to get 'too smart', we optimise it on the simple condition that the loss is not too
+ small.
+- the generator (and one of the embedders) is optimised twice before every optimisation of the discriminator - this is currently implemented in a
+ crude way, but it works.
+- convergence is very training intensive, the authors and [other implementations](https://github.com/jsyoon0823/TimeGAN)
+ all refer to 5-10k epochs
+- one important aspect of this network is that the output series' length is a hyperparameter we set before training, and
+ need to feed in the same length for X - we call this mini series length
+- performance is measured visually by PCA, and t-SNE analyses between the original and the synthetic data. For
+discriminative performance the authors use an RNN classifier to distinguish between real and synthetic data. For
+ predictive performance they train an RNN to predict the last element of a series on synthetic samples. This trained
+ RNN is then validated on real data, measuring MAE. These latter two are not implemented currently, as synthetic data
+ was already visibly flawed upon visual inspection of PCA, t-SNE.
+
+## Future work
+- experiment with more advanced learning rates for different components of TimeGAN
+- warm-up training of the generator is a common measure to avoid lack of convergence
+- try different thresholds for prohibiting discriminator optimisation
+- it is challenging to iterate fast with this project. Convergence can take time, and training the generator
+can go well initially and then deteriorate or vice versa. To always wait for being able to inspect (PCA, t-SNE) visuals,
+ is time-consuming and inconsistent. Some distance measurements between real/synthetic can come in handy for triggering
+ various actions, or just introducing more consistency into monitoring.
+
+