From 360afc64d09d55b9864885112bcb7dbee02352d9 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 19:58:56 -0700 Subject: [PATCH 001/162] Added VAE paper to the bibliography. --- docs/Bibliography.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/Bibliography.md b/docs/Bibliography.md index 2bcf3548..7e7303ea 100644 --- a/docs/Bibliography.md +++ b/docs/Bibliography.md @@ -4,6 +4,13 @@ Below, I present a most likely incomplete list of works I referred to when I was on this library: +### Autoencoders + +- (Dec 2013) **Auto-Encoding Variational Bayes** + Diederik P Kingma, Max Welling + https://arxiv.org/abs/1312.6114 + + ### Learning rate and optimization - (2012) **Lecture 6.5 -- RmsProp: Divide the gradient by a running average of its recent magnitude** From 5a3d2b6de3fab55c19a3b68c45f7c90e4ed1b55f Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 19:59:03 -0700 Subject: [PATCH 002/162] Reordered MNIST file. --- vel/models/vision/mnist_cnn_01.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vel/models/vision/mnist_cnn_01.py b/vel/models/vision/mnist_cnn_01.py index 40b2d54e..05b61c09 100644 --- a/vel/models/vision/mnist_cnn_01.py +++ b/vel/models/vision/mnist_cnn_01.py @@ -26,11 +26,6 @@ class Net(SupervisedModel): Dense - output (softmax) """ - @staticmethod - def _weight_initializer(tensor): - init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('relu')) - init.constant_(tensor.bias, 0.0) - def __init__(self, img_rows, img_cols, img_channels, num_classes): super(Net, self).__init__() @@ -45,6 +40,11 @@ def __init__(self, img_rows, img_cols, img_channels, num_classes): self.dropout2 = nn.Dropout(p=0.5) self.fc2 = nn.Linear(128, num_classes) + @staticmethod + def _weight_initializer(tensor): + init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('relu')) + init.constant_(tensor.bias, 0.0) + def reset_weights(self): self._weight_initializer(self.conv1) 
self._weight_initializer(self.conv2) From c156b6969309efef7438f0f74abb1a239b0d03eb Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 20:27:50 -0700 Subject: [PATCH 003/162] Add default varargs to be empty. --- vel/launcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vel/launcher.py b/vel/launcher.py index e2a00a47..bb44f733 100644 --- a/vel/launcher.py +++ b/vel/launcher.py @@ -13,7 +13,7 @@ def main(): parser.add_argument('config', metavar='FILENAME', help='Configuration file for the run') parser.add_argument('command', metavar='COMMAND', help='A command to run') - parser.add_argument('varargs', nargs='*', metavar='VARARGS', help='Extra options to the command') + parser.add_argument('varargs', nargs='*', default=[], metavar='VARARGS', help='Extra options to the command') parser.add_argument('-r', '--run_number', type=int, default=0, help="A run number") parser.add_argument('-d', '--device', default='cuda', help="A device to run the model on") parser.add_argument('-s', '--seed', type=int, default=None, help="Random seed for the project") From 5d9f112234dc030a36156bce18f2ac298fd5a55c Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 20:42:44 -0700 Subject: [PATCH 004/162] Easy script model config. 
--- vel/api/__init__.py | 3 +-- vel/{internals => api}/model_config.py | 26 +++++++++++++++++++++----- 2 files changed, 22 insertions(+), 7 deletions(-) rename vel/{internals => api}/model_config.py (90%) diff --git a/vel/api/__init__.py b/vel/api/__init__.py index ac1eec1f..3eb8c6e3 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -11,5 +11,4 @@ from .source import Source, TrainingData, TextData from .storage import Storage from .train_phase import TrainPhase, EmptyTrainPhase - -from vel.internals.model_config import ModelConfig +from .model_config import ModelConfig diff --git a/vel/internals/model_config.py b/vel/api/model_config.py similarity index 90% rename from vel/internals/model_config.py rename to vel/api/model_config.py index 979fcdee..7b1e1225 100644 --- a/vel/internals/model_config.py +++ b/vel/api/model_config.py @@ -1,5 +1,6 @@ import datetime as dtm import os.path +import typing from vel.exceptions import VelInitializationException from vel.internals.parser import Parser @@ -58,14 +59,29 @@ def from_file(cls, filename: str, run_number: int, continue_training: bool = Fal ) @classmethod - def from_memory(cls, model_data: dict, run_number: int, project_dir: str, - continue_training=False, seed: int = None, device: str = 'cuda', params=None): + def script(cls, model_name: str = 'script', configuration: typing.Optional[dict] = None, run_number: int = 1, + continue_training=False, seed: int = None, device: str = 'cuda', params=None): """ Create model config from supplied data """ + if configuration is None: + configuration = {} + + configuration['name'] = model_name + + project_config_path = ModelConfig.find_project_directory(os.path.dirname(os.path.abspath(os.getcwd()))) + + with open(os.path.join(project_config_path, cls.PROJECT_FILE_NAME), 'r') as fp: + project_config_contents = Parser.parse(fp) + + aggregate_dictionary = { + **project_config_contents, + **configuration + } + return ModelConfig( - filename="[memory]", - 
configuration=model_data, + filename="[script]", + configuration=aggregate_dictionary, run_number=run_number, - project_dir=project_dir, + project_dir=project_config_path, continue_training=continue_training, seed=seed, device=device, From 96bb187f378cbfee84621c793593226aafa84fed Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 20:44:48 -0700 Subject: [PATCH 005/162] train_data has been renamed to data. --- vel/sources/vision/cifar10.py | 2 +- vel/sources/vision/mnist.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vel/sources/vision/cifar10.py b/vel/sources/vision/cifar10.py index 452f5eb7..6d4a0861 100644 --- a/vel/sources/vision/cifar10.py +++ b/vel/sources/vision/cifar10.py @@ -20,7 +20,7 @@ def create(model_config, batch_size, normalize=True, num_workers=0, augmentation augmentations = [ToArray()] + (augmentations if augmentations is not None else []) if normalize: - train_data = train_dataset.train_data + train_data = train_dataset.data mean_value = (train_data / 255).mean(axis=(0, 1, 2)) std_value = (train_data / 255).std(axis=(0, 1, 2)) diff --git a/vel/sources/vision/mnist.py b/vel/sources/vision/mnist.py index 24f26646..d1a39b84 100644 --- a/vel/sources/vision/mnist.py +++ b/vel/sources/vision/mnist.py @@ -18,7 +18,7 @@ def create(model_config, batch_size, normalize=True, num_workers=0, augmentation augmentations = [ToArray()] + (augmentations if augmentations is not None else []) if normalize: - train_data = train_dataset.train_data + train_data = train_dataset.data mean_value = (train_data.double() / 255).mean().item() std_value = (train_data.double() / 255).std().item() From 7a44a8e4c1dc65713ff1c43ead059cdfd097e77f Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 21:05:37 -0700 Subject: [PATCH 006/162] Added matplotlib dependency. 
--- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cb5f1854..b336050f 100644 --- a/setup.py +++ b/setup.py @@ -28,14 +28,15 @@ install_requires=[ 'attrs', 'cloudpickle', + 'matplotlib', 'numpy', 'opencv-python', 'pandas', 'pyyaml', 'scikit-learn', 'torch ~= 1.0', - 'torchvision', 'torchtext', + 'torchvision', 'tqdm' ], extras_require={ From 6b5ac6da6cc4961b41b4b1c70883b5a0cc62a856 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 21:05:52 -0700 Subject: [PATCH 007/162] Fixed a typo. --- vel/augmentations/to_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vel/augmentations/to_array.py b/vel/augmentations/to_array.py index 3f8c980f..ffbd353d 100644 --- a/vel/augmentations/to_array.py +++ b/vel/augmentations/to_array.py @@ -4,7 +4,7 @@ class ToArray(data.Augmentation): - """ Convert imate to an array of floats """ + """ Convert image to an array of floats """ def __init__(self, mode='x', tags=None): super().__init__(mode, tags) From 82148e07ded14c76bf13af2dd9d68beaa0d50fc0 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 21:06:16 -0700 Subject: [PATCH 008/162] Unsupervised MNIST dataset. 
--- vel/api/__init__.py | 2 +- vel/api/source.py | 11 ++++++----- vel/augmentations/unsupervised.py | 14 ++++++++++++++ vel/modules/input/embedding.py | 6 +++--- vel/notebook/__init__.py | 3 ++- vel/notebook/defaults.py | 6 ++++++ vel/sources/img_dir_source.py | 4 ++-- vel/sources/nlp/imdb.py | 4 ++-- vel/sources/vision/cifar10.py | 4 ++-- vel/sources/vision/mnist.py | 10 +++++++--- 10 files changed, 45 insertions(+), 19 deletions(-) create mode 100644 vel/augmentations/unsupervised.py create mode 100644 vel/notebook/defaults.py diff --git a/vel/api/__init__.py b/vel/api/__init__.py index 3eb8c6e3..411a6eaa 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -8,7 +8,7 @@ from .optimizer import OptimizerFactory from .schedule import Schedule from .scheduler import SchedulerFactory -from .source import Source, TrainingData, TextData +from .source import Source, SupervisedTrainingData, SupervisedTextData from .storage import Storage from .train_phase import TrainPhase, EmptyTrainPhase from .model_config import ModelConfig diff --git a/vel/api/source.py b/vel/api/source.py index 083221ee..ebfffb5d 100644 --- a/vel/api/source.py +++ b/vel/api/source.py @@ -1,5 +1,7 @@ import torch.utils.data as data +from .data import DataFlow + class Source: """ Source of data for supervised learning algorithms """ @@ -31,7 +33,7 @@ def val_iterations_per_epoch(self): raise NotImplementedError -class TextData(Source): +class SupervisedTextData(Source): """ An NLP torchtext data source """ def __init__(self, train_source, val_source, train_iterator, val_iterator, data_field, target_field): super().__init__() @@ -68,10 +70,9 @@ def val_iterations_per_epoch(self): return len(self.val_iterator) -class TrainingData(Source): +class SupervisedTrainingData(Source): """ Most common source of data combining a basic datasource and sampler """ def __init__(self, train_source, val_source, num_workers, batch_size, augmentations=None): - import vel.api.data as vel_data super().__init__() @@ 
-84,8 +85,8 @@ def __init__(self, train_source, val_source, num_workers, batch_size, augmentati self.augmentations = augmentations # Derived values - self.train_ds = vel_data.DataFlow(self.train_source, augmentations, tag='train') - self.val_ds = vel_data.DataFlow(self.val_source, augmentations, tag='val') + self.train_ds = DataFlow(self.train_source, augmentations, tag='train') + self.val_ds = DataFlow(self.val_source, augmentations, tag='val') self._train_loader = data.DataLoader( self.train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers diff --git a/vel/augmentations/unsupervised.py b/vel/augmentations/unsupervised.py new file mode 100644 index 00000000..2caeb448 --- /dev/null +++ b/vel/augmentations/unsupervised.py @@ -0,0 +1,14 @@ +import vel.api.data as data + + +class Unsupervised(data.Augmentation): + """ Simply transform supervised to an unsupervised dataset, cloning data to a target """ + def __init__(self): + super().__init__('both', None) + + def __call__(self, x_data, y_data): + return x_data, x_data + + +def create(): + return Unsupervised() diff --git a/vel/modules/input/embedding.py b/vel/modules/input/embedding.py index ff7d790c..ab4aee1c 100644 --- a/vel/modules/input/embedding.py +++ b/vel/modules/input/embedding.py @@ -1,13 +1,13 @@ import torch.nn as nn -from vel.api import LinearBackboneModel, TextData, ModelFactory +from vel.api import LinearBackboneModel, SupervisedTextData, ModelFactory class EmbeddingInput(LinearBackboneModel): """ Learnable Embedding input layer """ def __init__(self, alphabet_size: int, output_dim: int, pretrained: bool=False, frozen: bool=False, - source: TextData=None): + source: SupervisedTextData=None): super().__init__() self._output_dim = output_dim @@ -34,7 +34,7 @@ def forward(self, input_data): return self.layer(input_data) -def create(alphabet_size: int, output_dim: int, pretrained: bool=False, frozen: bool=False, source: TextData=None): +def create(alphabet_size: int, output_dim: int, 
pretrained: bool=False, frozen: bool=False, source: SupervisedTextData=None): """ Vel factory function """ def instantiate(**_): return EmbeddingInput(alphabet_size, output_dim, pretrained=pretrained, frozen=frozen, source=source) diff --git a/vel/notebook/__init__.py b/vel/notebook/__init__.py index 5d173dba..b29639c0 100644 --- a/vel/notebook/__init__.py +++ b/vel/notebook/__init__.py @@ -1 +1,2 @@ -from .loader import load \ No newline at end of file +from .loader import load +from .defaults import reasonable_notbook_defaults diff --git a/vel/notebook/defaults.py b/vel/notebook/defaults.py new file mode 100644 index 00000000..044d9446 --- /dev/null +++ b/vel/notebook/defaults.py @@ -0,0 +1,6 @@ + + +def reasonable_notbook_defaults(): + """ Notbook defaults """ + import matplotlib.pyplot as plt + plt.rcParams['figure.figsize'] = [10, 5] diff --git a/vel/sources/img_dir_source.py b/vel/sources/img_dir_source.py index c1df3120..1e0c138e 100644 --- a/vel/sources/img_dir_source.py +++ b/vel/sources/img_dir_source.py @@ -2,7 +2,7 @@ import torchvision.datasets as ds -from vel.api import TrainingData +from vel.api import SupervisedTrainingData class ImageDirSource(ds.ImageFolder): @@ -20,7 +20,7 @@ def create(model_config, path, num_workers, batch_size, augmentations=None, tta= train_ds = ImageDirSource(train_path) val_ds = ImageDirSource(valid_path) - return TrainingData( + return SupervisedTrainingData( train_ds, val_ds, num_workers=num_workers, diff --git a/vel/sources/nlp/imdb.py b/vel/sources/nlp/imdb.py index 6a9310cb..c6ac6fe9 100644 --- a/vel/sources/nlp/imdb.py +++ b/vel/sources/nlp/imdb.py @@ -7,7 +7,7 @@ import torchtext.data as data -from vel.api import TextData +from vel.api import SupervisedTextData class IMDBCached(imdb.IMDB): @@ -68,6 +68,6 @@ def create(model_config, batch_size, vectors=None): shuffle=True ) - return TextData( + return SupervisedTextData( train_source, test_source, train_iterator, test_iterator, text_field, label_field ) diff --git 
a/vel/sources/vision/cifar10.py b/vel/sources/vision/cifar10.py index 6d4a0861..4d8c02ee 100644 --- a/vel/sources/vision/cifar10.py +++ b/vel/sources/vision/cifar10.py @@ -1,6 +1,6 @@ from torchvision import datasets -from vel.api import TrainingData +from vel.api import SupervisedTrainingData from vel.augmentations.normalize import Normalize from vel.augmentations.to_tensor import ToTensor @@ -28,7 +28,7 @@ def create(model_config, batch_size, normalize=True, num_workers=0, augmentation augmentations.append(ToTensor()) - return TrainingData( + return SupervisedTrainingData( train_dataset, test_dataset, batch_size=batch_size, diff --git a/vel/sources/vision/mnist.py b/vel/sources/vision/mnist.py index d1a39b84..0ac79aae 100644 --- a/vel/sources/vision/mnist.py +++ b/vel/sources/vision/mnist.py @@ -1,14 +1,15 @@ from torchvision import datasets -from vel.api import TrainingData +from vel.api import SupervisedTrainingData from vel.augmentations.normalize import Normalize from vel.augmentations.to_tensor import ToTensor from vel.augmentations.to_array import ToArray +from vel.augmentations.unsupervised import Unsupervised -def create(model_config, batch_size, normalize=True, num_workers=0, augmentations=None): +def create(model_config, batch_size, normalize=True, num_workers=0, augmentations=None, unsupervised=False): """ Create a MNIST dataset, normalized """ path = model_config.data_dir('mnist') @@ -26,7 +27,10 @@ def create(model_config, batch_size, normalize=True, num_workers=0, augmentation augmentations.append(ToTensor()) - return TrainingData( + if unsupervised: + augmentations.append(Unsupervised()) + + return SupervisedTrainingData( train_dataset, test_dataset, num_workers=num_workers, From 24a25561dd34c78d82064da60aba84ca60bc895e Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 22:06:43 -0700 Subject: [PATCH 009/162] Mnist Autoencoder. 
--- .../mnist/mnist_cnn_autoencoder.yaml | 34 +++++++ vel/launcher.py | 2 +- vel/models/autoencoder/__init__.py | 0 .../autoencoder/mnist_cnn_autoencoder.py | 93 +++++++++++++++++++ vel/modules/layers.py | 11 +++ 5 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml create mode 100644 vel/models/autoencoder/__init__.py create mode 100644 vel/models/autoencoder/mnist_cnn_autoencoder.py diff --git a/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml b/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml new file mode 100644 index 00000000..ed2b9536 --- /dev/null +++ b/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml @@ -0,0 +1,34 @@ +name: 'mnist_cnn_autoenoder' + + +model: + name: vel.models.autoencoder.mnist_cnn_autoencoder + img_rows: 28 + img_cols: 28 + img_channels: 1 + num_classes: 10 + + +source: + name: vel.sources.vision.mnist + batch_size: 128 + normalize: False + num_workers: 4 + unsupervised: true + + +commands: + train: + name: vel.commands.train_command + epochs: 12 + log_frequency: 100 + + optimizer: + name: vel.optimizers.adadelta + + checkpoint: + metric: 'val:loss' + + + visdom: + name: vel.commands.vis_store_command diff --git a/vel/launcher.py b/vel/launcher.py index bb44f733..e4c68114 100644 --- a/vel/launcher.py +++ b/vel/launcher.py @@ -3,7 +3,7 @@ import multiprocessing import sys -from vel.internals.model_config import ModelConfig +from vel.api.model_config import ModelConfig from vel.internals.parser import Parser diff --git a/vel/models/autoencoder/__init__.py b/vel/models/autoencoder/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/models/autoencoder/mnist_cnn_autoencoder.py b/vel/models/autoencoder/mnist_cnn_autoencoder.py new file mode 100644 index 00000000..3dfd7b93 --- /dev/null +++ b/vel/models/autoencoder/mnist_cnn_autoencoder.py @@ -0,0 +1,93 @@ +import torch.nn as nn +import torch.nn.init as 
init +import torch.nn.functional as F + +import vel.util.network as net_util + +from vel.api import SupervisedModel, ModelFactory +from vel.metrics.loss_metric import Loss +from vel.modules.layers import Flatten, Reshape + + +class MnistCnnAutoencoder(SupervisedModel): + """ + A simple MNIST classification model. + + Conv 3x3 - 32 + Conv 3x3 - 64 + MaxPool 2x2 + Dropout 0.25 + Flatten + Dense - 128 + Dense - output (softmax) + """ + + def __init__(self, img_rows, img_cols, img_channels, num_classes): + super(MnistCnnAutoencoder, self).__init__() + + self.flattened_size = (img_rows - 4) // 2 * (img_cols - 4) // 2 * 64 + + layer_series = [ + (3, 1, 1), + (3, 1, 2), + (3, 1, 2), + ] + + self.final_width = net_util.convolutional_layer_series(img_rows, layer_series) + self.final_height = net_util.convolutional_layer_series(img_cols, layer_series) + + self.encoder = nn.Sequential( + nn.Conv2d(in_channels=img_channels, out_channels=16, kernel_size=(3, 3), padding=1), + nn.ReLU(True), + nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), stride=2, padding=1), + nn.ReLU(True), + nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3), stride=2, padding=1), + Flatten(), + nn.Linear(self.final_width * self.final_height * 32, 32) + ) + + self.decoder = nn.Sequential( + nn.Linear(32, self.final_width * self.final_height * 32), + nn.ReLU(True), + Reshape(32, self.final_width, self.final_height), + nn.ConvTranspose2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1, output_padding=1), + nn.ReLU(True), + nn.ConvTranspose2d(in_channels=32, out_channels=16, kernel_size=3, stride=2, padding=1, output_padding=1), + nn.ReLU(True), + nn.ConvTranspose2d(in_channels=16, out_channels=img_channels, kernel_size=3, padding=1), + ) + + @staticmethod + def _weight_initializer(tensor): + init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('relu')) + init.constant_(tensor.bias, 0.0) + + def reset_weights(self): + for m in self.children(): + if 
isinstance(m, nn.Conv2d): + self._weight_initializer(m) + elif isinstance(m, nn.ConvTranspose2d): + self._weight_initializer(m) + elif isinstance(m, nn.Linear): + self._weight_initializer(m) + + def forward(self, x): + encoding = self.encoder(x) + decoded = self.decoder(encoding) + return decoded + + def loss_value(self, x_data, y_true, y_pred): + """ Calculate a value of loss function """ + return F.mse_loss(y_pred, y_true) + + def metrics(self): + """ Set of metrics for this model """ + return [Loss()] + + +def create(img_rows, img_cols, img_channels, num_classes): + """ Vel factory function """ + def instantiate(**_): + return MnistCnnAutoencoder(img_rows, img_cols, img_channels, num_classes) + + return ModelFactory.generic(instantiate) diff --git a/vel/modules/layers.py b/vel/modules/layers.py index 6b694334..9a95e31d 100644 --- a/vel/modules/layers.py +++ b/vel/modules/layers.py @@ -39,6 +39,17 @@ def forward(self, x): return x.view(x.size(0), -1) +class Reshape(nn.Module): + """ Flatten input vector """ + def __init__(self, *sizes): + super().__init__() + + self.sizes = sizes + + def forward(self, x): + return x.view(x.size(0), *self.sizes) + + class OneHotEncode(nn.Module): """ One-hot encoding layer """ def __init__(self, num_classes): From ed88af125d1156216c085b1d98bf3ffc563b308a Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 22:44:00 -0700 Subject: [PATCH 010/162] Loading pretrained models. 
--- vel/api/model_config.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/vel/api/model_config.py b/vel/api/model_config.py index 7b1e1225..e2f98cd2 100644 --- a/vel/api/model_config.py +++ b/vel/api/model_config.py @@ -6,6 +6,8 @@ from vel.internals.parser import Parser from vel.internals.provider import Provider +from .info import TrainingInfo + class ModelConfig: """ @@ -30,8 +32,12 @@ def find_project_directory(start_path) -> str: else: return ModelConfig.find_project_directory(up_path) + @staticmethod + def from_project_directory(path) -> str: + return os.path.join(ModelConfig.find_project_directory('.'), path) + @classmethod - def from_file(cls, filename: str, run_number: int, continue_training: bool = False, seed: int = None, + def from_file(cls, filename: str, run_number: int = 1, continue_training: bool = False, seed: int = None, device: str = 'cuda', params=None): """ Create model config from file """ with open(filename, 'r') as fp: @@ -209,3 +215,26 @@ def quit_banner(self) -> None: # Small UI utils def __repr__(self): return f"" + + #################################################################################################################### + # CONVENIENCE METHODS FOR SCRIPTS + def load_trained_model(self): + """ Load a latest trained model from storage """ + model = self.provide("model").instantiate() + storage = self.provide("storage") + + last_epoch_idx = storage.last_epoch_idx() + + if last_epoch_idx == 0: + raise VelInitializationException("No trained model available") + + training_info = TrainingInfo( + start_epoch_idx=last_epoch_idx, + run_name=self.run_name, + ) + + model_state, hidden_state = storage.load(training_info) + + model.load_state_dict(model_state) + + return model From e52df95a3eb664d9c4f68c667cf000633782d4df Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 22:44:16 -0700 Subject: [PATCH 011/162] Better optimizer for the task. 
--- .../autoencoders/mnist/mnist_cnn_autoencoder.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml b/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml index ed2b9536..6d4651dd 100644 --- a/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml +++ b/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml @@ -17,14 +17,17 @@ source: unsupervised: true +optimizer: + name: vel.optimizers.adam + lr: 1.0e-3 + + commands: train: name: vel.commands.train_command epochs: 12 log_frequency: 100 - optimizer: - name: vel.optimizers.adadelta checkpoint: metric: 'val:loss' From d3d1fe71d1e1563cfc3ae62d00f3c55371d5b7bf Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 22:48:01 -0700 Subject: [PATCH 012/162] Turned methods of Source into properties. --- vel/api/learner.py | 6 +++--- vel/api/source.py | 18 ++++++++++++++++++ vel/commands/augvis_command.py | 2 +- vel/commands/lr_find_command.py | 4 ++-- vel/commands/summary_command.py | 2 +- vel/commands/train_command.py | 2 +- vel/phase/cycle.py | 2 +- vel/phase/generic.py | 2 +- 8 files changed, 28 insertions(+), 10 deletions(-) diff --git a/vel/api/learner.py b/vel/api/learner.py index 98bb4b89..a99fbcec 100644 --- a/vel/api/learner.py +++ b/vel/api/learner.py @@ -60,9 +60,9 @@ def train_epoch(self, epoch_info, source: 'vel.api.Source', interactive=True): self.train() if interactive: - iterator = tqdm.tqdm(source.train_loader(), desc="Training", unit="iter", file=sys.stdout) + iterator = tqdm.tqdm(source.train_loader, desc="Training", unit="iter", file=sys.stdout) else: - iterator = source.train_loader() + iterator = source.train_loader for batch_idx, (data, target) in enumerate(iterator): batch_info = BatchInfo(epoch_info, batch_idx) @@ -77,7 +77,7 @@ def validation_epoch(self, epoch_info, source: 'vel.api.Source'): """ Run a single evaluation epoch """ self.eval() - iterator = 
tqdm.tqdm(source.val_loader(), desc="Validation", unit="iter", file=sys.stdout) + iterator = tqdm.tqdm(source.val_loader, desc="Validation", unit="iter", file=sys.stdout) with torch.no_grad(): for batch_idx, (data, target) in enumerate(iterator): diff --git a/vel/api/source.py b/vel/api/source.py index ebfffb5d..a566521a 100644 --- a/vel/api/source.py +++ b/vel/api/source.py @@ -8,26 +8,32 @@ class Source: def __init__(self): pass + @property def train_loader(self): """ PyTorch loader of training data """ raise NotImplementedError + @property def val_loader(self): """ PyTorch loader of validation data """ raise NotImplementedError + @property def train_dataset(self): """ Return the training dataset """ raise NotImplementedError + @property def val_dataset(self): """ Return the validation dataset """ raise NotImplementedError + @property def train_iterations_per_epoch(self): """ Return number of iterations per epoch """ raise NotImplementedError + @property def val_iterations_per_epoch(self): """ Return number of iterations per epoch - validation """ raise NotImplementedError @@ -45,26 +51,32 @@ def __init__(self, train_source, val_source, train_iterator, val_iterator, data_ self.data_field = data_field self.target_field = target_field + @property def train_loader(self): """ PyTorch loader of training data """ return self.train_iterator + @property def val_loader(self): """ PyTorch loader of validation data """ return self.val_iterator + @property def train_dataset(self): """ Return the training dataset """ return self.train_source + @property def val_dataset(self): """ Return the validation dataset """ return self.val_source + @property def train_iterations_per_epoch(self): """ Return number of iterations per epoch """ return len(self.train_iterator) + @property def val_iterations_per_epoch(self): """ Return number of iterations per epoch - validation """ return len(self.val_iterator) @@ -96,26 +108,32 @@ def __init__(self, train_source, val_source, num_workers, 
batch_size, augmentati self.val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers ) + @property def train_loader(self): """ PyTorch loader of training data """ return self._train_loader + @property def val_loader(self): """ PyTorch loader of validation data """ return self._val_loader + @property def train_dataset(self): """ Return the training dataset """ return self.train_ds + @property def val_dataset(self): """ Return the validation dataset """ return self.val_ds + @property def train_iterations_per_epoch(self): """ Return number of iterations per epoch """ return len(self._train_loader) + @property def val_iterations_per_epoch(self): """ Return number of iterations per epoch - validation """ return len(self._val_loader) diff --git a/vel/commands/augvis_command.py b/vel/commands/augvis_command.py index bd56ee6a..df4f5352 100644 --- a/vel/commands/augvis_command.py +++ b/vel/commands/augvis_command.py @@ -13,7 +13,7 @@ def __init__(self, source: Source, samples, cases): def run(self): """ Run the visualization """ - dataset = self.source.train_dataset() + dataset = self.source.train_dataset num_samples = len(dataset) fig, ax = plt.subplots(self.cases, self.samples+1) diff --git a/vel/commands/lr_find_command.py b/vel/commands/lr_find_command.py index f3a67c47..f1de1e20 100644 --- a/vel/commands/lr_find_command.py +++ b/vel/commands/lr_find_command.py @@ -75,7 +75,7 @@ def run(self): # Optimizer shoudl be created after freeze optimizer = self.optimizer_factory.instantiate(learner.model) - iterator = iter(self.source.train_loader()) + iterator = iter(self.source.train_loader) # Metrics to track through this training metrics = learner.metrics() + [AveragingNamedMetric("lr")] @@ -101,7 +101,7 @@ def run(self): try: data, target = next(iterator) except StopIteration: - iterator = iter(self.source.train_loader()) + iterator = iter(self.source.train_loader) data, target = next(iterator) learner.train_batch(batch_info, data, target) diff --git 
a/vel/commands/summary_command.py b/vel/commands/summary_command.py index 8da53b8a..37393b84 100644 --- a/vel/commands/summary_command.py +++ b/vel/commands/summary_command.py @@ -12,7 +12,7 @@ def run(self, *args): if self.source is None: self.model.summary() else: - x_data, y_data = next(iter(self.source.train_loader())) + x_data, y_data = next(iter(self.source.train_loader)) self.model.summary(input_size=x_data.shape[1:]) diff --git a/vel/commands/train_command.py b/vel/commands/train_command.py index 7c798263..f9b6afd2 100644 --- a/vel/commands/train_command.py +++ b/vel/commands/train_command.py @@ -49,7 +49,7 @@ def run(self): epoch_info = api.EpochInfo( training_info=training_info, global_epoch_idx=global_epoch_idx, - batches_per_epoch=self.source.train_iterations_per_epoch(), + batches_per_epoch=self.source.train_iterations_per_epoch, optimizer=optimizer ) diff --git a/vel/phase/cycle.py b/vel/phase/cycle.py index 7374bcb1..1c89915c 100644 --- a/vel/phase/cycle.py +++ b/vel/phase/cycle.py @@ -137,7 +137,7 @@ def epoch_info(self, training_info: TrainingInfo, global_idx: int, local_idx: in training_info=training_info, global_epoch_idx=global_idx, local_epoch_idx=local_idx, - batches_per_epoch=self._source.train_iterations_per_epoch(), + batches_per_epoch=self._source.train_iterations_per_epoch, optimizer=self._optimizer_instance, # Add special callback for this epoch callbacks=[self.special_callback] + training_info.callbacks diff --git a/vel/phase/generic.py b/vel/phase/generic.py index 87d0f571..25c52c1e 100644 --- a/vel/phase/generic.py +++ b/vel/phase/generic.py @@ -27,7 +27,7 @@ def epoch_info(self, training_info: TrainingInfo, global_idx: int, local_idx: in training_info=training_info, global_epoch_idx=global_idx, local_epoch_idx=local_idx, - batches_per_epoch=self._source.train_iterations_per_epoch(), + batches_per_epoch=self._source.train_iterations_per_epoch, optimizer=self._optimizer_instance ) From 17a5531151ee99abd0e17be24aecd0b4a4f9b975 Mon Sep 
17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 23:23:12 -0700 Subject: [PATCH 013/162] Fixing model summary. --- vel/api/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vel/api/model.py b/vel/api/model.py index 53098d3e..9f776348 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -40,7 +40,7 @@ def summary(self, input_size=None, hashsummary=False): if input_size is None: print(self) print("-" * 120) - number = sum(p.numel() for p in self.model.parameters()) + number = sum(p.numel() for p in self.parameters()) print("Number of model parameters: {:,}".format(number)) print("-" * 120) else: From 89c110fbf4726244e05ba98788722f7ae17ef3c7 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 23:37:22 -0700 Subject: [PATCH 014/162] Some more changes to MNIST autoencoder. --- .../mnist/mnist_cnn_autoencoder.yaml | 3 +- .../autoencoder/mnist_cnn_autoencoder.py | 41 ++++++++++--------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml b/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml index 6d4651dd..00501f4c 100644 --- a/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml +++ b/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml @@ -6,7 +6,8 @@ model: img_rows: 28 img_cols: 28 img_channels: 1 - num_classes: 10 + channels: [8, 16, 16] + representation_length: 16 source: diff --git a/vel/models/autoencoder/mnist_cnn_autoencoder.py b/vel/models/autoencoder/mnist_cnn_autoencoder.py index 3dfd7b93..002cc576 100644 --- a/vel/models/autoencoder/mnist_cnn_autoencoder.py +++ b/vel/models/autoencoder/mnist_cnn_autoencoder.py @@ -22,39 +22,39 @@ class MnistCnnAutoencoder(SupervisedModel): Dense - output (softmax) """ - def __init__(self, img_rows, img_cols, img_channels, num_classes): + def __init__(self, img_rows, img_cols, img_channels, channels=[16, 32, 32], representation_length=32): 
super(MnistCnnAutoencoder, self).__init__() - self.flattened_size = (img_rows - 4) // 2 * (img_cols - 4) // 2 * 64 - layer_series = [ (3, 1, 1), (3, 1, 2), (3, 1, 2), ] + self.representation_length = representation_length self.final_width = net_util.convolutional_layer_series(img_rows, layer_series) self.final_height = net_util.convolutional_layer_series(img_cols, layer_series) + self.channels = channels self.encoder = nn.Sequential( - nn.Conv2d(in_channels=img_channels, out_channels=16, kernel_size=(3, 3), padding=1), + nn.Conv2d(in_channels=img_channels, out_channels=channels[0], kernel_size=(3, 3), padding=1), nn.ReLU(True), - nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), stride=2, padding=1), + nn.Conv2d(in_channels=channels[0], out_channels=channels[1], kernel_size=(3, 3), stride=2, padding=1), nn.ReLU(True), - nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3), stride=2, padding=1), + nn.Conv2d(in_channels=channels[1], out_channels=channels[2], kernel_size=(3, 3), stride=2, padding=1), Flatten(), - nn.Linear(self.final_width * self.final_height * 32, 32) + nn.Linear(self.final_width * self.final_height * channels[2], representation_length) ) self.decoder = nn.Sequential( - nn.Linear(32, self.final_width * self.final_height * 32), + nn.Linear(representation_length, self.final_width * self.final_height * channels[2]), nn.ReLU(True), Reshape(32, self.final_width, self.final_height), - nn.ConvTranspose2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1, output_padding=1), + nn.ConvTranspose2d(in_channels=channels[2], out_channels=channels[1], kernel_size=3, stride=2, padding=1, output_padding=1), nn.ReLU(True), - nn.ConvTranspose2d(in_channels=32, out_channels=16, kernel_size=3, stride=2, padding=1, output_padding=1), + nn.ConvTranspose2d(in_channels=channels[1], out_channels=channels[0], kernel_size=3, stride=2, padding=1, output_padding=1), nn.ReLU(True), - nn.ConvTranspose2d(in_channels=16, 
out_channels=img_channels, kernel_size=3, padding=1), + nn.ConvTranspose2d(in_channels=channels[0], out_channels=img_channels, kernel_size=3, padding=1), ) @staticmethod @@ -63,13 +63,14 @@ def _weight_initializer(tensor): init.constant_(tensor.bias, 0.0) def reset_weights(self): - for m in self.children(): - if isinstance(m, nn.Conv2d): - self._weight_initializer(m) - elif isinstance(m, nn.ConvTranspose2d): - self._weight_initializer(m) - elif isinstance(m, nn.Linear): - self._weight_initializer(m) + pass + # for m in children: + # if isinstance(m, nn.Conv2d): + # self._weight_initializer(m) + # elif isinstance(m, nn.ConvTranspose2d): + # self._weight_initializer(m) + # elif isinstance(m, nn.Linear): + # self._weight_initializer(m) def forward(self, x): encoding = self.encoder(x) @@ -85,9 +86,9 @@ def metrics(self): return [Loss()] -def create(img_rows, img_cols, img_channels, num_classes): +def create(img_rows, img_cols, img_channels, representation_length=32): """ Vel factory function """ def instantiate(**_): - return MnistCnnAutoencoder(img_rows, img_cols, img_channels, num_classes) + return MnistCnnAutoencoder(img_rows, img_cols, img_channels, representation_length) return ModelFactory.generic(instantiate) From 95f191c2945b9f756369567a2babf869b475b1bc Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 23:39:03 -0700 Subject: [PATCH 015/162] Better weight reset for autoencoder. 
--- .../autoencoder/mnist_cnn_autoencoder.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/vel/models/autoencoder/mnist_cnn_autoencoder.py b/vel/models/autoencoder/mnist_cnn_autoencoder.py index 002cc576..f78262b4 100644 --- a/vel/models/autoencoder/mnist_cnn_autoencoder.py +++ b/vel/models/autoencoder/mnist_cnn_autoencoder.py @@ -1,3 +1,5 @@ +import itertools as it + import torch.nn as nn import torch.nn.init as init import torch.nn.functional as F @@ -50,9 +52,13 @@ def __init__(self, img_rows, img_cols, img_channels, channels=[16, 32, 32], repr nn.Linear(representation_length, self.final_width * self.final_height * channels[2]), nn.ReLU(True), Reshape(32, self.final_width, self.final_height), - nn.ConvTranspose2d(in_channels=channels[2], out_channels=channels[1], kernel_size=3, stride=2, padding=1, output_padding=1), + nn.ConvTranspose2d( + in_channels=channels[2], out_channels=channels[1], kernel_size=3, stride=2, padding=1, output_padding=1 + ), nn.ReLU(True), - nn.ConvTranspose2d(in_channels=channels[1], out_channels=channels[0], kernel_size=3, stride=2, padding=1, output_padding=1), + nn.ConvTranspose2d( + in_channels=channels[1], out_channels=channels[0], kernel_size=3, stride=2, padding=1, output_padding=1 + ), nn.ReLU(True), nn.ConvTranspose2d(in_channels=channels[0], out_channels=img_channels, kernel_size=3, padding=1), ) @@ -63,14 +69,13 @@ def _weight_initializer(tensor): init.constant_(tensor.bias, 0.0) def reset_weights(self): - pass - # for m in children: - # if isinstance(m, nn.Conv2d): - # self._weight_initializer(m) - # elif isinstance(m, nn.ConvTranspose2d): - # self._weight_initializer(m) - # elif isinstance(m, nn.Linear): - # self._weight_initializer(m) + for m in it.chain(self.encoder, self.decoder): + if isinstance(m, nn.Conv2d): + self._weight_initializer(m) + elif isinstance(m, nn.ConvTranspose2d): + self._weight_initializer(m) + elif isinstance(m, nn.Linear): + self._weight_initializer(m) def 
forward(self, x): encoding = self.encoder(x) From 792064a39ff080ce6bb795465b748a77fd072c9c Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 23:44:06 -0700 Subject: [PATCH 016/162] Small code changes in MNIST autoencoder. --- vel/models/autoencoder/mnist_cnn_autoencoder.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/vel/models/autoencoder/mnist_cnn_autoencoder.py b/vel/models/autoencoder/mnist_cnn_autoencoder.py index f78262b4..556a1a4b 100644 --- a/vel/models/autoencoder/mnist_cnn_autoencoder.py +++ b/vel/models/autoencoder/mnist_cnn_autoencoder.py @@ -24,9 +24,12 @@ class MnistCnnAutoencoder(SupervisedModel): Dense - output (softmax) """ - def __init__(self, img_rows, img_cols, img_channels, channels=[16, 32, 32], representation_length=32): + def __init__(self, img_rows, img_cols, img_channels, channels=None, representation_length=32): super(MnistCnnAutoencoder, self).__init__() + if channels is None: + channels = [16, 32, 32] + layer_series = [ (3, 1, 1), (3, 1, 2), @@ -91,9 +94,14 @@ def metrics(self): return [Loss()] -def create(img_rows, img_cols, img_channels, representation_length=32): +def create(img_rows, img_cols, img_channels, channels=None, representation_length=32): """ Vel factory function """ + if channels is None: + channels = [16, 32, 32] + def instantiate(**_): - return MnistCnnAutoencoder(img_rows, img_cols, img_channels, representation_length) + return MnistCnnAutoencoder( + img_rows, img_cols, img_channels, channels=channels, representation_length=representation_length + ) return ModelFactory.generic(instantiate) From 3cd40b3802bd9780aaca92e53ce5cfcd065e9874 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 7 Apr 2019 23:45:31 -0700 Subject: [PATCH 017/162] Fixing a bug in MNIST autoencoder. 
--- vel/models/autoencoder/mnist_cnn_autoencoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vel/models/autoencoder/mnist_cnn_autoencoder.py b/vel/models/autoencoder/mnist_cnn_autoencoder.py index 556a1a4b..ddef1d38 100644 --- a/vel/models/autoencoder/mnist_cnn_autoencoder.py +++ b/vel/models/autoencoder/mnist_cnn_autoencoder.py @@ -54,7 +54,7 @@ def __init__(self, img_rows, img_cols, img_channels, channels=None, representati self.decoder = nn.Sequential( nn.Linear(representation_length, self.final_width * self.final_height * channels[2]), nn.ReLU(True), - Reshape(32, self.final_width, self.final_height), + Reshape(channels[2], self.final_width, self.final_height), nn.ConvTranspose2d( in_channels=channels[2], out_channels=channels[1], kernel_size=3, stride=2, padding=1, output_padding=1 ), From 123fc5fe9506b47bceb9318297d4140ce15d1341 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Mon, 8 Apr 2019 21:48:31 -0700 Subject: [PATCH 018/162] Ignoring local notebooks for now. 
--- .gitignore | 3 + .../autoencoders/mnist/mnist_cnn_vae.yaml | 38 +++++++ .../autoencoder/mnist_cnn_autoencoder.py | 10 +- vel/models/autoencoder/mnist_cnn_vae.py | 105 ++++++++++++++++++ 4 files changed, 147 insertions(+), 9 deletions(-) create mode 100644 examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml create mode 100644 vel/models/autoencoder/mnist_cnn_vae.py diff --git a/.gitignore b/.gitignore index b0a800fd..6860e5a1 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,6 @@ environment.yaml # Test cache /.pytest_cache + +# Local notebooks +/examples-notebooks diff --git a/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml b/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml new file mode 100644 index 00000000..74c499c9 --- /dev/null +++ b/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml @@ -0,0 +1,38 @@ +name: 'mnist_cnn_autoenoder' + + +model: + name: vel.models.autoencoder.mnist_cnn_vae + img_rows: 28 + img_cols: 28 + img_channels: 1 + channels: [8, 16, 16] + representation_length: 16 + + +source: + name: vel.sources.vision.mnist + batch_size: 128 + normalize: False + num_workers: 4 + unsupervised: true + + +optimizer: + name: vel.optimizers.adam + lr: 1.0e-3 + + +commands: + train: + name: vel.commands.train_command + epochs: 12 + log_frequency: 100 + + + checkpoint: + metric: 'val:loss' + + + visdom: + name: vel.commands.vis_store_command diff --git a/vel/models/autoencoder/mnist_cnn_autoencoder.py b/vel/models/autoencoder/mnist_cnn_autoencoder.py index ddef1d38..79eb8432 100644 --- a/vel/models/autoencoder/mnist_cnn_autoencoder.py +++ b/vel/models/autoencoder/mnist_cnn_autoencoder.py @@ -13,15 +13,7 @@ class MnistCnnAutoencoder(SupervisedModel): """ - A simple MNIST classification model. - - Conv 3x3 - 32 - Conv 3x3 - 64 - MaxPool 2x2 - Dropout 0.25 - Flatten - Dense - 128 - Dense - output (softmax) + A simple MNIST autoencoder, containing 3 convolutional layers. 
""" def __init__(self, img_rows, img_cols, img_channels, channels=None, representation_length=32): diff --git a/vel/models/autoencoder/mnist_cnn_vae.py b/vel/models/autoencoder/mnist_cnn_vae.py new file mode 100644 index 00000000..6745e6d4 --- /dev/null +++ b/vel/models/autoencoder/mnist_cnn_vae.py @@ -0,0 +1,105 @@ +import itertools as it + +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + +import vel.util.network as net_util + +from vel.api import SupervisedModel, ModelFactory +from vel.metrics.loss_metric import Loss +from vel.modules.layers import Flatten, Reshape + + +class MnistCnnAutoencoder(SupervisedModel): + """ + A simple MNIST variational autoencoder, containing 3 convolutional layers. + """ + + def __init__(self, img_rows, img_cols, img_channels, channels=None, representation_length=32): + super(MnistCnnAutoencoder, self).__init__() + + assert representation_length % 2 == 0, "Representation length must be even" + + if channels is None: + channels = [16, 32, 32] + + layer_series = [ + (3, 1, 1), + (3, 1, 2), + (3, 1, 2), + ] + + self.representation_length = representation_length + self.final_width = net_util.convolutional_layer_series(img_rows, layer_series) + self.final_height = net_util.convolutional_layer_series(img_cols, layer_series) + self.channels = channels + + self.encoder = nn.Sequential( + nn.Conv2d(in_channels=img_channels, out_channels=channels[0], kernel_size=(3, 3), padding=1), + nn.ReLU(True), + nn.Conv2d(in_channels=channels[0], out_channels=channels[1], kernel_size=(3, 3), stride=2, padding=1), + nn.ReLU(True), + nn.Conv2d(in_channels=channels[1], out_channels=channels[2], kernel_size=(3, 3), stride=2, padding=1), + Flatten(), + nn.Linear(self.final_width * self.final_height * channels[2], representation_length) + ) + + self.decoder = nn.Sequential( + nn.Linear(representation_length, self.final_width * self.final_height * channels[2]), + nn.ReLU(True), + Reshape(channels[2], self.final_width, 
self.final_height), + nn.ConvTranspose2d( + in_channels=channels[2], out_channels=channels[1], kernel_size=3, stride=2, padding=1, output_padding=1 + ), + nn.ReLU(True), + nn.ConvTranspose2d( + in_channels=channels[1], out_channels=channels[0], kernel_size=3, stride=2, padding=1, output_padding=1 + ), + nn.ReLU(True), + nn.ConvTranspose2d(in_channels=channels[0], out_channels=img_channels, kernel_size=3, padding=1), + ) + + @staticmethod + def _weight_initializer(tensor): + init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('relu')) + init.constant_(tensor.bias, 0.0) + + def reset_weights(self): + for m in it.chain(self.encoder, self.decoder): + if isinstance(m, nn.Conv2d): + self._weight_initializer(m) + elif isinstance(m, nn.ConvTranspose2d): + self._weight_initializer(m) + elif isinstance(m, nn.Linear): + self._weight_initializer(m) + + def forward(self, x): + encoding = self.encoder(x) + decoded = self.decoder(encoding) + + return { + 'result': decoded, + 'encoding': encoding + } + + def loss_value(self, x_data, y_true, y_pred): + """ Calculate a value of loss function """ + return F.mse_loss(y_pred, y_true) + + def metrics(self): + """ Set of metrics for this model """ + return [Loss()] + + +def create(img_rows, img_cols, img_channels, channels=None, representation_length=32): + """ Vel factory function """ + if channels is None: + channels = [16, 32, 32] + + def instantiate(**_): + return MnistCnnAutoencoder( + img_rows, img_cols, img_channels, channels=channels, representation_length=representation_length + ) + + return ModelFactory.generic(instantiate) From 846fd4d1a10c6941864db558c9100e735d1fe574 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Mon, 8 Apr 2019 22:34:39 -0700 Subject: [PATCH 019/162] Reducing number of parameters. 
--- vel/api/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vel/api/model.py b/vel/api/model.py index 9f776348..3d2029ca 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -39,10 +39,10 @@ def summary(self, input_size=None, hashsummary=False): if input_size is None: print(self) - print("-" * 120) + print("-" * 100) number = sum(p.numel() for p in self.parameters()) print("Number of model parameters: {:,}".format(number)) - print("-" * 120) + print("-" * 100) else: summary(self, input_size) From f62830055a255085412cda8cecdd87f224847dc3 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 9 Apr 2019 08:15:10 -0700 Subject: [PATCH 020/162] Implemented Variational autoencoder. --- README.md | 4 + .../autoencoders/mnist/mnist_cnn_vae.yaml | 2 +- vel/api/__init__.py | 3 +- vel/api/learner.py | 16 ++-- vel/api/model.py | 77 ++++++++++++------- vel/metrics/loss_metric.py | 2 +- .../autoencoder/mnist_cnn_autoencoder.py | 10 ++- vel/models/autoencoder/mnist_cnn_vae.py | 77 ++++++++++++++++--- vel/models/imagenet/resnet34.py | 4 +- .../multilayer_rnn_sequence_classification.py | 4 +- vel/models/vision/cifar10_cnn_01.py | 4 +- vel/models/vision/cifar_resnet_v1.py | 4 +- vel/models/vision/cifar_resnet_v2.py | 4 +- vel/models/vision/cifar_resnext.py | 15 +--- vel/models/vision/mnist_cnn_01.py | 5 +- vel/notebook/__init__.py | 2 +- vel/notebook/defaults.py | 2 +- vel/notebook/loader.py | 10 ++- 18 files changed, 160 insertions(+), 85 deletions(-) diff --git a/README.md b/README.md index b418f9b6..597aa4e8 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,10 @@ that are ready to run and easy to modify for other similar usecases: - Distributional Q-Learning - Noisy Networks for Exploration - Rainbow (combination of the above) + +# Implemented models - Unsupervised learning + +- Autoencoders and Variational autoencoders with an examples on MNIST dataset. 
# Examples diff --git a/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml b/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml index 74c499c9..14224ef0 100644 --- a/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml @@ -20,7 +20,7 @@ source: optimizer: name: vel.optimizers.adam - lr: 1.0e-3 + lr: 1.0e-4 commands: diff --git a/vel/api/__init__.py b/vel/api/__init__.py index 411a6eaa..eeaadd26 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -2,7 +2,8 @@ from .info import BatchInfo, EpochInfo, TrainingInfo from .learner import Learner from .model import ( - Model, BackboneModel, LinearBackboneModel, SupervisedModel, RnnLinearBackboneModel, RnnModel, RnnSupervisedModel + Model, SupervisedModel, LossFunctionModel, + BackboneModel, LinearBackboneModel, RnnLinearBackboneModel, RnnModel, RnnSupervisedModel ) from .model_factory import ModelFactory from .optimizer import OptimizerFactory diff --git a/vel/api/learner.py b/vel/api/learner.py index a99fbcec..6ebb1949 100644 --- a/vel/api/learner.py +++ b/vel/api/learner.py @@ -3,12 +3,13 @@ import tqdm import typing +from .model import SupervisedModel from .info import BatchInfo, EpochInfo, TrainingInfo class Learner: """ Manages training process of a single model """ - def __init__(self, device: torch.device, model, max_grad_norm: typing.Optional[float]=None): + def __init__(self, device: torch.device, model: SupervisedModel, max_grad_norm: typing.Optional[float]=None): self.device = device self.model = model.to(device) self.max_grad_norm = max_grad_norm @@ -90,21 +91,14 @@ def validation_epoch(self, epoch_info, source: 'vel.api.Source'): def feed_batch(self, batch_info, data, target): """ Run single batch of data """ data, target = data.to(self.device), target.to(self.device) - output, loss = self.model.loss(data, target) + metrics = self.model.calculate_gradient(data, target) - # Store extra batch information for calculation of the 
statistics - batch_info['data'] = data - batch_info['target'] = target - batch_info['output'] = output - batch_info['loss'] = loss - - return loss + batch_info.update(metrics) def train_batch(self, batch_info, data, target): """ Train single batch of data """ batch_info.optimizer.zero_grad() - loss = self.feed_batch(batch_info, data, target) - loss.backward() + self.feed_batch(batch_info, data, target) if self.max_grad_norm is not None: batch_info['grad_norm'] = torch.nn.utils.clip_grad_norm_( diff --git a/vel/api/model.py b/vel/api/model.py index 3d2029ca..1060170b 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -13,7 +13,7 @@ class Model(nn.Module): def metrics(self) -> list: """ Set of metrics for this model """ - return [Loss()] + return [] def train(self, mode=True): r""" @@ -75,6 +75,54 @@ def is_recurrent(self) -> bool: return False +class SupervisedModel(Model): + """ Model for a supervised learning problem """ + + def calculate_gradient(self, x_data, y_true): + raise NotImplementedError + + +class LossFunctionModel(SupervisedModel): + """ Model for a supervised learning with a simple loss function """ + + def metrics(self) -> list: + """ Set of metrics for this model """ + return [Loss()] + + def calculate_gradient(self, x_data, y_true): + y_pred = self(x_data) + loss_value = self.loss_value(x_data, y_true, y_pred) + + if self.training: + loss_value.backward() + + return { + 'loss': loss_value.item(), + 'data': x_data, + 'target': y_true, + 'output': y_pred + } + + def loss_value(self, x_data, y_true, y_pred): + """ Calculate a value of loss function """ + raise NotImplementedError + + +class BackboneModel(Model): + """ Model that serves as a backbone network to connect your heads to """ + + +class LinearBackboneModel(BackboneModel): + """ + Model that serves as a backbone network to connect your heads to - one that spits out a single-dimension output + """ + + @property + def output_dim(self) -> int: + """ Final dimension of model output """ + 
raise NotImplementedError + + class RnnModel(Model): """ Class representing recurrent model """ @@ -93,10 +141,6 @@ def zero_state(self, batch_size): return torch.zeros(batch_size, self.state_dim) -class BackboneModel(Model): - """ Model that serves as a backbone network to connect your heads to """ - - class RnnLinearBackboneModel(BackboneModel): """ Model that serves as a backbone network to connect your heads to - @@ -123,29 +167,6 @@ def zero_state(self, batch_size): return torch.zeros(batch_size, self.state_dim, dtype=torch.float32) -class LinearBackboneModel(BackboneModel): - """ - Model that serves as a backbone network to connect your heads to - one that spits out a single-dimension output - """ - - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - raise NotImplementedError - - -class SupervisedModel(Model): - """ Model for a supervised learning problem """ - def loss(self, x_data, y_true): - """ Forward propagate network and return a value of loss function """ - y_pred = self(x_data) - return y_pred, self.loss_value(x_data, y_true, y_pred) - - def loss_value(self, x_data, y_true, y_pred): - """ Calculate a value of loss function """ - raise NotImplementedError - - class RnnSupervisedModel(RnnModel): """ Model for a supervised learning problem """ diff --git a/vel/metrics/loss_metric.py b/vel/metrics/loss_metric.py index d241a393..8de3707d 100644 --- a/vel/metrics/loss_metric.py +++ b/vel/metrics/loss_metric.py @@ -8,4 +8,4 @@ def __init__(self): def _value_function(self, batch_info): """ Just forward a value of the loss""" - return batch_info['loss'].item() + return batch_info['loss'] diff --git a/vel/models/autoencoder/mnist_cnn_autoencoder.py b/vel/models/autoencoder/mnist_cnn_autoencoder.py index 79eb8432..1ad5536f 100644 --- a/vel/models/autoencoder/mnist_cnn_autoencoder.py +++ b/vel/models/autoencoder/mnist_cnn_autoencoder.py @@ -6,12 +6,12 @@ import vel.util.network as net_util -from vel.api import SupervisedModel, 
ModelFactory +from vel.api import LossFunctionModel, ModelFactory from vel.metrics.loss_metric import Loss from vel.modules.layers import Flatten, Reshape -class MnistCnnAutoencoder(SupervisedModel): +class MnistCnnAutoencoder(LossFunctionModel): """ A simple MNIST autoencoder, containing 3 convolutional layers. """ @@ -77,6 +77,12 @@ def forward(self, x): decoded = self.decoder(encoding) return decoded + def encode(self, sample): + return self.encoder(sample) + + def decode(self, sample): + return self.decoder(sample) + def loss_value(self, x_data, y_true, y_pred): """ Calculate a value of loss function """ return F.mse_loss(y_pred, y_true) diff --git a/vel/models/autoencoder/mnist_cnn_vae.py b/vel/models/autoencoder/mnist_cnn_vae.py index 6745e6d4..0cdfadde 100644 --- a/vel/models/autoencoder/mnist_cnn_vae.py +++ b/vel/models/autoencoder/mnist_cnn_vae.py @@ -1,5 +1,6 @@ import itertools as it +import torch import torch.nn as nn import torch.nn.init as init import torch.nn.functional as F @@ -7,17 +8,18 @@ import vel.util.network as net_util from vel.api import SupervisedModel, ModelFactory +from vel.api.metrics import AveragingNamedMetric from vel.metrics.loss_metric import Loss from vel.modules.layers import Flatten, Reshape -class MnistCnnAutoencoder(SupervisedModel): +class MnistCnnVAE(SupervisedModel): """ A simple MNIST variational autoencoder, containing 3 convolutional layers. 
""" def __init__(self, img_rows, img_cols, img_channels, channels=None, representation_length=32): - super(MnistCnnAutoencoder, self).__init__() + super(MnistCnnVAE, self).__init__() assert representation_length % 2 == 0, "Representation length must be even" @@ -31,6 +33,7 @@ def __init__(self, img_rows, img_cols, img_channels, channels=None, representati ] self.representation_length = representation_length + self.final_width = net_util.convolutional_layer_series(img_rows, layer_series) self.final_height = net_util.convolutional_layer_series(img_cols, layer_series) self.channels = channels @@ -42,7 +45,7 @@ def __init__(self, img_rows, img_cols, img_channels, channels=None, representati nn.ReLU(True), nn.Conv2d(in_channels=channels[1], out_channels=channels[2], kernel_size=(3, 3), stride=2, padding=1), Flatten(), - nn.Linear(self.final_width * self.final_height * channels[2], representation_length) + nn.Linear(self.final_width * self.final_height * channels[2], representation_length * 2) ) self.decoder = nn.Sequential( @@ -58,6 +61,7 @@ def __init__(self, img_rows, img_cols, img_channels, channels=None, representati ), nn.ReLU(True), nn.ConvTranspose2d(in_channels=channels[0], out_channels=img_channels, kernel_size=3, padding=1), + nn.Sigmoid() ) @staticmethod @@ -74,22 +78,71 @@ def reset_weights(self): elif isinstance(m, nn.Linear): self._weight_initializer(m) - def forward(self, x): - encoding = self.encoder(x) - decoded = self.decoder(encoding) + def encode(self, sample): + encoding = self.encoder(sample) + + mu = encoding[:, :self.representation_length] + # I encode std directly as a softplus, rather than exp(logstd) + std = F.softplus(encoding[:, self.representation_length:]) + + return mu + torch.randn_like(std) * std + + def decode(self, sample): + return self.decoder(sample) + + def forward(self, sample): + encoding = self.encoder(sample) + + mu = encoding[:, :self.representation_length] + # I encode std directly as a softplus, rather than exp(logstd) + 
std = F.softplus(encoding[:, self.representation_length:]) + + z = mu + torch.randn_like(std) * std + + decoded = self.decoder(z) return { - 'result': decoded, - 'encoding': encoding + 'decoded': decoded, + 'encoding': z, + 'mu': mu, + 'std': std } - def loss_value(self, x_data, y_true, y_pred): + def calculate_gradient(self, x_data, y_true): """ Calculate a value of loss function """ - return F.mse_loss(y_pred, y_true) + output = self(x_data) + + y_pred = output['decoded'] + + mu = output['mu'] + std = output['std'] + var = std ** 2 + + kl_divergence = - 0.5 * (1 + torch.log(var) - mu ** 2 - var).sum(dim=1) + kl_divergence = kl_divergence.mean() + + # reconstruction = 0.5 * F.mse_loss(y_pred, y_true) + + # We must sum over all image axis and average only on minibatch axis + reconstruction = F.binary_cross_entropy(y_pred, y_true, reduce=False).sum(1).sum(1).sum(1).mean() + loss = reconstruction + kl_divergence + + if self.training: + loss.backward() + + return { + 'loss': loss.item(), + 'reconstruction': reconstruction.item(), + 'kl_divergence': kl_divergence.item() + } def metrics(self): """ Set of metrics for this model """ - return [Loss()] + return [ + Loss(), + AveragingNamedMetric('reconstruction'), + AveragingNamedMetric('kl_divergence') + ] def create(img_rows, img_cols, img_channels, channels=None, representation_length=32): @@ -98,7 +151,7 @@ def create(img_rows, img_cols, img_channels, channels=None, representation_lengt channels = [16, 32, 32] def instantiate(**_): - return MnistCnnAutoencoder( + return MnistCnnVAE( img_rows, img_cols, img_channels, channels=channels, representation_length=representation_length ) diff --git a/vel/models/imagenet/resnet34.py b/vel/models/imagenet/resnet34.py index e5a5a97c..fc819a2a 100644 --- a/vel/models/imagenet/resnet34.py +++ b/vel/models/imagenet/resnet34.py @@ -5,14 +5,14 @@ import vel.modules.layers as l import vel.util.module_util as mu -from vel.api import SupervisedModel, ModelFactory +from vel.api import 
LossFunctionModel, ModelFactory # Because of concat pooling it's 2x 512 NET_OUTPUT = 1024 -class Resnet34(SupervisedModel): +class Resnet34(LossFunctionModel): """ Resnet34 network model """ def __init__(self, fc_layers=None, dropout=None, pretrained=True): diff --git a/vel/models/rnn/multilayer_rnn_sequence_classification.py b/vel/models/rnn/multilayer_rnn_sequence_classification.py index 8542e119..0d3e7439 100644 --- a/vel/models/rnn/multilayer_rnn_sequence_classification.py +++ b/vel/models/rnn/multilayer_rnn_sequence_classification.py @@ -4,13 +4,13 @@ import torch.nn.functional as F import torch.nn as nn -from vel.api import SupervisedModel, ModelFactory, LinearBackboneModel +from vel.api import LossFunctionModel, ModelFactory, LinearBackboneModel from vel.metrics.accuracy import Accuracy from vel.metrics.loss_metric import Loss from vel.modules.rnn_layer import RnnLayer -class MultilayerRnnSequenceClassification(SupervisedModel): +class MultilayerRnnSequenceClassification(LossFunctionModel): """ Multilayer GRU network for sequence modeling (n:1) """ def __init__(self, input_block: LinearBackboneModel, rnn_type: str, output_dim: int, diff --git a/vel/models/vision/cifar10_cnn_01.py b/vel/models/vision/cifar10_cnn_01.py index 10eeafe8..50dc1328 100644 --- a/vel/models/vision/cifar10_cnn_01.py +++ b/vel/models/vision/cifar10_cnn_01.py @@ -8,12 +8,12 @@ import torch.nn.init as init import torch.nn.functional as F -from vel.api import SupervisedModel, ModelFactory +from vel.api import LossFunctionModel, ModelFactory from vel.metrics.loss_metric import Loss from vel.metrics.accuracy import Accuracy -class Net(SupervisedModel): +class Net(LossFunctionModel): """ A simple MNIST classification model. 
diff --git a/vel/models/vision/cifar_resnet_v1.py b/vel/models/vision/cifar_resnet_v1.py index 5b638710..fef562c8 100644 --- a/vel/models/vision/cifar_resnet_v1.py +++ b/vel/models/vision/cifar_resnet_v1.py @@ -6,11 +6,11 @@ import torch.nn as nn import torch.nn.functional as F -from vel.api import SupervisedModel, ModelFactory +from vel.api import LossFunctionModel, ModelFactory from vel.modules.resnet_v1 import Bottleneck, BasicBlock -class ResNetV1(SupervisedModel): +class ResNetV1(LossFunctionModel): """ A ResNet V1 model as defined in the literature """ def __init__(self, block, layers, inplanes, divisor=4, img_channels=3, num_classes=1000): diff --git a/vel/models/vision/cifar_resnet_v2.py b/vel/models/vision/cifar_resnet_v2.py index bd430af8..3bc03b52 100644 --- a/vel/models/vision/cifar_resnet_v2.py +++ b/vel/models/vision/cifar_resnet_v2.py @@ -6,11 +6,11 @@ import torch.nn as nn import torch.nn.functional as F -from vel.api import SupervisedModel, ModelFactory +from vel.api import LossFunctionModel, ModelFactory from vel.modules.resnet_v2 import Bottleneck, BasicBlock -class ResNetV2(SupervisedModel): +class ResNetV2(LossFunctionModel): """ A ResNet V2 (pre-activation resnet) model as defined in the literature """ def __init__(self, block, layers, inplanes, divisor=4, img_channels=3, num_classes=1000): diff --git a/vel/models/vision/cifar_resnext.py b/vel/models/vision/cifar_resnext.py index 9ce14b4a..ffa0fc76 100644 --- a/vel/models/vision/cifar_resnext.py +++ b/vel/models/vision/cifar_resnext.py @@ -6,14 +6,15 @@ import torch.nn as nn import torch.nn.functional as F -from vel.api import SupervisedModel, ModelFactory +from vel.api import LossFunctionModel, ModelFactory from vel.modules.resnext import ResNeXtBottleneck -class ResNeXt(SupervisedModel): +class ResNeXt(LossFunctionModel): """ A ResNext model as defined in the literature """ - def __init__(self, block, layers, inplanes, image_features, cardinality=4, divisor=4, img_channels=3, 
num_classes=1000): + def __init__(self, block, layers, inplanes, image_features, cardinality=4, divisor=4, img_channels=3, + num_classes=1000): super().__init__() self.num_classess = num_classes @@ -77,14 +78,6 @@ def metrics(self): from vel.metrics.accuracy import Accuracy return [Loss(), Accuracy()] - def summary(self): - """ Print model summary """ - # import torchsummary - - print(self) - # self.eval() - # torchsummary.summary(self, input_size=(3, 32, 32)) - def create(blocks, mode='basic', inplanes=64, cardinality=4, image_features=64, divisor=4, num_classes=1000): """ Vel factory function """ diff --git a/vel/models/vision/mnist_cnn_01.py b/vel/models/vision/mnist_cnn_01.py index 05b61c09..08472f4d 100644 --- a/vel/models/vision/mnist_cnn_01.py +++ b/vel/models/vision/mnist_cnn_01.py @@ -8,12 +8,13 @@ import torch.nn.init as init import torch.nn.functional as F -from vel.api import SupervisedModel, ModelFactory + +from vel.api import LossFunctionModel, ModelFactory from vel.metrics.loss_metric import Loss from vel.metrics.accuracy import Accuracy -class Net(SupervisedModel): +class Net(LossFunctionModel): """ A simple MNIST classification model. 
diff --git a/vel/notebook/__init__.py b/vel/notebook/__init__.py index b29639c0..3b31f630 100644 --- a/vel/notebook/__init__.py +++ b/vel/notebook/__init__.py @@ -1,2 +1,2 @@ -from .loader import load +from .loader import load_config from .defaults import reasonable_notbook_defaults diff --git a/vel/notebook/defaults.py b/vel/notebook/defaults.py index 044d9446..3781ad0b 100644 --- a/vel/notebook/defaults.py +++ b/vel/notebook/defaults.py @@ -3,4 +3,4 @@ def reasonable_notbook_defaults(): """ Notbook defaults """ import matplotlib.pyplot as plt - plt.rcParams['figure.figsize'] = [10, 5] + plt.rcParams['figure.figsize'] = [15, 8] diff --git a/vel/notebook/loader.py b/vel/notebook/loader.py index 9edc5e1d..55bd13fc 100644 --- a/vel/notebook/loader.py +++ b/vel/notebook/loader.py @@ -1,8 +1,10 @@ from vel.api import ModelConfig -def load(config_path, run_number=0, device='cuda:0'): +def load_config(config_path, run_number=0, device='cuda:0'): """ Load a ModelConfig from filename """ - model_config = ModelConfig.from_file(config_path, run_number, device=device) - - return model_config + return ModelConfig.from_file( + ModelConfig.from_project_directory(config_path), + run_number=run_number, + device=device + ) From 3e5069c1ed41e1507d0afd24ff5f7796245df007 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 9 Apr 2019 08:21:01 -0700 Subject: [PATCH 021/162] Adding example notebooks. 
--- .gitignore | 3 - .../mnist/mnist-autoencoder.ipynb | 288 ++++++++++++++++ .../autoencoders/mnist/mnist-vae.ipynb | 310 ++++++++++++++++++ 3 files changed, 598 insertions(+), 3 deletions(-) create mode 100644 examples-notebooks/autoencoders/mnist/mnist-autoencoder.ipynb create mode 100644 examples-notebooks/autoencoders/mnist/mnist-vae.ipynb diff --git a/.gitignore b/.gitignore index 6860e5a1..b0a800fd 100644 --- a/.gitignore +++ b/.gitignore @@ -117,6 +117,3 @@ environment.yaml # Test cache /.pytest_cache - -# Local notebooks -/examples-notebooks diff --git a/examples-notebooks/autoencoders/mnist/mnist-autoencoder.ipynb b/examples-notebooks/autoencoders/mnist/mnist-autoencoder.ipynb new file mode 100644 index 00000000..ad7a6b1e --- /dev/null +++ b/examples-notebooks/autoencoders/mnist/mnist-autoencoder.ipynb @@ -0,0 +1,288 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import torch\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import vel\n", + "import vel.notebook as nb\n", + "nb.reasonable_notbook_defaults()\n", + "torch.set_grad_enabled(False) # We don't need autograd here\n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "config = nb.load_config('examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml', run_number=4, device='cpu')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Setting up a new session...\n" + ] + } + ], + "source": [ + "model = config.load_trained_model()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"MnistCnnAutoencoder(\n", + " (encoder): Sequential(\n", + " (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (1): ReLU(inplace)\n", + " (2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (3): ReLU(inplace)\n", + " (4): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (5): Flatten()\n", + " (6): Linear(in_features=784, out_features=16, bias=True)\n", + " )\n", + " (decoder): Sequential(\n", + " (0): Linear(in_features=16, out_features=784, bias=True)\n", + " (1): ReLU(inplace)\n", + " (2): Reshape()\n", + " (3): ConvTranspose2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n", + " (4): ReLU(inplace)\n", + " (5): ConvTranspose2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n", + " (6): ReLU(inplace)\n", + " (7): ConvTranspose2d(8, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " )\n", + ")\n", + "----------------------------------------------------------------------------------------------------\n", + "Number of model parameters: 33,009\n", + "----------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "data_source = config.provide('source')\n", + "train_dataset = data_source.train_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def get_sample(idx):\n", + " return train_dataset[idx][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def show_image(axis, sample):\n", + " axis.imshow(train_dataset.denormalize(sample)[:, :, 0], cmap='gray')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + 
"text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Browse examples\n", + "fig, axes = plt.subplots(1, 5)\n", + "\n", + "for index in range(5):\n", + " show_image(axes[index], get_sample(index))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Browse examples\n", + "fig, axes = plt.subplots(2, 5)\n", + "\n", + "for index in range(5):\n", + " sample = get_sample(index)\n", + " decoded = model(sample[None])[0]\n", + " show_image(axes[0, index], sample)\n", + " show_image(axes[1, index], decoded)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "samples = torch.randn(5, model.representation_length)\n", + "\n", + "fig, axes = plt.subplots(1, 5)\n", + "\n", + "for index in range(5):\n", + " decoded = model.decoder(samples[index][None])[0].detach()\n", + " show_image(axes[index], decoded)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.9777, -1.3779, -0.6812, 2.4773, 2.3612, 0.1038, 0.8307, -2.4117,\n", + " -1.0913, 1.0372, -2.3588, -0.2581, -1.2573, 0.8061, -1.3952, 2.1415]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " model.encode(get_sample(0)[None])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "idx1 = 1\n", + "idx2 = 5\n", + "\n", + "N = 10\n", + "\n", + "encoding1 = model.encode(get_sample(idx1)[None])\n", + "encoding2 = model.encoder(get_sample(idx2)[None])\n", + "\n", + "fig, axes = plt.subplots(1, N)\n", + "\n", + "for i in range(10):\n", + " beta = float(i) / float(N - 1)\n", + " alpha = 1.0 - beta\n", + " \n", + " combined = model.decoder(encoding1 * alpha + encoding2 * beta)[0]\n", + " \n", + " show_image(axes[i], combined)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples-notebooks/autoencoders/mnist/mnist-vae.ipynb b/examples-notebooks/autoencoders/mnist/mnist-vae.ipynb new file mode 100644 index 00000000..4a00f5da --- /dev/null +++ b/examples-notebooks/autoencoders/mnist/mnist-vae.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import torch\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import vel\n", + "import vel.notebook as nb\n", + "nb.reasonable_notbook_defaults()\n", + "torch.set_grad_enabled(False) # We don't need autograd here\n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "config = nb.load_config('examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml', run_number=2, device='cpu')" + ] + }, + { + "cell_type": 
"code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Setting up a new session...\n" + ] + } + ], + "source": [ + "model = config.load_trained_model()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MnistCnnVAE(\n", + " (encoder): Sequential(\n", + " (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (1): ReLU(inplace)\n", + " (2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (3): ReLU(inplace)\n", + " (4): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (5): Flatten()\n", + " (6): Linear(in_features=784, out_features=32, bias=True)\n", + " )\n", + " (decoder): Sequential(\n", + " (0): Linear(in_features=16, out_features=784, bias=True)\n", + " (1): ReLU(inplace)\n", + " (2): Reshape()\n", + " (3): ConvTranspose2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n", + " (4): ReLU(inplace)\n", + " (5): ConvTranspose2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n", + " (6): ReLU(inplace)\n", + " (7): ConvTranspose2d(8, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (8): Sigmoid()\n", + " )\n", + ")\n", + "----------------------------------------------------------------------------------------------------\n", + "Number of model parameters: 45,569\n", + "----------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "data_source = config.provide('source')\n", + "train_dataset = data_source.train_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + 
"def get_sample(idx):\n", + " return train_dataset[idx][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def show_image(axis, sample):\n", + " axis.imshow(train_dataset.denormalize(sample)[:, :, 0], cmap='gray')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA2oAAACzCAYAAAD48u9xAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHTJJREFUeJzt3XuQVdX55vHnRYOXOKiIQUqjmARNaQrbiEocSkhA4xgTUROVUhFjiRVvJKUUxhCHjMEQRafEaOIlgBdGtIJENOOoI15iFAokJj9FETXRH9jBK3LRyChr/uCkgr6r6d3nutbu76eKovth77PXPv3QfVbv3qsthCAAAAAAQDp6tHoAAAAAAIBPYqIGAAAAAIlhogYAAAAAiWGiBgAAAACJYaIGAAAAAIlhogYAAAAAiWGiBgAAAACJYaIGAAAAAImpaaJmZkeZ2TIze8nMLq7XoIBGobPIEb1FbugsckNnkSILIVS3o9lWkl6UdISkFZIWSRoVQli6hX2qOxhQEUKwavels2iFWjordb23dBZ18FYIYddqd6azaIGmdrayD71FTYq8Pqjlitohkl4KIbwSQtggabakY2t4PKDR6CxyRG/RbK/WuD+dRbPRWZRSLRO13SX952bvr6hkQKroLHJEb5EbOovc0FkkaetGH8DMxkoa2+jjAPVCZ5EbOovc0FnkiN6i2WqZqK2U9PnN3t+jkn1CCOFGSTdK/DwvWo7OIked9pbOIjF0Frnh9QGSVMuPPi6SNMDM9jaznpJOljSvPsMCGoLOIkf0Frmhs8gNnUWSqr6iFkL4yMzOk/SApK0kTQ8hPFe3kQF1RmeRI3qL3NBZ5IbOIlVVL89f1cG4TIwa1brUeVfRWdSKziJDT4cQBjXrYHQWddDUzkr0FrVr9PL8AAAAAIAGYKIGAAAAAIlhogYAAAAAiWGiBgAAAACJYaIGAAAAAIlhogYAAAAAiWGiBgAAAACJYaIGAAAAAIlhogYAAAAAiWGiBgAAAACJYaIGAAAAAIlhogYAAAAAiWGiBgAAAACJ2brVAwBQPgcddJDLzjvvPJeNHj3aZbfeeqvLrr322uhxlixZUsXoAAAA0scVNQAAAABIDBM1AAAAAEgMEzUAAAAASExN96iZ2d8lrZX0saSPQgiD6jEooJHoLXJDZ5EbOovc0FmkyEII1e+8qdSDQghvFdy++oNlbKuttnLZjjvuWNNjxhZm2H777V227777uuzcc8+NPubUqVNdNmrUKJf985//dNmUKVNc9rOf/Sx6nFqEEKzWx+hKb7trZ4tqa2uL5vPnz3dZr169qj7Oe++9F8132WWXqh+zWegsNjd8+HCXzZo1y2VDhw512bJlyxoypoina32RSmfTNnHiRJd19DW7Rw//w1fDhg1z2WOPPVbzuGrQ1M5Wtqe3qEmR1wf86CMAAAAAJKbWiVqQ9KCZPW1mY+sxIKAJ6C1yQ2eRGzqL3NBZJKfW36M2JISw0sw+J+khM3shhPD45htUyk7hkZIt9pbOIkF0Frmhs8gNr2mRnJquqIUQVlb+fkPSXEmHRLa5MYQwiJsykYr
OektnkRo6i9zQWeSG17RIUdVX1Mzss5J6hBDWVt4+UtL/qNvIWmTPPfeM5j179nTZYYcd5rIhQ4a4bKeddnLZCSecUMXoum7FihUumzZtWnTb4447zmVr16512V/+8heXtfgm4sLK2ttmOOQQ9zVLc+bMiW4bWywntnBRrF8bNmxwWUeLhgwePNhlS5YsKfSYuUihs4cffrjLYh+TuXPnNmM4WTv44INdtmjRohaMpHFS6Cz+bcyYMS6bMGGCyzZu3Fj4MWtZiC5FdBapquVHH/tKmmtm/3qc/xVC+D91GRXQOPQWuaGzyA2dRW7oLJJU9UQthPCKpAPqOBag4egtckNnkRs6i9zQWaSK5fkBAAAAIDFM1AAAAAAgMbUuz5+1trY2l82fPz+6bWxxhNTEbgSeOHGiy9atWxfdf9asWS5rb2932bvvvuuyZcuWFRkiErT99tu77Ktf/arLbr/9dpf169evpmMvX77cZVdccYXLZs+eHd3/T3/6k8tinf/FL35RxejwL8OGDXPZgAEDXMZiIv/Wo0f8+6B77723y/baay+XVe6VAWoW69e2227bgpGgDA499FCXnXrqqS4bOnRodP/999+/0HEuuugil73++usuiy3iJ8VfsyxcuLDQsVPCFTUAAAAASAwTNQAAAABIDBM1AAAAAEgMEzUAAAAASEy3Xkzktddec9nbb78d3bYZi4l0dJPj6tWrXfb1r3/dZRs2bHDZbbfdVvvAUGo33HCDy0aNGtWUY8cWLdlhhx1c9thjj0X3jy1yMXDgwJrHhU8aPXq0y5566qkWjCQfHS20c9ZZZ7ksdtP7Cy+8UPcxofxGjBjhsvPPP7/Qvh117phjjnHZqlWrujYwZOmkk05y2TXXXOOyPn36uKyjBZEeffRRl+26664uu/LKKwuMsOPjxB7z5JNPLvSYKeGKGgAAAAAkhokaAAAAACSGiRoAAAAAJIaJGgAAAAAkhokaAAAAACSmW6/6+M4777hs/Pjx0W1jqx79+c9/dtm0adMKHfuZZ55x2RFHHBHddv369S7bf//9XTZu3LhCx0b3ddBBB7nsW9/6lss6WkXp0zpajfHee+912dSpU132+uuvuyz2/+rdd9+NHucb3/iGy4qOHcX16MH39Lrq5ptvLrzt8uXLGzgSlNWQIUNcNmPGDJcVXbW6o1X2Xn311a4NDMnbemv/8n/QoEEuu+mmm1y2/fbbu+zxxx932WWXXRY99hNPPOGybbbZxmV33XWXy4488sjoY8YsXry48LYp46svAAAAACSGiRoAAAAAJIaJGgAAAAAkptOJmplNN7M3zOzZzbLeZvaQmS2v/L1zY4cJdA29RW7oLHJDZ5EbOovcWAhhyxuYHS5pnaRbQwhfqWRXSHonhDDFzC6WtHMIYUKnBzPb8sES1qtXL5etXbvWZTfccIPLzjzzTJedeuqpLrvjjjuqHF33EUIotFJEvXqbc2fb2tpcNn/+fJfFuh1z//33u2zUqFHRbYcOHeqygQMHuiy24MKbb75ZaDyS9PHHH7vs/fffLzSeJUuWFD5OLXLrbOzj9NRTT7ns7rvvdtlpp51Wy6FL5cknn4zmgwcPdtlhhx3msgULFtR9TF3wdAjBryzwKal0truKLfTw/e9/v9C+jz76qMuGDx9e65BaqamdreyXbW/HjBnjsqILID300EMuO+mkk1y2Zs2awuOJvSaeOXNmoX1XrlwZzWOLo3Tl9UUzFHl90OkVtRDC45I+vTzisZJuqbx9i6SRXR4d0ED0Frmhs8gNnUVu6CxyU+09an1DCO2Vt/8hqW+dxgM0Er1FbugsckNnkRs6i2TV/HvUQghhS5d/zWyspLG1Hgeopy31ls4iRXQWuaGzyA2vaZGaaq+orTKzfpJU+fuNjjYMIdwYQhhU5GeHgQYr1Fs6i4TQWeSGziI3vKZFsqq9ojZP0umSplT+vqduI0pU0Zsi33vvvULbnXXWWS678847o9tu3Li
x0GOiU6Xs7T777BPNx48f77Idd9zRZW+99ZbL2tvbXXbLLbe4bN26ddFj/+EPfyiUNcJ2223nsgsvvNBlp5xySjOGU6umd/boo492Wew5xb/17et/UmrvvfcuvH9HN8NnqpSfZ1upT58+0Ty2cEjs9cLq1atd9vOf/7z2gZVHaTt72WWXRfNLLrnEZbHFBa+//nqXTZw40WVdWTgk5ic/+UnV+15wwQXRPLWFQ6pVZHn+OyQ9JWlfM1thZmdqU5mPMLPlkkZU3geSQW+RGzqL3NBZ5IbOIjedXlELIcTX35ayXscV5UZvkRs6i9zQWeSGziI31d6jBgAAAABoECZqAAAAAJCYmpfnxydNmjTJZQcddJDLhg4d6rIRI0ZEH/PBBx+seVwoh2222cZlU6dOjW4bWxRi7dq1Lhs9erTLFi9e7LKcF5TYc889Wz2EbOy7776FtnvuuecaPJJ8xP4PxhYYkaQXX3zRZbH/l+ie+vfv77I5c+bU9JjXXnutyx555JGaHhPpufTSS10WWzREkjZs2OCyBx54wGUTJkxw2QcffFBoPNtuu200P/LII10W+xptZi6LLYJzzz2lWfsliitqAAAAAJAYJmoAAAAAkBgmagAAAACQGCZqAAAAAJAYFhOps/Xr17vsrLPOctmSJUtcdtNNN0UfM3bTb2yxh+uuu85lsd80j3wdeOCBLostGtKRY4891mWPPfZYTWNC97Ro0aJWD6GuevXq5bKjjjrKZaeeeqrLYjfHd+Syyy5z2erVqwvvj3KLdW7gwIGF93/44Yddds0119Q0JqRnp512ctk555zjso5eA8YWDhk5cmTV4/nSl77kslmzZkW3jS2wF/O73/3OZVdccUXXBlYCXFEDAAAAgMQwUQMAAACAxDBRAwAAAIDEMFEDAAAAgMSwmEgTvPzyyy4bM2aMy2bMmBHd/7TTTiuUffazn3XZrbfe6rL29vbocZC+q6++2mVmFt02tkhI2RYO6dHDf69p48aNLRhJ99O7d++6P+YBBxzgsli/R4wY4bI99tjDZT179nTZKaecEj12rEsffPCByxYuXOiyDz/80GVbbx3/8vr0009Hc3Q/scUbpkyZUnj/J554wmWnn366y957772uDQzJi31u69OnT+H9L7jgApd97nOfc9kZZ5zhsu985zsu+8pXvuKyHXbYIXrs2AInsez22293WWzBvrLjihoAAAAAJIaJGgAAAAAkhokaAAAAACSGiRoAAAAAJKbTiZqZTTezN8zs2c2ySWa20syeqfw5urHDBIqjs8gRvUVu6CxyQ2eRmyKrPs6U9CtJn14+8H+GEKbWfUTdxNy5c122fPny6Laxlf6GDx/usssvv9xle+21l8smT54cPc7KlSujeYZmqgSdPeaYY1zW1tbmsthqSZI0b968uo8pNbEVHmPPxzPPPNOM4dRqphLobWylw9hz+pvf/MZll1xySU3HHjhwoMtiqz5+9NFHLnv//fddtnTpUpdNnz49euzFixe7LLZK6qpVq1y2YsUKl2233XbR47zwwgvRPFMzlUBnc9C/f3+XzZkzp6bHfOWVV1wW6yc+YaZK0NkNGza47M0333TZrrvuGt3/b3/7m8s6ei1RxOuvv+6yNWvWRLft16+fy9566y2X3XvvvVWPp0w6vaIWQnhc0jtNGAtQF3QWOaK3yA2dRW7oLHJTyz1q55nZXyuXkXfuaCMzG2tmi83Mf7sSaC46ixx12ls6i8TQWeSG1wdIUrUTtV9L+qKkNkntkq7qaMMQwo0hhEEhhEFVHguoBzqLHBXqLZ1FQugscsPrAySrqolaCGFVCOHjEMJGSTdJOqS+wwLqi84iR/QWuaGzyA2dRcqKLCbimFm/EEJ75d3jJD27pe1RzLPPxp/GE0880WXf/va3XTZjxgyXnX322S4bMGBA9DhHHHFEZ0PMVo6djS1G0LNnT5e98cYb0f3vvPPOuo+pGbbZZhuXTZo0qfD+8+fPd9m
Pf/zjWobUMq3o7TnnnOOyV1991WWHHXZY3Y/92muvuez3v/+9y55//nmXLViwoO7jiRk7dqzLYjfsxxZ66A5y/FzbDBMmTHBZbDGkrpgyZUpN+2OTHDu7evVql40cOdJl9913X3T/3r17u+zll1922T333OOymTNnuuydd/xtf7Nnz44eO7aYSEfbosBEzczukDRMUh8zWyHpv0saZmZtkoKkv0vyswGgRegsckRvkRs6i9zQWeSm04laCGFUJP5tA8YC1AWdRY7oLXJDZ5EbOovc1LLqIwAAAACgAZioAQAAAEBiqlpMBM0Vu2n0tttuc9nNN9/ssq239h/iww8/PHqcYcOGuezRRx/tfIBoqQ8//DCat7e3R/OUxBYOmThxosvGjx8f3X/FihUuu+oqv7LyunXrqhgd/uWXv/xlq4eQjOHDhxfabs6cOQ0eCVLV1tbmsiOPPLLqx4st6CBJy5Ytq/oxUT4LFy50WWyho0aIva4cOnRodNvYIjrddfGlIriiBgAAAACJYaIGAAAAAIlhogYAAAAAiWGiBgAAAACJYTGRhAwcODCaf/e733XZwQcf7LLYwiExS5cujeaPP/54of2Rlnnz5rV6CIXEbrCPLRJy0kknuayjm+lPOOGE2gcGNMDcuXNbPQS0yIMPPuiynXfeudC+CxYscNmYMWNqHRLQUNttt53LYouGSFIIwWWzZ8+u+5jKgitqAAAAAJAYJmoAAAAAkBgmagAAAACQGCZqAAAAAJAYFhNpgn333ddl5513nsuOP/746P677bZb1cf++OOPXdbe3h7dtqMbP9EaZlYoGzlyZHT/cePG1X1MRf3oRz9y2U9/+lOX7bjjji6bNWuWy0aPHl2fgQFAg+2yyy4uK/r19frrr3fZunXrah4T0EgPPPBAq4dQWlxRAwAAAIDEMFEDAAAAgMQwUQMAAACAxHQ6UTOzz5vZI2a21MyeM7Nxlby3mT1kZssrfxf7bY5Ag9FZ5IbOIkf0Frmhs8hNkStqH0m6MISwn6TBks41s/0kXSzp4RDCAEkPV94HUkBnkRs6ixzRW+SGziIrna76GEJol9ReeXutmT0vaXdJx0oaVtnsFkmPSprQkFEmKrYa46hRo1wWW+Gxf//+dR/P4sWLXTZ58mSXzZs3r+7HTklZOhtCKJR1tCrotGnTXDZ9+nSXvf322y4bPHiwy0477TSXHXDAAdFj77HHHi577bXXXBZbKSq26lnZlaWz3VVsNdZ99tknuu2CBQsaPZymobfSjBkzXNajR/V3lTz55JO1DAedoLON8c1vfrPVQyitLn02MbP+kg6UtFBS30rhJekfkvrWdWRAHdBZ5IbOIkf0Frmhs8hB4d+jZmY7SJoj6YchhDWbfwcxhBDMzH+rf9N+YyWNrXWgQFfRWeSGziJH1fSWzqKV+FyLXBS6omZmn9GmQs8KIdxdiVeZWb/Kv/eT9EZs3xDCjSGEQSGEQfUYMFAEnUVu6CxyVG1v6Sxahc+1yEmRVR9N0m8lPR9CuHqzf5on6fTK26dLuqf+wwO6js4iN3QWOaK3yA2dRW6K/Ojjf5V0mqT/MLNnKtklkqZIusvMzpT0qqQTGzPE5urbN/5jyfvtt5/LfvWrX7nsy1/+ct3HtHDhQpddeeWVLrvnHv95ZePGjXUfTwa6VWe32mqraH7OOee47IQTTnDZmjVrXDZgwICaxhS7If6RRx5x2aWXXlrTcUqkW3W2bGKL/NSyoERGuk1v29raovmIESNcFvu6u2HDBpddd911Llu1alUVo0MXdJvONtMXvvCFVg+htIqs+viEJL+k1SbD6zscoHZ0Frmhs8gRvUVu6Cxy0y2+5QcAAAAAOWGiBgAAAACJYaIGAAAAAIkp/HvUcte7d2+X3XDDDS7r6Ibhet8oGVts4aqrropu+8ADD7jsgw8+qOt4kJ6nnnrKZYsWLXLZwQcfXPgxd9ttN5d1tIDOp7399tsumz17dnTbcePGFR4
TUEZf+9rXovnMmTObOxDUxU477RTNY59TY1auXOmyiy66qKYxAan44x//6LKOFlTqpovcVY0ragAAAACQGCZqAAAAAJAYJmoAAAAAkBgmagAAAACQmOwXEzn00ENdNn78eJcdcsghLtt9993rPp7333/fZdOmTXPZ5Zdf7rL169fXfTzI14oVK1x2/PHHu+zss8+O7j9x4sSqj33NNde47Ne//rXLXnrppaqPAZSFWUe/PxcAyu/ZZ5912fLly6Pbxhbn++IXv+iyN998s/aBlQBX1AAAAAAgMUzUAAAAACAxTNQAAAAAIDFM1AAAAAAgMdkvJnLccccVyopaunRpNL/vvvtc9tFHH7nsqquuctnq1aurHg+wufb2dpdNmjQpum1HOYDq3X///S773ve+14KRoJleeOGFaP7kk0+6bMiQIY0eDpC82KJ5knTzzTe7bPLkyS47//zzXdbRa/Qy44oaAAAAACSGiRoAAAAAJIaJGgAAAAAkptOJmpl93sweMbOlZvacmY2r5JPMbKWZPVP5c3Tjhwt0js4iN3QWuaGzyBG9RW4shLDlDcz6SeoXQlhiZv9F0tOSRko6UdK6EMLUwgcz2/LBgE6EEKyzbegsUkJnkaGnQwiDtrQBnUViOu2sRG+bqVevXtH8rrvuctmIESNcdvfdd7vsjDPOcNn69eurGF0airw+6HTVxxBCu6T2yttrzex5SbvXPjygMegsckNnkRs6ixzRW+SmS/eomVl/SQdKWliJzjOzv5rZdDPbuc5jA2pGZ5EbOovc0FnkiN4iB4Unama2g6Q5kn4YQlgj6deSviipTZu+O+F/gdim/caa2WIzW1yH8QKF0Vnkhs4iN3QWOaK3yEWhiZqZfUabCj0rhHC3JIUQVoUQPg4hbJR0k6RDYvuGEG4MIQwq8rPDQL3QWeSGziI3dBY5orfISZFVH03SbyU9H0K4erO832abHSfp2foPD+g6Oovc0Fnkhs4iR/QWuSmy6uMQSX+U9B+SNlbiSySN0qZLxEHS3yWdXblJc0uPxQo5qEnBFfToLJJBZ5GhIqs+0lmkpOiqj/S2xWKrQU6ePNllP/jBD1w2cOBAly1durQ+A2uBeq36+ISk2AP972oGBTQanUVu6CxyQ2eRI3qL3HRp1UcAAAAAQOMxUQMAAACAxDBRAwAAAIDEdLqYSF0Pxo2XqFGRGy/ric6iVnQWGSq0MEO90FnUQVM7K9Fb1K7I6wOuqAEAAABAYpioAQAAAEBimKgBAAAAQGKYqAEAAABAYjr9hdd19pakVytv96m8XwZlOhcp3fPZqwXHpLN5SPV86Gz9lOlcpLTPp9m9LWtnpXKdT8rn0srPtSk/L9Uo0/mkfC6FOtvUVR8/cWCzxc1eoadRynQuUvnOp17K9LyU6Vyk8p1PvZTpeSnTuUjlO596KdvzUqbzKdO51FPZnpcynU8ZzoUffQQAAACAxDBRAwAAAIDEtHKidmMLj11vZToXqXznUy9lel7KdC5S+c6nXsr0vJTpXKTynU+9lO15KdP5lOlc6qlsz0uZzif7c2nZPWoAAAAAgDh+9BEAAAAAEtP0iZqZHWVmy8zsJTO7uNnHr5WZTTezN8zs2c2y3mb2kJktr/y9cyvHWJSZfd7MHjGzpWb2nJmNq+RZnk+j0Nl00Nli6Gw66GxxOfe2TJ2V6G1ROXdWKldvy9rZpk7UzGwrSddJ+m+S9pM0ysz2a+YY6mCmpKM+lV0s6eEQwgBJD1fez8FHki4MIewnabCkcysfj1zPp+7obHLobCfobHLobAEl6O1MlaezEr3tVAk6K5Wrt6XsbLOvqB0i6aUQwishhA2SZks6tsljqEkI4XFJ73wqPlbSLZW3b5E0sqmDqlIIoT2EsKTy9lpJz0vaXZmeT4PQ2YTQ2ULobELobGFZ97ZMnZXobUFZd1YqV2/L2tlmT9R2l/Sfm72/opLlrm8Iob3y9j8k9W3lYKphZv0lHShpoUpwPnV
EZxNFZztEZxNFZ7eojL0txceY3naojJ2VSvAxLlNnWUykzsKmZTSzWkrTzHaQNEfSD0MIazb/txzPB12T48eYznZvOX6M6Wz3luvHmN52bzl+jMvW2WZP1FZK+vxm7+9RyXK3ysz6SVLl7zdaPJ7CzOwz2lToWSGEuytxtufTAHQ2MXS2U3Q2MXS2kDL2NuuPMb3tVBk7K2X8MS5jZ5s9UVskaYCZ7W1mPSWdLGlek8fQCPMknV55+3RJ97RwLIWZmUn6raTnQwhXb/ZPWZ5Pg9DZhNDZQuhsQuhsYWXsbbYfY3pbSBk7K2X6MS5tZ0MITf0j6WhJL0p6WdJPmn38Ooz/Dkntkv6fNv088pmSdtGmlWSWS/q/knq3epwFz2WINl0C/qukZyp/js71fBr4PNHZRP7Q2cLPE51N5A+d7dJzlW1vy9TZyvnQ22LPU7adrYy/NL0ta2etcnIAAAAAgESwmAgAAAAAJIaJGgAAAAAkhokaAAAAACSGiRoAAAAAJIaJGgAAAAAkhokaAAAAACSGiRoAAAAAJIaJGgAAAAAk5v8DVMTDbjI6QLUAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Browse examples\n", + "fig, axes = plt.subplots(1, 5)\n", + "\n", + "for index in range(5):\n", + " show_image(axes[index], get_sample(index))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 0.7010, 0.7096, 0.2029, -0.8527, -0.1471, 0.1670, -0.0375, 1.2047,\n", + " -1.9497, -0.1735, 2.7477, 0.9634, -1.8239, -1.0749, 0.8230, 0.0965]])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " model.encode(get_sample(0)[None])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Browse examples\n", + "fig, axes = plt.subplots(2, 5)\n", + "\n", + "for index in range(5):\n", + " sample = get_sample(index)\n", + " decoded = model(sample[None])['decoded'][0].detach()\n", + " show_image(axes[0, index], sample)\n", + " show_image(axes[1, index], decoded)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "samples = torch.randn(5, model.representation_length)\n", + "\n", + "fig, axes = plt.subplots(1, 5)\n", + "\n", + "for index in range(5):\n", + " decoded = model.decoder(samples[index][None])[0].detach()\n", + " show_image(axes[index], decoded)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 0.8606, 0.9047, 0.1575, -0.7448, -0.3117, 0.0745, -0.3145, 1.4116,\n", + " -1.5365, -0.6043, 2.6963, 0.4136, -1.0794, -0.8664, 0.7766, -0.4429]])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " model.encode(get_sample(0)[None])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "idx1 = 1\n", + "idx2 = 5\n", + "\n", + "N = 10\n", + "\n", + "encoding1 = model.encode(get_sample(idx1)[None])\n", + "encoding2 = model.encode(get_sample(idx2)[None])\n", + "\n", + "fig, axes = plt.subplots(1, N)\n", + "\n", + "for i in range(10):\n", + " beta = float(i) / float(N - 1)\n", + " alpha = 1.0 - beta\n", + " \n", + " combined = model.decoder(encoding1 * alpha + encoding2 * beta)[0]\n", + " \n", + " show_image(axes[i], combined)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From eebb84cdb34b13952796f3a2f6856ac0f32773b9 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 9 Apr 2019 08:30:05 -0700 Subject: [PATCH 022/162] Small README updates. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 597aa4e8..f81728f4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Vel 0.3 +# Vel 0.4 [![Build Status](https://travis-ci.org/MillionIntegrals/vel.svg?branch=master)](https://travis-ci.org/MillionIntegrals/vel) [![PyPI version](https://badge.fury.io/py/vel.svg)](https://badge.fury.io/py/vel) @@ -121,7 +121,7 @@ that are ready to run and easy to modify for other similar usecases: # Implemented models - Unsupervised learning -- Autoencoders and Variational autoencoders with an examples on MNIST dataset. +- Autoencoders and Variational autoencoders with examples on MNIST dataset. 
# Examples From 9c26921ff261319c453b2affca48bf444705be11 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Wed, 10 Apr 2019 07:59:30 -0700 Subject: [PATCH 023/162] Small autoencoder changes. --- .../autoencoders/mnist/mnist_cnn_autoencoder.yaml | 2 +- examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml | 2 +- vel/models/autoencoder/mnist_cnn_autoencoder.py | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml b/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml index 00501f4c..ea1782b4 100644 --- a/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml +++ b/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml @@ -13,8 +13,8 @@ model: source: name: vel.sources.vision.mnist batch_size: 128 - normalize: False num_workers: 4 + normalize: False unsupervised: true diff --git a/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml b/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml index 14224ef0..74c499c9 100644 --- a/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml @@ -20,7 +20,7 @@ source: optimizer: name: vel.optimizers.adam - lr: 1.0e-4 + lr: 1.0e-3 commands: diff --git a/vel/models/autoencoder/mnist_cnn_autoencoder.py b/vel/models/autoencoder/mnist_cnn_autoencoder.py index 1ad5536f..fa90e4d5 100644 --- a/vel/models/autoencoder/mnist_cnn_autoencoder.py +++ b/vel/models/autoencoder/mnist_cnn_autoencoder.py @@ -56,6 +56,7 @@ def __init__(self, img_rows, img_cols, img_channels, channels=None, representati ), nn.ReLU(True), nn.ConvTranspose2d(in_channels=channels[0], out_channels=img_channels, kernel_size=3, padding=1), + nn.Sigmoid() ) @staticmethod @@ -85,7 +86,8 @@ def decode(self, sample): def loss_value(self, x_data, y_true, y_pred): """ Calculate a value of loss function """ - return F.mse_loss(y_pred, y_true) + # return F.mse_loss(y_pred, y_true) + return 
F.binary_cross_entropy(y_pred, y_true) def metrics(self): """ Set of metrics for this model """ From 053bc3c1a7137d0407ee107fcfbc834a07038e55 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 14 Apr 2019 21:42:33 -0700 Subject: [PATCH 024/162] Download data. --- .../cats_vs_dogs_resnet34.yaml | 3 ++- vel/sources/img_dir_source.py | 25 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml index c10bcd23..41860c8b 100644 --- a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml +++ b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml @@ -10,7 +10,8 @@ model: source: name: vel.sources.img_dir_source - # Dataset downloaded from http://files.fast.ai/data/dogscats.zip + url: http://files.fast.ai/data/dogscats.zip + extract_parent: true path: data/dogscats num_workers: 8 batch_size: 64 diff --git a/vel/sources/img_dir_source.py b/vel/sources/img_dir_source.py index 1e0c138e..bbca4ad6 100644 --- a/vel/sources/img_dir_source.py +++ b/vel/sources/img_dir_source.py @@ -1,15 +1,19 @@ import os.path +import zipfile import torchvision.datasets as ds +import torchvision.datasets.utils as ds_util from vel.api import SupervisedTrainingData class ImageDirSource(ds.ImageFolder): + """ Source where images are grouped by class in folders """ pass -def create(model_config, path, num_workers, batch_size, augmentations=None, tta=None): +def create(model_config, path, num_workers, batch_size, augmentations=None, tta=None, url=None, + extract_parent=False): """ Create an ImageDirSource with supplied arguments """ if not os.path.isabs(path): path = model_config.project_top_dir(path) @@ -17,6 +21,25 @@ def create(model_config, path, num_workers, batch_size, augmentations=None, tta= train_path = os.path.join(path, 'train') valid_path = 
os.path.join(path, 'valid') + if not os.path.exists(train_path) or not os.path.exists(valid_path): + filename = url.rpartition('/')[2] + ds_util.download_url(url, root=path, filename=filename) + + full_archive_path = os.path.join(path, filename) + + # Unpack zip archive + if full_archive_path.endswith(".zip"): + zip_ref = zipfile.ZipFile(full_archive_path, 'r') + + if extract_parent: + zip_ref.extractall(os.path.dirname(path)) + else: + zip_ref.extractall(path) + + zip_ref.close() + + os.remove(full_archive_path) + train_ds = ImageDirSource(train_path) val_ds = ImageDirSource(valid_path) From 457724cebb2515ed495e60d935b4015885e90645 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 18 Apr 2019 10:35:16 -0700 Subject: [PATCH 025/162] Additional parameter to the IMDB data set. --- vel/sources/nlp/imdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vel/sources/nlp/imdb.py b/vel/sources/nlp/imdb.py index c6ac6fe9..92eb2384 100644 --- a/vel/sources/nlp/imdb.py +++ b/vel/sources/nlp/imdb.py @@ -45,9 +45,9 @@ def __init__(self, path, text_field, label_field, **kwargs): data.Dataset.__init__(self, examples, fields, **kwargs) -def create(model_config, batch_size, vectors=None): +def create(model_config, batch_size, data_dir='imdb', vectors=None): """ Create an IMDB dataset """ - path = model_config.data_dir('imdb') + path = model_config.data_dir(data_dir) text_field = data.Field(lower=True, tokenize='spacy', batch_first=True) label_field = data.LabelField(is_target=True) From d9619db3ebb6eb85f4f5fb6e17516ea7949ed59b Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 18 Apr 2019 10:35:37 -0700 Subject: [PATCH 026/162] New "script" model config. 
--- vel/notebook/__init__.py | 2 +- vel/notebook/loader.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/vel/notebook/__init__.py b/vel/notebook/__init__.py index 3b31f630..68058abb 100644 --- a/vel/notebook/__init__.py +++ b/vel/notebook/__init__.py @@ -1,2 +1,2 @@ -from .loader import load_config +from .loader import load_config, script from .defaults import reasonable_notbook_defaults diff --git a/vel/notebook/loader.py b/vel/notebook/loader.py index 55bd13fc..d28c048d 100644 --- a/vel/notebook/loader.py +++ b/vel/notebook/loader.py @@ -8,3 +8,12 @@ def load_config(config_path, run_number=0, device='cuda:0'): run_number=run_number, device=device ) + + +def script(model_name: str = 'script', run_number=0, device='cuda:0'): + """ Create an ad-hoc script model config """ + return ModelConfig.script( + model_name=model_name, + run_number=run_number, + device=device + ) From 77347483d828ebba91edb996a6e16dc3dce8b311 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 4 May 2019 07:45:26 -0700 Subject: [PATCH 027/162] Small formatting change. 
--- .../reinforcers/buffered_off_policy_iteration_reinforcer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vel/rl/reinforcers/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcers/buffered_off_policy_iteration_reinforcer.py index f8fc81e9..d9f873b3 100644 --- a/vel/rl/reinforcers/buffered_off_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcers/buffered_off_policy_iteration_reinforcer.py @@ -111,7 +111,8 @@ def roll_out_and_store(self, batch_info): self.model.train() if self.env_roller.is_ready_for_sampling(): - rollout = self.env_roller.rollout(batch_info, self.model, self.settings.rollout_steps).to_device(self.device) + rollout = self.env_roller.rollout(batch_info, self.model, self.settings.rollout_steps) + rollout = rollout.to_device(self.device) # Store some information about the rollout, no training phase batch_info['frames'] = rollout.frames() @@ -122,7 +123,8 @@ def roll_out_and_store(self, batch_info): with tqdm.tqdm(desc="Populating memory", total=self.env_roller.initial_memory_size_hint()) as pbar: while not self.env_roller.is_ready_for_sampling(): - rollout = self.env_roller.rollout(batch_info, self.model, self.settings.rollout_steps).to_device(self.device) + rollout = self.env_roller.rollout(batch_info, self.model, self.settings.rollout_steps) + rollout = rollout.to_device(self.device) new_frames = rollout.frames() frames += new_frames From 01762d7eee833a39d16526fc9ade7f1335677c00 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 4 May 2019 07:45:41 -0700 Subject: [PATCH 028/162] Change initial memory size hint for parallel envs. 
--- vel/rl/buffers/circular_replay_buffer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vel/rl/buffers/circular_replay_buffer.py b/vel/rl/buffers/circular_replay_buffer.py index 6595e0c5..b5eed39a 100644 --- a/vel/rl/buffers/circular_replay_buffer.py +++ b/vel/rl/buffers/circular_replay_buffer.py @@ -35,7 +35,7 @@ def is_ready_for_sampling(self) -> bool: def initial_memory_size_hint(self) -> typing.Optional[int]: """ Hint how much data is needed to begin sampling, required only for diagnostics """ - return self.buffer_initial_size + return self.buffer_initial_size * self.backend.num_envs def _get_transitions(self, indexes): """ Return batch with given indexes """ From ba975251ecc8f77abf83415b74798ea9faf705a1 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 4 May 2019 07:45:54 -0700 Subject: [PATCH 029/162] Small profiling utility. --- vel/util/profiling.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 vel/util/profiling.py diff --git a/vel/util/profiling.py b/vel/util/profiling.py new file mode 100644 index 00000000..0c21a5fb --- /dev/null +++ b/vel/util/profiling.py @@ -0,0 +1,15 @@ +import contextlib +import time + + +@contextlib.contextmanager +def timing_context(label=None): + """ Measure time of expression as a context """ + start = time.time() + yield + end = time.time() + + if label is None: + print("Context took {:.2f}s".format(end - start)) + else: + print("{} took {:.2f}s".format(label, end - start)) From ff6f81431403ac49549b3d73a3ec0313be4fba97 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 4 May 2019 07:46:17 -0700 Subject: [PATCH 030/162] Machine translation datasets. 
--- vel/sources/nlp/imdb.py | 4 +- vel/sources/nlp/multi30k.py | 88 +++++++++++++++++++++++++++++++++++++ vel/sources/nlp/wmt14.py | 88 +++++++++++++++++++++++++++++++++++++ 3 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 vel/sources/nlp/multi30k.py create mode 100644 vel/sources/nlp/wmt14.py diff --git a/vel/sources/nlp/imdb.py b/vel/sources/nlp/imdb.py index 92eb2384..ff351b0a 100644 --- a/vel/sources/nlp/imdb.py +++ b/vel/sources/nlp/imdb.py @@ -3,14 +3,14 @@ import io import pickle -import torchtext.datasets.imdb as imdb import torchtext.data as data +import torchtext.datasets as ds from vel.api import SupervisedTextData -class IMDBCached(imdb.IMDB): +class IMDBCached(ds.IMDB): """ Cached version of the IMDB dataset (to save time on tokenization) """ def __init__(self, path, text_field, label_field, **kwargs): diff --git a/vel/sources/nlp/multi30k.py b/vel/sources/nlp/multi30k.py new file mode 100644 index 00000000..c52f5ae8 --- /dev/null +++ b/vel/sources/nlp/multi30k.py @@ -0,0 +1,88 @@ +import io +import os.path +import pickle +import re +import spacy + +import torchtext.data as data +import torchtext.datasets as ds + +from vel.api import SupervisedTextData + + +class Multi30kCached(ds.Multi30k): + """ Cached version of the Multi30K dataset, to save time on tokenization every time """ + + def __init__(self, path, exts, fields, **kwargs): + # Each one is a + if os.path.isdir(path): + cache_file = os.path.join(path, '_cache.pk') + else: + cache_file = path + '_cache.pk' + + if not isinstance(fields[0], (tuple, list)): + fields = [('src', fields[0]), ('trg', fields[1])] + + if os.path.exists(cache_file): + with open(cache_file, 'rb') as fp: + examples = pickle.load(fp) + else: + src_path, trg_path = tuple(os.path.expanduser(path + x) for x in exts) + + examples = [] + + with io.open(src_path, mode='r', encoding='utf-8') as src_file, \ + io.open(trg_path, mode='r', encoding='utf-8') as trg_file: + for src_line, trg_line in zip(src_file, 
trg_file): + src_line, trg_line = src_line.strip(), trg_line.strip() + if src_line != '' and trg_line != '': + examples.append(data.Example.fromlist( + [src_line, trg_line], fields)) + + with open(cache_file, 'wb') as fp: + pickle.dump(examples, file=fp) + + data.Dataset.__init__(self, examples, fields, **kwargs) + + +def create(model_config, batch_size, data_dir='wmt14'): + """ Create an Multi30k dataset. English-German """ + path = model_config.data_dir(data_dir) + + spacy_de = spacy.load('de') + spacy_en = spacy.load('en') + + url = re.compile('(.*)') + + def tokenize_de(text): + return [tok.text for tok in spacy_de.tokenizer(url.sub('@URL@', text))] + + def tokenize_en(text): + return [tok.text for tok in spacy_en.tokenizer(url.sub('@URL@', text))] + + en_field = data.Field( + lower=True, tokenize=tokenize_en, batch_first=True, init_token='', eos_token='' + ) + + de_field = data.Field( + lower=True, tokenize=tokenize_de, batch_first=True, init_token='', eos_token='' + ) + + train_source, val_source, test_source = Multi30kCached.splits( + root=path, + exts=('.en', '.de'), + fields=(en_field, de_field) + ) + + en_field.build_vocab(train_source.src, min_freq=2) + de_field.build_vocab(train_source.tgt, max_size=17_000) + + train_iter, val_iter, test_iter = data.BucketIterator.splits( + (train_source, val_source, test_source), + batch_size=batch_size, + repeat=False + ) + + return SupervisedTextData( + train_source, val_source, train_iter, val_iter, en_field, de_field + ) diff --git a/vel/sources/nlp/wmt14.py b/vel/sources/nlp/wmt14.py new file mode 100644 index 00000000..1d1f87f8 --- /dev/null +++ b/vel/sources/nlp/wmt14.py @@ -0,0 +1,88 @@ +import io +import os.path +import pickle +import re +import spacy + +import torchtext.data as data +import torchtext.datasets as ds + +from vel.api import SupervisedTextData + + +class WMT14Cached(ds.WMT14): + """ Cached version of the WMT14 dataset, to save time on tokenization every time """ + + def __init__(self, path, exts, 
fields, **kwargs): + # Each one is a + if os.path.isdir(path): + cache_file = os.path.join(path, '_cache.pk') + else: + cache_file = path + '_cache.pk' + + if not isinstance(fields[0], (tuple, list)): + fields = [('src', fields[0]), ('trg', fields[1])] + + if os.path.exists(cache_file): + with open(cache_file, 'rb') as fp: + examples = pickle.load(fp) + else: + src_path, trg_path = tuple(os.path.expanduser(path + x) for x in exts) + + examples = [] + + with io.open(src_path, mode='r', encoding='utf-8') as src_file, \ + io.open(trg_path, mode='r', encoding='utf-8') as trg_file: + for src_line, trg_line in zip(src_file, trg_file): + src_line, trg_line = src_line.strip(), trg_line.strip() + if src_line != '' and trg_line != '': + examples.append(data.Example.fromlist( + [src_line, trg_line], fields)) + + with open(cache_file, 'wb') as fp: + pickle.dump(examples, file=fp) + + data.Dataset.__init__(self, examples, fields, **kwargs) + + +def create(model_config, batch_size, data_dir='wmt14'): + """ Create an WMT14 dataset. 
English-German """ + path = model_config.data_dir(data_dir) + + spacy_de = spacy.load('de') + spacy_en = spacy.load('en') + + url = re.compile('(.*)') + + def tokenize_de(text): + return [tok.text for tok in spacy_de.tokenizer(url.sub('@URL@', text))] + + def tokenize_en(text): + return [tok.text for tok in spacy_en.tokenizer(url.sub('@URL@', text))] + + en_field = data.Field( + lower=True, tokenize=tokenize_en, batch_first=True, init_token='', eos_token='' + ) + + de_field = data.Field( + lower=True, tokenize=tokenize_de, batch_first=True, init_token='', eos_token='' + ) + + train_source, val_source, test_source = WMT14Cached.splits( + root=path, + exts=('.en', '.de'), + fields=(en_field, de_field) + ) + + en_field.build_vocab(train_source.src, min_freq=2) + de_field.build_vocab(train_source.tgt, max_size=17_000) + + train_iter, val_iter, test_iter = data.BucketIterator.splits( + (train_source, val_source, test_source), + batch_size=batch_size, + repeat=False + ) + + return SupervisedTextData( + train_source, val_source, train_iter, val_iter, en_field, de_field + ) From dee86ab12a60240769f0fa585eb534023514eb0e Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 16 May 2019 09:11:23 -0700 Subject: [PATCH 031/162] Bumped up version and implemented better tracking of requirements. --- Makefile | 5 ++- requirements.in | 17 +++++++++ requirements.txt | 89 +++++++++++++++++++++--------------------------- setup.py | 6 ++-- 4 files changed, 63 insertions(+), 54 deletions(-) create mode 100644 requirements.in diff --git a/Makefile b/Makefile index e26492f2..f3f23e63 100644 --- a/Makefile +++ b/Makefile @@ -30,4 +30,7 @@ serve-visdom: python -m visdom.server test: - pytest . \ No newline at end of file + pytest . 
+ +requirements.txt: + pip-compile requirements.in \ No newline at end of file diff --git a/requirements.in b/requirements.in new file mode 100644 index 00000000..a1fd3edb --- /dev/null +++ b/requirements.in @@ -0,0 +1,17 @@ +attrs +cloudpickle +matplotlib +numpy +opencv-python +pandas +pyyaml +scikit-learn +torch~=1.1 +torchtext +torchvision +tqdm +visdom +pymongo +dnspython +gym[atari,box2d,classic_control] +pytest diff --git a/requirements.txt b/requirements.txt index d7ae8969..5ab31544 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,61 +1,50 @@ -atari-py==0.1.7 -atomicwrites==1.3.0 +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile requirements.in +# +atari-py==0.1.7 # via gym +atomicwrites==1.3.0 # via pytest attrs==19.1.0 -bleach==3.1.0 -box2d-py==2.3.8 -certifi==2019.3.9 -chardet==3.0.4 +box2d-py==2.3.8 # via gym +certifi==2019.3.9 # via requests +chardet==3.0.4 # via requests cloudpickle==0.8.0 -cymem==2.0.2 -cytoolz==0.9.0.1 -dill==0.2.9 +cycler==0.10.0 # via matplotlib dnspython==1.16.0 -docutils==0.14 -future==0.17.1 -gym==0.12.0 -idna==2.8 -more-itertools==6.0.0 -msgpack==0.5.6 -msgpack-numpy==0.4.3.2 -murmurhash==1.0.2 +future==0.17.1 # via pyglet +gym[atari,box2d,classic_control]==0.12.1 +idna==2.8 # via requests +kiwisolver==1.1.0 # via matplotlib +matplotlib==3.0.3 +more-itertools==7.0.0 # via pytest numpy==1.16.2 opencv-python==4.0.0.21 pandas==0.24.1 -Pillow==5.4.1 -pkginfo==1.5.0.1 -plac==0.9.6 -pluggy==0.9.0 -preshed==2.0.1 -py==1.8.0 -pyglet==1.3.2 -Pygments==2.3.1 -pymongo==3.7.2 -PyOpenGL==3.1.0 -pytest==4.3.1 -python-dateutil==2.8.0 -pytz==2018.9 -PyYAML==5.1 -pyzmq==18.0.1 -readme-renderer==24.0 -regex==2018.1.10 -requests==2.21.0 -requests-toolbelt==0.9.1 +pillow==5.4.1 # via gym, torchvision, visdom +pluggy==0.11.0 # via pytest +py==1.8.0 # via pytest +pyglet==1.3.2 # via gym +pymongo==3.8.0 +pyopengl==3.1.0 # via gym +pyparsing==2.4.0 # via matplotlib +pytest==4.5.0 +python-dateutil==2.8.0 # 
via matplotlib, pandas +pytz==2018.9 # via pandas +pyyaml==5.1 +pyzmq==18.0.1 # via visdom +requests==2.21.0 # via gym, torchtext, visdom scikit-learn==0.20.3 -scipy==1.2.1 -six==1.12.0 -spacy==2.0.18 -thinc==6.12.1 -toolz==0.9.0 -torch==1.0.1.post2 -torchfile==0.1.0 +scipy==1.2.1 # via gym, scikit-learn, visdom +six==1.12.0 # via atari-py, cycler, gym, pytest, python-dateutil, torchvision, visdom, websocket-client +torch==1.1.0 +torchfile==0.1.0 # via visdom torchtext==0.3.1 torchvision==0.2.2.post3 -tornado==6.0.1 +tornado==6.0.2 # via visdom tqdm==4.31.1 -twine==1.13.0 -ujson==1.35 -urllib3==1.24.1 +urllib3==1.24.1 # via requests visdom==0.1.8.8 -webencodings==0.5.1 -websocket-client==0.55.0 -wrapt==1.10.11 +wcwidth==0.1.7 # via pytest +websocket-client==0.56.0 # via visdom diff --git a/setup.py b/setup.py index b336050f..f134a862 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name='vel', - version='0.3.0', + version='0.4.0', description="Velocity in deep-learning research", long_description=long_description, url='https://github.com/MillionIntegrals/vel', @@ -34,7 +34,7 @@ 'pandas', 'pyyaml', 'scikit-learn', - 'torch ~= 1.0', + 'torch ~= 1.1', 'torchtext', 'torchvision', 'tqdm' @@ -44,7 +44,7 @@ 'mongo': ['pymongo', 'dnspython'], 'gym': ['gym[atari,box2d,classic_control]'], 'mujoco': ['gym[mujoco,robotics]'], - 'dev': ['pytest', 'ipython', 'jupyter'], + 'dev': ['pytest', 'ipython', 'jupyter', 'pip-tools'], 'text': ['spacy'], 'all': ['visdom', 'pymongo', 'dnspython', 'gym[all]', 'pytest', 'spacy', 'ipython', 'jupyter'] }, From 6f4f74801d2c8c11336974bf8bec80e728c845c9 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 16 May 2019 09:33:46 -0700 Subject: [PATCH 032/162] Fix makefile. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f3f23e63..54ce78e9 100644 --- a/Makefile +++ b/Makefile @@ -33,4 +33,4 @@ test: pytest . 
requirements.txt: - pip-compile requirements.in \ No newline at end of file + pip-compile requirements.in From 9f314091b71c50b4337f3f8542b68b34758a74a4 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 16 May 2019 09:33:54 -0700 Subject: [PATCH 033/162] Upgrade cloudpickle. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cdec04d4..97b7112f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ attrs==19.1.0 box2d-py==2.3.8 # via gym certifi==2019.3.9 # via requests chardet==3.0.4 # via requests -cloudpickle==0.8.0 +cloudpickle==0.8.1 cycler==0.10.0 # via matplotlib dnspython==1.16.0 future==0.17.1 # via pyglet From fe2d26b278f3af140af8c63f04e852afc759f0b0 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 8 Jun 2019 09:38:37 -0700 Subject: [PATCH 034/162] Fixing a name bug in stochastic_policy_rnn_model.py(#50) --- vel/rl/models/stochastic_policy_rnn_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vel/rl/models/stochastic_policy_rnn_model.py b/vel/rl/models/stochastic_policy_rnn_model.py index d69e3143..78afc436 100644 --- a/vel/rl/models/stochastic_policy_rnn_model.py +++ b/vel/rl/models/stochastic_policy_rnn_model.py @@ -101,7 +101,7 @@ def step(self, observations, state, argmax_sampling=False): return { 'actions': actions, 'values': value_output, - 'logprobs': logprobs, + 'action:logprobs': logprobs, 'state': new_state } From 38e123146d10d4137107888d9087299fe140584c Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Sun, 9 Jun 2019 20:00:11 -0700 Subject: [PATCH 035/162] Redid formatting of image_ops. 
--- vel/api/data/image_ops.py | 43 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/vel/api/data/image_ops.py b/vel/api/data/image_ops.py index 55c86d5e..6bf45d78 100644 --- a/vel/api/data/image_ops.py +++ b/vel/api/data/image_ops.py @@ -4,32 +4,28 @@ def crop_square(im, r, c, sz): - ''' - crop image into a square of size sz, - ''' - return im[r:r+sz, c:c+sz] + """ Crop image into a square of size sz. """ + return im[r:r + sz, c:c + sz] def crop(im, r, c, sz_h, sz_w): - ''' - crop image into a square of size sz, - ''' - return im[r:r+sz_h, c:c+sz_w] + """ Crop image into a of size sz_w x sz_h. """ + return im[r:r + sz_h, c:c + sz_w] def center_crop(im, min_sz=None): """ Returns a center crop of an image""" # return F.center_crop(im, min_sz) - r,c,*_ = im.shape - if min_sz is None: min_sz = min(r,c) - start_r = math.ceil((r-min_sz)/2) - start_c = math.ceil((c-min_sz)/2) + r, c, *_ = im.shape + if min_sz is None: min_sz = min(r, c) + start_r = math.ceil((r - min_sz) / 2) + start_c = math.ceil((c - min_sz) / 2) return crop_square(im, start_r, start_c, min_sz) def scale_to(x, ratio, targ): - '''Calculate dimension of an image during scaling with aspect ratio''' - return max(math.floor(x*ratio), targ) + """ Calculate dimension of an image during scaling with aspect ratio """ + return max(math.floor(x * ratio), targ) def scale_min(im, targ, interpolation=cv2.INTER_AREA): @@ -39,9 +35,9 @@ def scale_min(im, targ, interpolation=cv2.INTER_AREA): im (array): image targ (int): target size """ - r,c,*_ = im.shape + r, c, *_ = im.shape - ratio = targ/min(r,c) + ratio = targ / min(r, c) sz = (scale_to(c, ratio, targ), scale_to(r, ratio, targ)) @@ -59,9 +55,12 @@ def rotate_img(im, deg, mode=cv2.BORDER_CONSTANT, interpolation=cv2.INTER_AREA): Arguments: deg (float): degree to rotate. 
""" - r,c,*_ = im.shape - M = cv2.getRotationMatrix2D((c//2,r//2),deg,1) - return cv2.warpAffine(im,M,(c,r), borderMode=mode, flags=cv2.WARP_FILL_OUTLIERS+interpolation) + r, c, *_ = im.shape + M = cv2.getRotationMatrix2D((c // 2, r // 2), deg, 1) + return cv2.warpAffine( + im, M, (c, r), borderMode=mode, + flags=cv2.WARP_FILL_OUTLIERS + interpolation + ) def pad(img, pad, mode=cv2.BORDER_REFLECT): @@ -78,7 +77,7 @@ def mode_to_cv2(mode='constant'): def lighting(im, b, c): - ''' adjusts image's balance and contrast''' - if b==0 and c==1: return im + """ Adjusts image's balance and contrast. """ + if b == 0 and c == 1: return im mu = np.average(im) - return np.clip((im-mu)*c+mu+b,0.,1.).astype(np.float32) + return np.clip((im - mu) * c + mu + b, 0., 1.).astype(np.float32) From e409b4845577b66efa50577f05a180b0e1c9e80b Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 13 Jun 2019 10:52:54 -0700 Subject: [PATCH 036/162] Rename comment. --- vel/models/autoencoder/mnist_cnn_vae.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vel/models/autoencoder/mnist_cnn_vae.py b/vel/models/autoencoder/mnist_cnn_vae.py index 0cdfadde..6765c874 100644 --- a/vel/models/autoencoder/mnist_cnn_vae.py +++ b/vel/models/autoencoder/mnist_cnn_vae.py @@ -109,7 +109,7 @@ def forward(self, sample): } def calculate_gradient(self, x_data, y_true): - """ Calculate a value of loss function """ + """ Calculate a gradient of loss function """ output = self(x_data) y_pred = output['decoded'] From 2238dce6ea72ad1bab4a4d596f53d78276c8cdc9 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 13 Jun 2019 11:28:33 -0700 Subject: [PATCH 037/162] New dependencies. 
--- Makefile | 7 +++++++ requirements.txt | 47 +++++++++++++++++++++++++---------------------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 54ce78e9..fd79b924 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,7 @@ +.PHONY: default test requpgrade + +default: test; + tag := $(shell git symbolic-ref -q --short HEAD) docker-build: @@ -34,3 +38,6 @@ test: requirements.txt: pip-compile requirements.in + +requpgrade: + pip-compile --upgrade diff --git a/requirements.txt b/requirements.txt index 97b7112f..e09fcbe8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,49 +2,52 @@ # This file is autogenerated by pip-compile # To update, run: # -# pip-compile requirements.in +# pip-compile # -atari-py==0.1.7 # via gym +atari-py==0.1.15 # via gym atomicwrites==1.3.0 # via pytest attrs==19.1.0 box2d-py==2.3.8 # via gym certifi==2019.3.9 # via requests chardet==3.0.4 # via requests -cloudpickle==0.8.1 +cloudpickle==1.2.1 cycler==0.10.0 # via matplotlib dnspython==1.16.0 future==0.17.1 # via pyglet -gym[atari,box2d,classic_control]==0.12.1 +gym[atari,box2d,classic_control]==0.12.5 idna==2.8 # via requests +importlib-metadata==0.18 # via pluggy, pytest +joblib==0.13.2 # via scikit-learn kiwisolver==1.1.0 # via matplotlib -matplotlib==3.0.3 +matplotlib==3.1.0 more-itertools==7.0.0 # via pytest -numpy==1.16.2 -opencv-python==4.0.0.21 -pandas==0.24.1 -pillow==5.4.1 # via gym, torchvision, visdom -pluggy==0.11.0 # via pytest +numpy==1.16.4 +opencv-python==4.1.0.25 +packaging==19.0 # via pytest +pandas==0.24.2 +pillow==6.0.0 # via gym, torchvision, visdom +pluggy==0.12.0 # via pytest py==1.8.0 # via pytest pyglet==1.3.2 # via gym pymongo==3.8.0 -pyopengl==3.1.0 # via gym -pyparsing==2.4.0 # via matplotlib -pytest==4.5.0 +pyparsing==2.4.0 # via matplotlib, packaging +pytest==4.6.3 python-dateutil==2.8.0 # via matplotlib, pandas -pytz==2018.9 # via pandas -pyyaml==5.1 +pytz==2019.1 # via pandas +pyyaml==5.1.1 pyzmq==18.0.1 # via visdom 
-requests==2.21.0 # via gym, torchtext, visdom -scikit-learn==0.20.3 -scipy==1.2.1 # via gym, scikit-learn, visdom -six==1.12.0 # via atari-py, cycler, gym, pytest, python-dateutil, torchvision, visdom, websocket-client +requests==2.22.0 # via torchtext, visdom +scikit-learn==0.21.2 +scipy==1.3.0 # via gym, scikit-learn, visdom +six==1.12.0 # via atari-py, cycler, gym, packaging, pytest, python-dateutil, torchvision, visdom, websocket-client torch==1.1.0 torchfile==0.1.0 # via visdom torchtext==0.3.1 -torchvision==0.2.2.post3 +torchvision==0.3.0 tornado==6.0.2 # via visdom -tqdm==4.31.1 -urllib3==1.24.2 # via requests +tqdm==4.32.1 +urllib3==1.25.3 # via requests visdom==0.1.8.8 wcwidth==0.1.7 # via pytest websocket-client==0.56.0 # via visdom +zipp==0.5.1 # via importlib-metadata From 950f41c0ef81f163783ef922518e02ea56d8708d Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 13 Jun 2019 11:29:35 -0700 Subject: [PATCH 038/162] Large rename of recurrent models. --- vel/api/__init__.py | 3 +- vel/api/model.py | 94 +++---------------- vel/modules/rnn_cell.py | 4 +- vel/modules/rnn_layer.py | 4 +- vel/rl/api/model.py | 2 +- vel/rl/commands/enjoy.py | 4 +- vel/rl/commands/evaluate_env_command.py | 6 +- vel/rl/commands/record_movie_command.py | 4 +- vel/rl/env_roller/step_env_roller.py | 8 +- .../trajectory_replay_env_roller.py | 2 +- .../transition_replay_env_roller.py | 2 +- vel/rl/models/backbone/lstm.py | 4 +- vel/rl/models/backbone/nature_cnn_rnn.py | 4 +- vel/rl/models/stochastic_policy_rnn_model.py | 7 +- 14 files changed, 40 insertions(+), 108 deletions(-) diff --git a/vel/api/__init__.py b/vel/api/__init__.py index eeaadd26..47c4284a 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -2,8 +2,7 @@ from .info import BatchInfo, EpochInfo, TrainingInfo from .learner import Learner from .model import ( - Model, SupervisedModel, LossFunctionModel, - BackboneModel, LinearBackboneModel, RnnLinearBackboneModel, RnnModel, RnnSupervisedModel + Model, 
SupervisedModel, LossFunctionModel, BackboneModel, LinearBackboneModel ) from .model_factory import ModelFactory from .optimizer import OptimizerFactory diff --git a/vel/api/model.py b/vel/api/model.py index 1060170b..699d8a45 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -1,4 +1,3 @@ -import hashlib import torch import torch.nn as nn @@ -34,7 +33,7 @@ def train(self, mode=True): return self - def summary(self, input_size=None, hashsummary=False): + def summary(self, input_size=None): """ Print a model summary """ if input_size is None: @@ -46,23 +45,8 @@ def summary(self, input_size=None, hashsummary=False): else: summary(self, input_size) - if hashsummary: - for idx, hashvalue in enumerate(self.hashsummary()): - print(f"{idx}: {hashvalue}") - - def hashsummary(self): - """ Print a model summary - checksums of each layer parameters """ - children = list(self.children()) - - result = [] - - for child in children: - result.extend(hashlib.sha256(x.detach().cpu().numpy().tobytes()).hexdigest() for x in child.parameters()) - - return result - def get_layer_groups(self): - """ Return layers grouped """ + """ Return layers grouped for optimization purposes """ return [self] def reset_weights(self): @@ -70,15 +54,19 @@ def reset_weights(self): pass @property - def is_recurrent(self) -> bool: - """ If the network is recurrent and needs to be fed state as well as the observations """ + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ return False class SupervisedModel(Model): """ Model for a supervised learning problem """ - def calculate_gradient(self, x_data, y_true): + def calculate_gradient(self, x_data, y_true) -> dict: + """ + Calculate gradient for given batch of supervised learning. 
+ Returns a dictionary of metrics + """ raise NotImplementedError @@ -89,7 +77,7 @@ def metrics(self) -> list: """ Set of metrics for this model """ return [Loss()] - def calculate_gradient(self, x_data, y_true): + def calculate_gradient(self, x_data, y_true) -> dict: y_pred = self(x_data) loss_value = self.loss_value(x_data, y_true, y_pred) @@ -103,7 +91,7 @@ def calculate_gradient(self, x_data, y_true): 'output': y_pred } - def loss_value(self, x_data, y_true, y_pred): + def loss_value(self, x_data, y_true, y_pred) -> torch.tensor: """ Calculate a value of loss function """ raise NotImplementedError @@ -114,67 +102,11 @@ class BackboneModel(Model): class LinearBackboneModel(BackboneModel): """ - Model that serves as a backbone network to connect your heads to - one that spits out a single-dimension output - """ - - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - raise NotImplementedError - - -class RnnModel(Model): - """ Class representing recurrent model """ - - @property - def is_recurrent(self) -> bool: - """ If the network is recurrent and needs to be fed previous state """ - return True - - @property - def state_dim(self) -> int: - """ Dimension of model state """ - raise NotImplementedError - - def zero_state(self, batch_size): - """ Initial state of the network """ - return torch.zeros(batch_size, self.state_dim) - - -class RnnLinearBackboneModel(BackboneModel): - """ - Model that serves as a backbone network to connect your heads to - - one that spits out a single-dimension output and is a recurrent neural network + Model that serves as a backbone network to connect your heads to. + Has a final output of a single-dimensional linear layer. 
""" - @property - def is_recurrent(self) -> bool: - """ If the network is recurrent and needs to be fed previous state """ - return True - @property def output_dim(self) -> int: """ Final dimension of model output """ raise NotImplementedError - - @property - def state_dim(self) -> int: - """ Dimension of model state """ - raise NotImplementedError - - def zero_state(self, batch_size): - """ Initial state of the network """ - return torch.zeros(batch_size, self.state_dim, dtype=torch.float32) - - -class RnnSupervisedModel(RnnModel): - """ Model for a supervised learning problem """ - - def loss(self, x_data, y_true): - """ Forward propagate network and return a value of loss function """ - y_pred = self(x_data) - return y_pred, self.loss_value(x_data, y_true, y_pred) - - def loss_value(self, x_data, y_true, y_pred): - """ Calculate a value of loss function """ - raise NotImplementedError diff --git a/vel/modules/rnn_cell.py b/vel/modules/rnn_cell.py index 3ee521fb..08d9582e 100644 --- a/vel/modules/rnn_cell.py +++ b/vel/modules/rnn_cell.py @@ -3,10 +3,10 @@ import torch.nn.init as init -from vel.api import RnnLinearBackboneModel +from vel.api import LinearBackboneModel -class RnnCell(RnnLinearBackboneModel): +class RnnCell(LinearBackboneModel): """ Generalization of RNN cell (Simple RNN, LSTM or GRU) """ def __init__(self, input_size, hidden_size, rnn_type, bias=True, nonlinearity='tanh'): diff --git a/vel/modules/rnn_layer.py b/vel/modules/rnn_layer.py index 5c5f6886..7533dc35 100644 --- a/vel/modules/rnn_layer.py +++ b/vel/modules/rnn_layer.py @@ -3,10 +3,10 @@ import torch.nn.init as init -from vel.api import RnnLinearBackboneModel +from vel.api import LinearBackboneModel -class RnnLayer(RnnLinearBackboneModel): +class RnnLayer(LinearBackboneModel): """ Generalization of RNN layer (Simple RNN, LSTM or GRU) """ def __init__(self, input_size, hidden_size, rnn_type, bias=True, bidirectional=False, nonlinearity='tanh'): diff --git a/vel/rl/api/model.py 
b/vel/rl/api/model.py index c2d02da2..fb5a691c 100644 --- a/vel/rl/api/model.py +++ b/vel/rl/api/model.py @@ -25,7 +25,7 @@ class RlRnnModel(Model): """ Reinforcement learning recurrent model """ @property - def is_recurrent(self) -> bool: + def is_stateful(self) -> bool: """ If the network is recurrent and needs to be fed previous state """ return True diff --git a/vel/rl/commands/enjoy.py b/vel/rl/commands/enjoy.py index c0a37508..a0f056bc 100644 --- a/vel/rl/commands/enjoy.py +++ b/vel/rl/commands/enjoy.py @@ -46,14 +46,14 @@ def run_model(self, model, environment, device): seconds_per_frame = 1.0 / self.fps - if model.is_recurrent: + if model.is_stateful: hidden_state = model.zero_state(1).to(device) while True: observation_array = np.expand_dims(np.array(observation), axis=0) observation_tensor = torch.from_numpy(observation_array).to(device) - if model.is_recurrent: + if model.is_stateful: output = model.step(observation_tensor, hidden_state, **self.sample_args) hidden_state = output['state'] actions = output['actions'] diff --git a/vel/rl/commands/evaluate_env_command.py b/vel/rl/commands/evaluate_env_command.py index eb5ef763..e9d7c2e4 100644 --- a/vel/rl/commands/evaluate_env_command.py +++ b/vel/rl/commands/evaluate_env_command.py @@ -51,12 +51,12 @@ def run(self): observations = env.reset() observations_tensor = torch.from_numpy(observations).to(device) - if model.is_recurrent: + if model.is_stateful: hidden_state = model.zero_state(observations.shape[0]).to(device) with tqdm.tqdm(total=self.takes) as progress_bar: while len(episode_rewards) < self.takes: - if model.is_recurrent: + if model.is_stateful: output = model.step(observations_tensor, hidden_state, **self.sample_args) hidden_state = output['state'] actions = output['actions'] @@ -75,7 +75,7 @@ def run(self): episode_lengths.append(info['episode']['l']) progress_bar.update(1) - if model.is_recurrent: + if model.is_stateful: # Zero state belongiong to finished episodes dones_tensor = 
torch.from_numpy(dones.astype(np.float32)).to(device) hidden_state = hidden_state * (1.0 - dones_tensor.unsqueeze(-1)) diff --git a/vel/rl/commands/record_movie_command.py b/vel/rl/commands/record_movie_command.py index b5c1ca75..78e60d5f 100644 --- a/vel/rl/commands/record_movie_command.py +++ b/vel/rl/commands/record_movie_command.py @@ -50,7 +50,7 @@ def record_take(self, model, env_instance, device, take_number): observation = env_instance.reset() - if model.is_recurrent: + if model.is_stateful: hidden_state = model.zero_state(1).to(device) frames.append(env_instance.render('rgb_array')) @@ -61,7 +61,7 @@ def record_take(self, model, env_instance, device, take_number): observation_array = np.expand_dims(np.array(observation), axis=0) observation_tensor = torch.from_numpy(observation_array).to(device) - if model.is_recurrent: + if model.is_stateful: output = model.step(observation_tensor, hidden_state, **self.sample_args) hidden_state = output['state'] actions = output['actions'] diff --git a/vel/rl/env_roller/step_env_roller.py b/vel/rl/env_roller/step_env_roller.py index b9af23bc..a25cecfe 100644 --- a/vel/rl/env_roller/step_env_roller.py +++ b/vel/rl/env_roller/step_env_roller.py @@ -32,14 +32,14 @@ def rollout(self, batch_info: BatchInfo, model: Model, number_of_steps: int) -> accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information - if self.hidden_state is None and model.is_recurrent: + if self.hidden_state is None and model.is_stateful: self.hidden_state = model.zero_state(self.last_observation.size(0)).to(self.device) # Remember rollout initial state, we'll use that for training as well initial_hidden_state = self.hidden_state for step_idx in range(number_of_steps): - if model.is_recurrent: + if model.is_stateful: step = model.step(self.last_observation.to(self.device), state=self.hidden_state) self.hidden_state = step['state'] else: @@ -61,7 +61,7 @@ def rollout(self, batch_info: BatchInfo, model: Model, 
number_of_steps: int) -> self.last_observation = torch.from_numpy(new_obs).clone() - if model.is_recurrent: + if model.is_stateful: # Zero out state in environments that have finished self.hidden_state = self.hidden_state * (1.0 - dones_tensor.unsqueeze(-1)).to(self.device) @@ -69,7 +69,7 @@ def rollout(self, batch_info: BatchInfo, model: Model, number_of_steps: int) -> episode_information.append(new_infos) - if model.is_recurrent: + if model.is_stateful: final_values = model.value(self.last_observation.to(self.device), state=self.hidden_state).cpu() else: final_values = model.value(self.last_observation.to(self.device)).cpu() diff --git a/vel/rl/env_roller/trajectory_replay_env_roller.py b/vel/rl/env_roller/trajectory_replay_env_roller.py index 87412ee1..38d1f9aa 100644 --- a/vel/rl/env_roller/trajectory_replay_env_roller.py +++ b/vel/rl/env_roller/trajectory_replay_env_roller.py @@ -32,7 +32,7 @@ def environment(self): @torch.no_grad() def rollout(self, batch_info: BatchInfo, model: RlModel, number_of_steps: int) -> Rollout: """ Calculate env rollout """ - assert not model.is_recurrent, "Replay env roller does not support recurrent models" + assert not model.is_stateful, "Replay env roller does not support recurrent models" accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information diff --git a/vel/rl/env_roller/transition_replay_env_roller.py b/vel/rl/env_roller/transition_replay_env_roller.py index fc7e7fba..d64628ae 100644 --- a/vel/rl/env_roller/transition_replay_env_roller.py +++ b/vel/rl/env_roller/transition_replay_env_roller.py @@ -54,7 +54,7 @@ def environment(self): @torch.no_grad() def rollout(self, batch_info: BatchInfo, model: RlModel, number_of_steps: int) -> Rollout: """ Calculate env rollout """ - assert not model.is_recurrent, "Replay env roller does not support recurrent models" + assert not model.is_stateful, "Replay env roller does not support stateful models" accumulator = TensorAccumulator() 
episode_information = [] # List of dictionaries with episode information diff --git a/vel/rl/models/backbone/lstm.py b/vel/rl/models/backbone/lstm.py index eb2b90b4..874f7ca6 100644 --- a/vel/rl/models/backbone/lstm.py +++ b/vel/rl/models/backbone/lstm.py @@ -1,7 +1,7 @@ -from vel.api import RnnLinearBackboneModel, ModelFactory +from vel.api import LinearBackboneModel, ModelFactory -class LstmBackbone(RnnLinearBackboneModel): +class LstmBackbone(LinearBackboneModel): """ Simple 'LSTM' model backbone """ diff --git a/vel/rl/models/backbone/nature_cnn_rnn.py b/vel/rl/models/backbone/nature_cnn_rnn.py index 837e7a58..832926e4 100644 --- a/vel/rl/models/backbone/nature_cnn_rnn.py +++ b/vel/rl/models/backbone/nature_cnn_rnn.py @@ -1,9 +1,9 @@ -from vel.api import RnnLinearBackboneModel, ModelFactory +from vel.api import LinearBackboneModel, ModelFactory from vel.rl.models.backbone.nature_cnn import NatureCnn from vel.modules.rnn_cell import RnnCell -class NatureCnnRnnBackbone(RnnLinearBackboneModel): +class NatureCnnRnnBackbone(LinearBackboneModel): """ Long-Short-Term Memory rnn cell together with DeepMind-style 'Nature' cnn preprocessing """ diff --git a/vel/rl/models/stochastic_policy_rnn_model.py b/vel/rl/models/stochastic_policy_rnn_model.py index 78afc436..cda7d69a 100644 --- a/vel/rl/models/stochastic_policy_rnn_model.py +++ b/vel/rl/models/stochastic_policy_rnn_model.py @@ -2,7 +2,7 @@ import torch import typing -from vel.api import RnnLinearBackboneModel, ModelFactory, BackboneModel +from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.modules.input.identity import IdentityFactory from vel.rl.api import Rollout, Trajectories, Evaluator, RlRnnModel from vel.rl.modules.action_head import ActionHead @@ -54,7 +54,8 @@ class StochasticPolicyRnnModel(RlRnnModel): RNN version """ - def __init__(self, input_block: BackboneModel, backbone: RnnLinearBackboneModel, action_space: gym.Space): + def __init__(self, input_block: BackboneModel, 
backbone: LinearBackboneModel, + action_space: gym.Space): super().__init__() self.input_block = input_block @@ -66,7 +67,7 @@ def __init__(self, input_block: BackboneModel, backbone: RnnLinearBackboneModel, ) self.value_head = ValueHead(input_dim=self.backbone.output_dim) - assert self.backbone.is_recurrent, "Backbone must be a recurrent model" + assert self.backbone.is_stateful, "Backbone must be a recurrent model" @property def state_dim(self) -> int: From 8c0428e3d24fb9becae4583bb83059b35ef99c54 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 13 Jun 2019 11:33:07 -0700 Subject: [PATCH 039/162] Large scale rename and move. --- vel/api/{data => }/augmentation.py | 0 vel/api/data/__init__.py | 3 --- vel/api/{data => }/dataflow.py | 0 vel/api/metrics/__init__.py | 4 ---- vel/api/source.py | 2 +- vel/{augmentations => augmentation}/__init__.py | 0 vel/{augmentations => augmentation}/center_crop.py | 2 +- vel/{augmentations => augmentation}/normalize.py | 2 +- vel/{augmentations => augmentation}/random_crop.py | 2 +- vel/{augmentations => augmentation}/random_horizontal_flip.py | 2 +- vel/{augmentations => augmentation}/random_lighting.py | 2 +- vel/{augmentations => augmentation}/random_rotate.py | 2 +- vel/{augmentations => augmentation}/random_scale.py | 2 +- vel/{augmentations => augmentation}/scale_min_size.py | 2 +- vel/{augmentations => augmentation}/to_array.py | 2 +- vel/{augmentations => augmentation}/to_tensor.py | 2 +- vel/{augmentations => augmentation}/tta/__init__.py | 0 vel/{augmentations => augmentation}/tta/train_tta.py | 0 vel/{augmentations => augmentation}/unsupervised.py | 2 +- vel/{callbacks => callback}/__init__.py | 0 vel/{callbacks => callback}/time_tracker.py | 0 vel/{commands => command}/__init__.py | 0 vel/{commands => command}/augvis_command.py | 0 vel/{commands => command}/lr_find_command.py | 0 vel/{commands => command}/phase_train_command.py | 0 vel/{commands => command}/rnn/__init__.py | 0 vel/{commands => 
command}/rnn/generate_text.py | 0 vel/{commands => command}/summary_command.py | 0 vel/{commands => command}/train_command.py | 0 vel/{commands => command}/vis_store_command.py | 0 vel/data/__init__.py | 1 + vel/{api => }/data/image_ops.py | 0 vel/{exceptions.py => exception.py} | 0 vel/{internals => internal}/__init__.py | 0 vel/{internals => internal}/context.py | 0 vel/{internals => internal}/generic_factory.py | 0 vel/{internals => internal}/parser.py | 0 vel/{internals => internal}/provider.py | 0 vel/{internals/tests => internal/test}/__init__.py | 0 vel/{internals/tests => internal/test}/fixture_a.py | 0 vel/{internals/tests => internal/test}/fixture_b.py | 0 vel/{internals/tests => internal/test}/test_parser.py | 0 vel/{internals/tests => internal/test}/test_provider.py | 0 vel/{metrics => metric}/__init__.py | 0 vel/{metrics => metric}/accuracy.py | 0 vel/{api/metrics => metric}/averaging_metric.py | 0 vel/{api/metrics => metric}/base_metric.py | 0 vel/{metrics => metric}/loss_metric.py | 0 vel/{api/metrics => metric}/summing_metric.py | 0 vel/{api/metrics => metric}/value_metric.py | 0 vel/{models => model}/__init__.py | 0 vel/{models => model}/autoencoder/__init__.py | 0 vel/{models => model}/autoencoder/mnist_cnn_autoencoder.py | 0 vel/{models => model}/autoencoder/mnist_cnn_vae.py | 0 vel/{models => model}/imagenet/__init__.py | 0 vel/{models => model}/imagenet/resnet34.py | 0 vel/{models => model}/rnn/__init__.py | 0 .../rnn/multilayer_rnn_sequence_classification.py | 0 vel/{models => model}/rnn/multilayer_rnn_sequence_model.py | 0 vel/{models => model}/vision/__init__.py | 0 vel/{models => model}/vision/cifar10_cnn_01.py | 0 vel/{models => model}/vision/cifar_resnet_v1.py | 0 vel/{models => model}/vision/cifar_resnet_v2.py | 0 vel/{models => model}/vision/cifar_resnext.py | 0 vel/{models => model}/vision/mnist_cnn_01.py | 0 vel/{modules => module}/__init__.py | 0 vel/{modules => module}/input/__init__.py | 0 vel/{modules => module}/input/embedding.py 
| 0 vel/{modules => module}/input/identity.py | 0 vel/{modules => module}/input/image_to_tensor.py | 0 vel/{modules => module}/input/normalize_observations.py | 0 vel/{modules => module}/input/one_hot_encoding.py | 0 vel/{modules => module}/layers.py | 0 vel/{modules => module}/resnet_v1.py | 0 vel/{modules => module}/resnet_v2.py | 0 vel/{modules => module}/resnext.py | 0 vel/{modules => module}/rnn_cell.py | 0 vel/{modules => module}/rnn_layer.py | 0 vel/{optimizers => optimizer}/__init__.py | 0 vel/{optimizers => optimizer}/adadelta.py | 0 vel/{optimizers => optimizer}/adam.py | 0 vel/{optimizers => optimizer}/rmsprop.py | 0 vel/{optimizers => optimizer}/rmsprop_tf.py | 0 vel/{optimizers => optimizer}/sgd.py | 0 vel/{schedules => schedule}/__init__.py | 0 vel/{schedules => schedule}/constant.py | 0 vel/{schedules => schedule}/linear.py | 0 vel/{schedules => schedule}/linear_and_constant.py | 0 vel/{sources => source}/__init__.py | 0 vel/{sources => source}/img_dir_source.py | 0 vel/{sources => source}/nlp/__init__.py | 0 vel/{sources => source}/nlp/imdb.py | 0 vel/{sources => source}/nlp/multi30k.py | 0 vel/{sources => source}/nlp/text_url.py | 0 vel/{sources => source}/nlp/wmt14.py | 0 vel/{sources => source}/vision/__init__.py | 0 vel/{sources => source}/vision/cifar10.py | 0 vel/{sources => source}/vision/mnist.py | 0 98 files changed, 13 insertions(+), 19 deletions(-) rename vel/api/{data => }/augmentation.py (100%) delete mode 100644 vel/api/data/__init__.py rename vel/api/{data => }/dataflow.py (100%) delete mode 100644 vel/api/metrics/__init__.py rename vel/{augmentations => augmentation}/__init__.py (100%) rename vel/{augmentations => augmentation}/center_crop.py (95%) rename vel/{augmentations => augmentation}/normalize.py (95%) rename vel/{augmentations => augmentation}/random_crop.py (99%) rename vel/{augmentations => augmentation}/random_horizontal_flip.py (96%) rename vel/{augmentations => augmentation}/random_lighting.py (96%) rename 
vel/{augmentations => augmentation}/random_rotate.py (96%) rename vel/{augmentations => augmentation}/random_scale.py (97%) rename vel/{augmentations => augmentation}/scale_min_size.py (95%) rename vel/{augmentations => augmentation}/to_array.py (94%) rename vel/{augmentations => augmentation}/to_tensor.py (95%) rename vel/{augmentations => augmentation}/tta/__init__.py (100%) rename vel/{augmentations => augmentation}/tta/train_tta.py (100%) rename vel/{augmentations => augmentation}/unsupervised.py (91%) rename vel/{callbacks => callback}/__init__.py (100%) rename vel/{callbacks => callback}/time_tracker.py (100%) rename vel/{commands => command}/__init__.py (100%) rename vel/{commands => command}/augvis_command.py (100%) rename vel/{commands => command}/lr_find_command.py (100%) rename vel/{commands => command}/phase_train_command.py (100%) rename vel/{commands => command}/rnn/__init__.py (100%) rename vel/{commands => command}/rnn/generate_text.py (100%) rename vel/{commands => command}/summary_command.py (100%) rename vel/{commands => command}/train_command.py (100%) rename vel/{commands => command}/vis_store_command.py (100%) create mode 100644 vel/data/__init__.py rename vel/{api => }/data/image_ops.py (100%) rename vel/{exceptions.py => exception.py} (100%) rename vel/{internals => internal}/__init__.py (100%) rename vel/{internals => internal}/context.py (100%) rename vel/{internals => internal}/generic_factory.py (100%) rename vel/{internals => internal}/parser.py (100%) rename vel/{internals => internal}/provider.py (100%) rename vel/{internals/tests => internal/test}/__init__.py (100%) rename vel/{internals/tests => internal/test}/fixture_a.py (100%) rename vel/{internals/tests => internal/test}/fixture_b.py (100%) rename vel/{internals/tests => internal/test}/test_parser.py (100%) rename vel/{internals/tests => internal/test}/test_provider.py (100%) rename vel/{metrics => metric}/__init__.py (100%) rename vel/{metrics => metric}/accuracy.py (100%) 
rename vel/{api/metrics => metric}/averaging_metric.py (100%) rename vel/{api/metrics => metric}/base_metric.py (100%) rename vel/{metrics => metric}/loss_metric.py (100%) rename vel/{api/metrics => metric}/summing_metric.py (100%) rename vel/{api/metrics => metric}/value_metric.py (100%) rename vel/{models => model}/__init__.py (100%) rename vel/{models => model}/autoencoder/__init__.py (100%) rename vel/{models => model}/autoencoder/mnist_cnn_autoencoder.py (100%) rename vel/{models => model}/autoencoder/mnist_cnn_vae.py (100%) rename vel/{models => model}/imagenet/__init__.py (100%) rename vel/{models => model}/imagenet/resnet34.py (100%) rename vel/{models => model}/rnn/__init__.py (100%) rename vel/{models => model}/rnn/multilayer_rnn_sequence_classification.py (100%) rename vel/{models => model}/rnn/multilayer_rnn_sequence_model.py (100%) rename vel/{models => model}/vision/__init__.py (100%) rename vel/{models => model}/vision/cifar10_cnn_01.py (100%) rename vel/{models => model}/vision/cifar_resnet_v1.py (100%) rename vel/{models => model}/vision/cifar_resnet_v2.py (100%) rename vel/{models => model}/vision/cifar_resnext.py (100%) rename vel/{models => model}/vision/mnist_cnn_01.py (100%) rename vel/{modules => module}/__init__.py (100%) rename vel/{modules => module}/input/__init__.py (100%) rename vel/{modules => module}/input/embedding.py (100%) rename vel/{modules => module}/input/identity.py (100%) rename vel/{modules => module}/input/image_to_tensor.py (100%) rename vel/{modules => module}/input/normalize_observations.py (100%) rename vel/{modules => module}/input/one_hot_encoding.py (100%) rename vel/{modules => module}/layers.py (100%) rename vel/{modules => module}/resnet_v1.py (100%) rename vel/{modules => module}/resnet_v2.py (100%) rename vel/{modules => module}/resnext.py (100%) rename vel/{modules => module}/rnn_cell.py (100%) rename vel/{modules => module}/rnn_layer.py (100%) rename vel/{optimizers => optimizer}/__init__.py (100%) rename 
vel/{optimizers => optimizer}/adadelta.py (100%) rename vel/{optimizers => optimizer}/adam.py (100%) rename vel/{optimizers => optimizer}/rmsprop.py (100%) rename vel/{optimizers => optimizer}/rmsprop_tf.py (100%) rename vel/{optimizers => optimizer}/sgd.py (100%) rename vel/{schedules => schedule}/__init__.py (100%) rename vel/{schedules => schedule}/constant.py (100%) rename vel/{schedules => schedule}/linear.py (100%) rename vel/{schedules => schedule}/linear_and_constant.py (100%) rename vel/{sources => source}/__init__.py (100%) rename vel/{sources => source}/img_dir_source.py (100%) rename vel/{sources => source}/nlp/__init__.py (100%) rename vel/{sources => source}/nlp/imdb.py (100%) rename vel/{sources => source}/nlp/multi30k.py (100%) rename vel/{sources => source}/nlp/text_url.py (100%) rename vel/{sources => source}/nlp/wmt14.py (100%) rename vel/{sources => source}/vision/__init__.py (100%) rename vel/{sources => source}/vision/cifar10.py (100%) rename vel/{sources => source}/vision/mnist.py (100%) diff --git a/vel/api/data/augmentation.py b/vel/api/augmentation.py similarity index 100% rename from vel/api/data/augmentation.py rename to vel/api/augmentation.py diff --git a/vel/api/data/__init__.py b/vel/api/data/__init__.py deleted file mode 100644 index 76bebdab..00000000 --- a/vel/api/data/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .augmentation import Augmentation -from .dataflow import DataFlow -from .image_ops import * \ No newline at end of file diff --git a/vel/api/data/dataflow.py b/vel/api/dataflow.py similarity index 100% rename from vel/api/data/dataflow.py rename to vel/api/dataflow.py diff --git a/vel/api/metrics/__init__.py b/vel/api/metrics/__init__.py deleted file mode 100644 index f496db8a..00000000 --- a/vel/api/metrics/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .base_metric import BaseMetric -from .averaging_metric import AveragingMetric, AveragingNamedMetric, AveragingSupervisedMetric -from .value_metric import ValueMetric 
-from .summing_metric import SummingMetric, SummingNamedMetric diff --git a/vel/api/source.py b/vel/api/source.py index a566521a..9396b487 100644 --- a/vel/api/source.py +++ b/vel/api/source.py @@ -1,6 +1,6 @@ import torch.utils.data as data -from .data import DataFlow +from vel.data import DataFlow class Source: diff --git a/vel/augmentations/__init__.py b/vel/augmentation/__init__.py similarity index 100% rename from vel/augmentations/__init__.py rename to vel/augmentation/__init__.py diff --git a/vel/augmentations/center_crop.py b/vel/augmentation/center_crop.py similarity index 95% rename from vel/augmentations/center_crop.py rename to vel/augmentation/center_crop.py index b586c61b..21cf33bd 100644 --- a/vel/augmentations/center_crop.py +++ b/vel/augmentation/center_crop.py @@ -3,7 +3,7 @@ https://github.com/fastai/fastai/blob/master/fastai/transforms.py """ -import vel.api.data as data +import vel.data as data class CenterCrop(data.Augmentation): diff --git a/vel/augmentations/normalize.py b/vel/augmentation/normalize.py similarity index 95% rename from vel/augmentations/normalize.py rename to vel/augmentation/normalize.py index 2a1333d9..36ca29fd 100644 --- a/vel/augmentations/normalize.py +++ b/vel/augmentation/normalize.py @@ -1,6 +1,6 @@ import numpy as np -import vel.api.data as data +import vel.data as data class Normalize(data.Augmentation): diff --git a/vel/augmentations/random_crop.py b/vel/augmentation/random_crop.py similarity index 99% rename from vel/augmentations/random_crop.py rename to vel/augmentation/random_crop.py index 3cb8faf5..bbc56dec 100644 --- a/vel/augmentations/random_crop.py +++ b/vel/augmentation/random_crop.py @@ -6,7 +6,7 @@ import numbers import random -import vel.api.data as data +import vel.data as data class RandomCrop(data.Augmentation): diff --git a/vel/augmentations/random_horizontal_flip.py b/vel/augmentation/random_horizontal_flip.py similarity index 96% rename from vel/augmentations/random_horizontal_flip.py rename to 
vel/augmentation/random_horizontal_flip.py index a5549673..2d8bdafd 100644 --- a/vel/augmentations/random_horizontal_flip.py +++ b/vel/augmentation/random_horizontal_flip.py @@ -1,7 +1,7 @@ import random import numpy as np -import vel.api.data as data +import vel.data as data class RandomHorizontalFlip(data.Augmentation): diff --git a/vel/augmentations/random_lighting.py b/vel/augmentation/random_lighting.py similarity index 96% rename from vel/augmentations/random_lighting.py rename to vel/augmentation/random_lighting.py index b5c0da5a..82fc9cb1 100644 --- a/vel/augmentations/random_lighting.py +++ b/vel/augmentation/random_lighting.py @@ -1,6 +1,6 @@ import random -import vel.api.data as data +import vel.data as data class RandomLighting(data.Augmentation): diff --git a/vel/augmentations/random_rotate.py b/vel/augmentation/random_rotate.py similarity index 96% rename from vel/augmentations/random_rotate.py rename to vel/augmentation/random_rotate.py index 1a646b22..c2c02246 100644 --- a/vel/augmentations/random_rotate.py +++ b/vel/augmentation/random_rotate.py @@ -5,7 +5,7 @@ import cv2 import random -import vel.api.data as data +import vel.data as data class RandomRotate(data.Augmentation): diff --git a/vel/augmentations/random_scale.py b/vel/augmentation/random_scale.py similarity index 97% rename from vel/augmentations/random_scale.py rename to vel/augmentation/random_scale.py index c60d5852..882a3eb6 100644 --- a/vel/augmentations/random_scale.py +++ b/vel/augmentation/random_scale.py @@ -6,7 +6,7 @@ import collections.abc as abc import random -import vel.api.data as data +import vel.data as data class RandomScale(data.Augmentation): diff --git a/vel/augmentations/scale_min_size.py b/vel/augmentation/scale_min_size.py similarity index 95% rename from vel/augmentations/scale_min_size.py rename to vel/augmentation/scale_min_size.py index a89f0c8e..c1ebfa5d 100644 --- a/vel/augmentations/scale_min_size.py +++ b/vel/augmentation/scale_min_size.py @@ -4,7 +4,7 @@ 
""" import PIL.Image as Image -import vel.api.data as data +import vel.data as data class ScaleMinSize(data.Augmentation): diff --git a/vel/augmentations/to_array.py b/vel/augmentation/to_array.py similarity index 94% rename from vel/augmentations/to_array.py rename to vel/augmentation/to_array.py index ffbd353d..e1f3a5f0 100644 --- a/vel/augmentations/to_array.py +++ b/vel/augmentation/to_array.py @@ -1,6 +1,6 @@ import numpy as np -import vel.api.data as data +import vel.data as data class ToArray(data.Augmentation): diff --git a/vel/augmentations/to_tensor.py b/vel/augmentation/to_tensor.py similarity index 95% rename from vel/augmentations/to_tensor.py rename to vel/augmentation/to_tensor.py index dc4030e0..33285e77 100644 --- a/vel/augmentations/to_tensor.py +++ b/vel/augmentation/to_tensor.py @@ -2,7 +2,7 @@ import torchvision.transforms.functional as F -import vel.api.data as data +import vel.data as data class ToTensor(data.Augmentation): diff --git a/vel/augmentations/tta/__init__.py b/vel/augmentation/tta/__init__.py similarity index 100% rename from vel/augmentations/tta/__init__.py rename to vel/augmentation/tta/__init__.py diff --git a/vel/augmentations/tta/train_tta.py b/vel/augmentation/tta/train_tta.py similarity index 100% rename from vel/augmentations/tta/train_tta.py rename to vel/augmentation/tta/train_tta.py diff --git a/vel/augmentations/unsupervised.py b/vel/augmentation/unsupervised.py similarity index 91% rename from vel/augmentations/unsupervised.py rename to vel/augmentation/unsupervised.py index 2caeb448..678ab7d3 100644 --- a/vel/augmentations/unsupervised.py +++ b/vel/augmentation/unsupervised.py @@ -1,4 +1,4 @@ -import vel.api.data as data +import vel.data as data class Unsupervised(data.Augmentation): diff --git a/vel/callbacks/__init__.py b/vel/callback/__init__.py similarity index 100% rename from vel/callbacks/__init__.py rename to vel/callback/__init__.py diff --git a/vel/callbacks/time_tracker.py b/vel/callback/time_tracker.py 
similarity index 100% rename from vel/callbacks/time_tracker.py rename to vel/callback/time_tracker.py diff --git a/vel/commands/__init__.py b/vel/command/__init__.py similarity index 100% rename from vel/commands/__init__.py rename to vel/command/__init__.py diff --git a/vel/commands/augvis_command.py b/vel/command/augvis_command.py similarity index 100% rename from vel/commands/augvis_command.py rename to vel/command/augvis_command.py diff --git a/vel/commands/lr_find_command.py b/vel/command/lr_find_command.py similarity index 100% rename from vel/commands/lr_find_command.py rename to vel/command/lr_find_command.py diff --git a/vel/commands/phase_train_command.py b/vel/command/phase_train_command.py similarity index 100% rename from vel/commands/phase_train_command.py rename to vel/command/phase_train_command.py diff --git a/vel/commands/rnn/__init__.py b/vel/command/rnn/__init__.py similarity index 100% rename from vel/commands/rnn/__init__.py rename to vel/command/rnn/__init__.py diff --git a/vel/commands/rnn/generate_text.py b/vel/command/rnn/generate_text.py similarity index 100% rename from vel/commands/rnn/generate_text.py rename to vel/command/rnn/generate_text.py diff --git a/vel/commands/summary_command.py b/vel/command/summary_command.py similarity index 100% rename from vel/commands/summary_command.py rename to vel/command/summary_command.py diff --git a/vel/commands/train_command.py b/vel/command/train_command.py similarity index 100% rename from vel/commands/train_command.py rename to vel/command/train_command.py diff --git a/vel/commands/vis_store_command.py b/vel/command/vis_store_command.py similarity index 100% rename from vel/commands/vis_store_command.py rename to vel/command/vis_store_command.py diff --git a/vel/data/__init__.py b/vel/data/__init__.py new file mode 100644 index 00000000..806cbe66 --- /dev/null +++ b/vel/data/__init__.py @@ -0,0 +1 @@ +from .image_ops import * \ No newline at end of file diff --git a/vel/api/data/image_ops.py 
b/vel/data/image_ops.py similarity index 100% rename from vel/api/data/image_ops.py rename to vel/data/image_ops.py diff --git a/vel/exceptions.py b/vel/exception.py similarity index 100% rename from vel/exceptions.py rename to vel/exception.py diff --git a/vel/internals/__init__.py b/vel/internal/__init__.py similarity index 100% rename from vel/internals/__init__.py rename to vel/internal/__init__.py diff --git a/vel/internals/context.py b/vel/internal/context.py similarity index 100% rename from vel/internals/context.py rename to vel/internal/context.py diff --git a/vel/internals/generic_factory.py b/vel/internal/generic_factory.py similarity index 100% rename from vel/internals/generic_factory.py rename to vel/internal/generic_factory.py diff --git a/vel/internals/parser.py b/vel/internal/parser.py similarity index 100% rename from vel/internals/parser.py rename to vel/internal/parser.py diff --git a/vel/internals/provider.py b/vel/internal/provider.py similarity index 100% rename from vel/internals/provider.py rename to vel/internal/provider.py diff --git a/vel/internals/tests/__init__.py b/vel/internal/test/__init__.py similarity index 100% rename from vel/internals/tests/__init__.py rename to vel/internal/test/__init__.py diff --git a/vel/internals/tests/fixture_a.py b/vel/internal/test/fixture_a.py similarity index 100% rename from vel/internals/tests/fixture_a.py rename to vel/internal/test/fixture_a.py diff --git a/vel/internals/tests/fixture_b.py b/vel/internal/test/fixture_b.py similarity index 100% rename from vel/internals/tests/fixture_b.py rename to vel/internal/test/fixture_b.py diff --git a/vel/internals/tests/test_parser.py b/vel/internal/test/test_parser.py similarity index 100% rename from vel/internals/tests/test_parser.py rename to vel/internal/test/test_parser.py diff --git a/vel/internals/tests/test_provider.py b/vel/internal/test/test_provider.py similarity index 100% rename from vel/internals/tests/test_provider.py rename to 
vel/internal/test/test_provider.py diff --git a/vel/metrics/__init__.py b/vel/metric/__init__.py similarity index 100% rename from vel/metrics/__init__.py rename to vel/metric/__init__.py diff --git a/vel/metrics/accuracy.py b/vel/metric/accuracy.py similarity index 100% rename from vel/metrics/accuracy.py rename to vel/metric/accuracy.py diff --git a/vel/api/metrics/averaging_metric.py b/vel/metric/averaging_metric.py similarity index 100% rename from vel/api/metrics/averaging_metric.py rename to vel/metric/averaging_metric.py diff --git a/vel/api/metrics/base_metric.py b/vel/metric/base_metric.py similarity index 100% rename from vel/api/metrics/base_metric.py rename to vel/metric/base_metric.py diff --git a/vel/metrics/loss_metric.py b/vel/metric/loss_metric.py similarity index 100% rename from vel/metrics/loss_metric.py rename to vel/metric/loss_metric.py diff --git a/vel/api/metrics/summing_metric.py b/vel/metric/summing_metric.py similarity index 100% rename from vel/api/metrics/summing_metric.py rename to vel/metric/summing_metric.py diff --git a/vel/api/metrics/value_metric.py b/vel/metric/value_metric.py similarity index 100% rename from vel/api/metrics/value_metric.py rename to vel/metric/value_metric.py diff --git a/vel/models/__init__.py b/vel/model/__init__.py similarity index 100% rename from vel/models/__init__.py rename to vel/model/__init__.py diff --git a/vel/models/autoencoder/__init__.py b/vel/model/autoencoder/__init__.py similarity index 100% rename from vel/models/autoencoder/__init__.py rename to vel/model/autoencoder/__init__.py diff --git a/vel/models/autoencoder/mnist_cnn_autoencoder.py b/vel/model/autoencoder/mnist_cnn_autoencoder.py similarity index 100% rename from vel/models/autoencoder/mnist_cnn_autoencoder.py rename to vel/model/autoencoder/mnist_cnn_autoencoder.py diff --git a/vel/models/autoencoder/mnist_cnn_vae.py b/vel/model/autoencoder/mnist_cnn_vae.py similarity index 100% rename from vel/models/autoencoder/mnist_cnn_vae.py 
rename to vel/model/autoencoder/mnist_cnn_vae.py diff --git a/vel/models/imagenet/__init__.py b/vel/model/imagenet/__init__.py similarity index 100% rename from vel/models/imagenet/__init__.py rename to vel/model/imagenet/__init__.py diff --git a/vel/models/imagenet/resnet34.py b/vel/model/imagenet/resnet34.py similarity index 100% rename from vel/models/imagenet/resnet34.py rename to vel/model/imagenet/resnet34.py diff --git a/vel/models/rnn/__init__.py b/vel/model/rnn/__init__.py similarity index 100% rename from vel/models/rnn/__init__.py rename to vel/model/rnn/__init__.py diff --git a/vel/models/rnn/multilayer_rnn_sequence_classification.py b/vel/model/rnn/multilayer_rnn_sequence_classification.py similarity index 100% rename from vel/models/rnn/multilayer_rnn_sequence_classification.py rename to vel/model/rnn/multilayer_rnn_sequence_classification.py diff --git a/vel/models/rnn/multilayer_rnn_sequence_model.py b/vel/model/rnn/multilayer_rnn_sequence_model.py similarity index 100% rename from vel/models/rnn/multilayer_rnn_sequence_model.py rename to vel/model/rnn/multilayer_rnn_sequence_model.py diff --git a/vel/models/vision/__init__.py b/vel/model/vision/__init__.py similarity index 100% rename from vel/models/vision/__init__.py rename to vel/model/vision/__init__.py diff --git a/vel/models/vision/cifar10_cnn_01.py b/vel/model/vision/cifar10_cnn_01.py similarity index 100% rename from vel/models/vision/cifar10_cnn_01.py rename to vel/model/vision/cifar10_cnn_01.py diff --git a/vel/models/vision/cifar_resnet_v1.py b/vel/model/vision/cifar_resnet_v1.py similarity index 100% rename from vel/models/vision/cifar_resnet_v1.py rename to vel/model/vision/cifar_resnet_v1.py diff --git a/vel/models/vision/cifar_resnet_v2.py b/vel/model/vision/cifar_resnet_v2.py similarity index 100% rename from vel/models/vision/cifar_resnet_v2.py rename to vel/model/vision/cifar_resnet_v2.py diff --git a/vel/models/vision/cifar_resnext.py b/vel/model/vision/cifar_resnext.py 
similarity index 100% rename from vel/models/vision/cifar_resnext.py rename to vel/model/vision/cifar_resnext.py diff --git a/vel/models/vision/mnist_cnn_01.py b/vel/model/vision/mnist_cnn_01.py similarity index 100% rename from vel/models/vision/mnist_cnn_01.py rename to vel/model/vision/mnist_cnn_01.py diff --git a/vel/modules/__init__.py b/vel/module/__init__.py similarity index 100% rename from vel/modules/__init__.py rename to vel/module/__init__.py diff --git a/vel/modules/input/__init__.py b/vel/module/input/__init__.py similarity index 100% rename from vel/modules/input/__init__.py rename to vel/module/input/__init__.py diff --git a/vel/modules/input/embedding.py b/vel/module/input/embedding.py similarity index 100% rename from vel/modules/input/embedding.py rename to vel/module/input/embedding.py diff --git a/vel/modules/input/identity.py b/vel/module/input/identity.py similarity index 100% rename from vel/modules/input/identity.py rename to vel/module/input/identity.py diff --git a/vel/modules/input/image_to_tensor.py b/vel/module/input/image_to_tensor.py similarity index 100% rename from vel/modules/input/image_to_tensor.py rename to vel/module/input/image_to_tensor.py diff --git a/vel/modules/input/normalize_observations.py b/vel/module/input/normalize_observations.py similarity index 100% rename from vel/modules/input/normalize_observations.py rename to vel/module/input/normalize_observations.py diff --git a/vel/modules/input/one_hot_encoding.py b/vel/module/input/one_hot_encoding.py similarity index 100% rename from vel/modules/input/one_hot_encoding.py rename to vel/module/input/one_hot_encoding.py diff --git a/vel/modules/layers.py b/vel/module/layers.py similarity index 100% rename from vel/modules/layers.py rename to vel/module/layers.py diff --git a/vel/modules/resnet_v1.py b/vel/module/resnet_v1.py similarity index 100% rename from vel/modules/resnet_v1.py rename to vel/module/resnet_v1.py diff --git a/vel/modules/resnet_v2.py 
b/vel/module/resnet_v2.py similarity index 100% rename from vel/modules/resnet_v2.py rename to vel/module/resnet_v2.py diff --git a/vel/modules/resnext.py b/vel/module/resnext.py similarity index 100% rename from vel/modules/resnext.py rename to vel/module/resnext.py diff --git a/vel/modules/rnn_cell.py b/vel/module/rnn_cell.py similarity index 100% rename from vel/modules/rnn_cell.py rename to vel/module/rnn_cell.py diff --git a/vel/modules/rnn_layer.py b/vel/module/rnn_layer.py similarity index 100% rename from vel/modules/rnn_layer.py rename to vel/module/rnn_layer.py diff --git a/vel/optimizers/__init__.py b/vel/optimizer/__init__.py similarity index 100% rename from vel/optimizers/__init__.py rename to vel/optimizer/__init__.py diff --git a/vel/optimizers/adadelta.py b/vel/optimizer/adadelta.py similarity index 100% rename from vel/optimizers/adadelta.py rename to vel/optimizer/adadelta.py diff --git a/vel/optimizers/adam.py b/vel/optimizer/adam.py similarity index 100% rename from vel/optimizers/adam.py rename to vel/optimizer/adam.py diff --git a/vel/optimizers/rmsprop.py b/vel/optimizer/rmsprop.py similarity index 100% rename from vel/optimizers/rmsprop.py rename to vel/optimizer/rmsprop.py diff --git a/vel/optimizers/rmsprop_tf.py b/vel/optimizer/rmsprop_tf.py similarity index 100% rename from vel/optimizers/rmsprop_tf.py rename to vel/optimizer/rmsprop_tf.py diff --git a/vel/optimizers/sgd.py b/vel/optimizer/sgd.py similarity index 100% rename from vel/optimizers/sgd.py rename to vel/optimizer/sgd.py diff --git a/vel/schedules/__init__.py b/vel/schedule/__init__.py similarity index 100% rename from vel/schedules/__init__.py rename to vel/schedule/__init__.py diff --git a/vel/schedules/constant.py b/vel/schedule/constant.py similarity index 100% rename from vel/schedules/constant.py rename to vel/schedule/constant.py diff --git a/vel/schedules/linear.py b/vel/schedule/linear.py similarity index 100% rename from vel/schedules/linear.py rename to 
vel/schedule/linear.py diff --git a/vel/schedules/linear_and_constant.py b/vel/schedule/linear_and_constant.py similarity index 100% rename from vel/schedules/linear_and_constant.py rename to vel/schedule/linear_and_constant.py diff --git a/vel/sources/__init__.py b/vel/source/__init__.py similarity index 100% rename from vel/sources/__init__.py rename to vel/source/__init__.py diff --git a/vel/sources/img_dir_source.py b/vel/source/img_dir_source.py similarity index 100% rename from vel/sources/img_dir_source.py rename to vel/source/img_dir_source.py diff --git a/vel/sources/nlp/__init__.py b/vel/source/nlp/__init__.py similarity index 100% rename from vel/sources/nlp/__init__.py rename to vel/source/nlp/__init__.py diff --git a/vel/sources/nlp/imdb.py b/vel/source/nlp/imdb.py similarity index 100% rename from vel/sources/nlp/imdb.py rename to vel/source/nlp/imdb.py diff --git a/vel/sources/nlp/multi30k.py b/vel/source/nlp/multi30k.py similarity index 100% rename from vel/sources/nlp/multi30k.py rename to vel/source/nlp/multi30k.py diff --git a/vel/sources/nlp/text_url.py b/vel/source/nlp/text_url.py similarity index 100% rename from vel/sources/nlp/text_url.py rename to vel/source/nlp/text_url.py diff --git a/vel/sources/nlp/wmt14.py b/vel/source/nlp/wmt14.py similarity index 100% rename from vel/sources/nlp/wmt14.py rename to vel/source/nlp/wmt14.py diff --git a/vel/sources/vision/__init__.py b/vel/source/vision/__init__.py similarity index 100% rename from vel/sources/vision/__init__.py rename to vel/source/vision/__init__.py diff --git a/vel/sources/vision/cifar10.py b/vel/source/vision/cifar10.py similarity index 100% rename from vel/sources/vision/cifar10.py rename to vel/source/vision/cifar10.py diff --git a/vel/sources/vision/mnist.py b/vel/source/vision/mnist.py similarity index 100% rename from vel/sources/vision/mnist.py rename to vel/source/vision/mnist.py From 31c5b05ee83c5651098be69085cbc7f853835ae2 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: 
Thu, 13 Jun 2019 11:36:15 -0700 Subject: [PATCH 040/162] Second stage of large renames. --- .../mnist/mnist_cnn_autoencoder.yaml | 0 .../{autoencoders => autoencoder}/mnist/mnist_cnn_vae.yaml | 0 vel/data/{image_ops.py => image_op.py} | 0 vel/math/{functions.py => function.py} | 0 vel/math/{processes.py => process.py} | 0 vel/rl/{buffers => buffer}/__init__.py | 0 vel/rl/{buffers => buffer}/backend/__init__.py | 0 vel/rl/{buffers => buffer}/backend/circular_buffer_backend.py | 0 vel/rl/{buffers => buffer}/backend/circular_vec_buffer_backend.py | 0 vel/rl/{buffers => buffer}/backend/prioritized_buffer_backend.py | 0 .../{buffers => buffer}/backend/prioritized_vec_buffer_backend.py | 0 vel/rl/{buffers => buffer}/backend/segment_tree.py | 0 vel/rl/{buffers => buffer}/circular_replay_buffer.py | 0 vel/rl/{buffers => buffer}/prioritized_circular_replay_buffer.py | 0 vel/rl/{buffers => buffer}/tests/__init__.py | 0 vel/rl/{buffers => buffer}/tests/test_circular_buffer_backend.py | 0 .../tests/test_circular_vec_env_buffer_backend.py | 0 .../tests/test_prioritized_circular_buffer_backend.py | 0 .../tests/test_prioritized_vec_buffer_backend.py | 0 vel/rl/{commands => command}/__init__.py | 0 vel/rl/{commands => command}/enjoy.py | 0 vel/rl/{commands => command}/evaluate_env_command.py | 0 vel/rl/{commands => command}/record_movie_command.py | 0 vel/rl/{commands => command}/rl_train_command.py | 0 vel/rl/{models => model}/__init__.py | 0 vel/rl/{models => model}/backbone/__init__.py | 0 vel/rl/{models => model}/backbone/double_nature_cnn.py | 0 vel/rl/{models => model}/backbone/double_noisy_nature_cnn.py | 0 vel/rl/{models => model}/backbone/lstm.py | 0 vel/rl/{models => model}/backbone/mlp.py | 0 vel/rl/{models => model}/backbone/nature_cnn.py | 0 vel/rl/{models => model}/backbone/nature_cnn_rnn.py | 0 vel/rl/{models => model}/backbone/nature_cnn_small.py | 0 vel/rl/{models => model}/backbone/noisy_nature_cnn.py | 0 vel/rl/{models => model}/deterministic_policy_model.py | 0 
vel/rl/{models => model}/q_distributional_model.py | 0 vel/rl/{models => model}/q_dueling_model.py | 0 vel/rl/{models => model}/q_model.py | 0 vel/rl/{models => model}/q_noisy_model.py | 0 vel/rl/{models => model}/q_rainbow_model.py | 0 vel/rl/{models => model}/q_stochastic_policy_model.py | 0 vel/rl/{models => model}/stochastic_policy_model.py | 0 vel/rl/{models => model}/stochastic_policy_model_separate.py | 0 vel/rl/{models => model}/stochastic_policy_rnn_model.py | 0 vel/rl/{modules => module}/__init__.py | 0 vel/rl/{modules => module}/action_head.py | 0 vel/rl/{modules => module}/deterministic_action_head.py | 0 vel/rl/{modules => module}/deterministic_critic_head.py | 0 vel/rl/{modules => module}/noise/__init__.py | 0 vel/rl/{modules => module}/noise/eps_greedy.py | 0 vel/rl/{modules => module}/noise/ou_noise.py | 0 vel/rl/{modules => module}/noisy_linear.py | 0 vel/rl/{modules => module}/q_distributional_head.py | 0 vel/rl/{modules => module}/q_distributional_noisy_dueling_head.py | 0 vel/rl/{modules => module}/q_dueling_head.py | 0 vel/rl/{modules => module}/q_head.py | 0 vel/rl/{modules => module}/q_noisy_head.py | 0 vel/rl/{modules => module}/test/__init__.py | 0 vel/rl/{modules => module}/test/test_action_head.py | 0 vel/rl/{modules => module}/value_head.py | 0 vel/rl/{reinforcers => reinforcer}/__init__.py | 0 .../buffered_mixed_policy_iteration_reinforcer.py | 0 .../buffered_off_policy_iteration_reinforcer.py | 0 .../{reinforcers => reinforcer}/on_policy_iteration_reinforcer.py | 0 64 files changed, 0 insertions(+), 0 deletions(-) rename examples-configs/{autoencoders => autoencoder}/mnist/mnist_cnn_autoencoder.yaml (100%) rename examples-configs/{autoencoders => autoencoder}/mnist/mnist_cnn_vae.yaml (100%) rename vel/data/{image_ops.py => image_op.py} (100%) rename vel/math/{functions.py => function.py} (100%) rename vel/math/{processes.py => process.py} (100%) rename vel/rl/{buffers => buffer}/__init__.py (100%) rename vel/rl/{buffers => 
buffer}/backend/__init__.py (100%) rename vel/rl/{buffers => buffer}/backend/circular_buffer_backend.py (100%) rename vel/rl/{buffers => buffer}/backend/circular_vec_buffer_backend.py (100%) rename vel/rl/{buffers => buffer}/backend/prioritized_buffer_backend.py (100%) rename vel/rl/{buffers => buffer}/backend/prioritized_vec_buffer_backend.py (100%) rename vel/rl/{buffers => buffer}/backend/segment_tree.py (100%) rename vel/rl/{buffers => buffer}/circular_replay_buffer.py (100%) rename vel/rl/{buffers => buffer}/prioritized_circular_replay_buffer.py (100%) rename vel/rl/{buffers => buffer}/tests/__init__.py (100%) rename vel/rl/{buffers => buffer}/tests/test_circular_buffer_backend.py (100%) rename vel/rl/{buffers => buffer}/tests/test_circular_vec_env_buffer_backend.py (100%) rename vel/rl/{buffers => buffer}/tests/test_prioritized_circular_buffer_backend.py (100%) rename vel/rl/{buffers => buffer}/tests/test_prioritized_vec_buffer_backend.py (100%) rename vel/rl/{commands => command}/__init__.py (100%) rename vel/rl/{commands => command}/enjoy.py (100%) rename vel/rl/{commands => command}/evaluate_env_command.py (100%) rename vel/rl/{commands => command}/record_movie_command.py (100%) rename vel/rl/{commands => command}/rl_train_command.py (100%) rename vel/rl/{models => model}/__init__.py (100%) rename vel/rl/{models => model}/backbone/__init__.py (100%) rename vel/rl/{models => model}/backbone/double_nature_cnn.py (100%) rename vel/rl/{models => model}/backbone/double_noisy_nature_cnn.py (100%) rename vel/rl/{models => model}/backbone/lstm.py (100%) rename vel/rl/{models => model}/backbone/mlp.py (100%) rename vel/rl/{models => model}/backbone/nature_cnn.py (100%) rename vel/rl/{models => model}/backbone/nature_cnn_rnn.py (100%) rename vel/rl/{models => model}/backbone/nature_cnn_small.py (100%) rename vel/rl/{models => model}/backbone/noisy_nature_cnn.py (100%) rename vel/rl/{models => model}/deterministic_policy_model.py (100%) rename vel/rl/{models => 
model}/q_distributional_model.py (100%) rename vel/rl/{models => model}/q_dueling_model.py (100%) rename vel/rl/{models => model}/q_model.py (100%) rename vel/rl/{models => model}/q_noisy_model.py (100%) rename vel/rl/{models => model}/q_rainbow_model.py (100%) rename vel/rl/{models => model}/q_stochastic_policy_model.py (100%) rename vel/rl/{models => model}/stochastic_policy_model.py (100%) rename vel/rl/{models => model}/stochastic_policy_model_separate.py (100%) rename vel/rl/{models => model}/stochastic_policy_rnn_model.py (100%) rename vel/rl/{modules => module}/__init__.py (100%) rename vel/rl/{modules => module}/action_head.py (100%) rename vel/rl/{modules => module}/deterministic_action_head.py (100%) rename vel/rl/{modules => module}/deterministic_critic_head.py (100%) rename vel/rl/{modules => module}/noise/__init__.py (100%) rename vel/rl/{modules => module}/noise/eps_greedy.py (100%) rename vel/rl/{modules => module}/noise/ou_noise.py (100%) rename vel/rl/{modules => module}/noisy_linear.py (100%) rename vel/rl/{modules => module}/q_distributional_head.py (100%) rename vel/rl/{modules => module}/q_distributional_noisy_dueling_head.py (100%) rename vel/rl/{modules => module}/q_dueling_head.py (100%) rename vel/rl/{modules => module}/q_head.py (100%) rename vel/rl/{modules => module}/q_noisy_head.py (100%) rename vel/rl/{modules => module}/test/__init__.py (100%) rename vel/rl/{modules => module}/test/test_action_head.py (100%) rename vel/rl/{modules => module}/value_head.py (100%) rename vel/rl/{reinforcers => reinforcer}/__init__.py (100%) rename vel/rl/{reinforcers => reinforcer}/buffered_mixed_policy_iteration_reinforcer.py (100%) rename vel/rl/{reinforcers => reinforcer}/buffered_off_policy_iteration_reinforcer.py (100%) rename vel/rl/{reinforcers => reinforcer}/on_policy_iteration_reinforcer.py (100%) diff --git a/examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_autoencoder.yaml 
similarity index 100% rename from examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml rename to examples-configs/autoencoder/mnist/mnist_cnn_autoencoder.yaml diff --git a/examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml similarity index 100% rename from examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml rename to examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml diff --git a/vel/data/image_ops.py b/vel/data/image_op.py similarity index 100% rename from vel/data/image_ops.py rename to vel/data/image_op.py diff --git a/vel/math/functions.py b/vel/math/function.py similarity index 100% rename from vel/math/functions.py rename to vel/math/function.py diff --git a/vel/math/processes.py b/vel/math/process.py similarity index 100% rename from vel/math/processes.py rename to vel/math/process.py diff --git a/vel/rl/buffers/__init__.py b/vel/rl/buffer/__init__.py similarity index 100% rename from vel/rl/buffers/__init__.py rename to vel/rl/buffer/__init__.py diff --git a/vel/rl/buffers/backend/__init__.py b/vel/rl/buffer/backend/__init__.py similarity index 100% rename from vel/rl/buffers/backend/__init__.py rename to vel/rl/buffer/backend/__init__.py diff --git a/vel/rl/buffers/backend/circular_buffer_backend.py b/vel/rl/buffer/backend/circular_buffer_backend.py similarity index 100% rename from vel/rl/buffers/backend/circular_buffer_backend.py rename to vel/rl/buffer/backend/circular_buffer_backend.py diff --git a/vel/rl/buffers/backend/circular_vec_buffer_backend.py b/vel/rl/buffer/backend/circular_vec_buffer_backend.py similarity index 100% rename from vel/rl/buffers/backend/circular_vec_buffer_backend.py rename to vel/rl/buffer/backend/circular_vec_buffer_backend.py diff --git a/vel/rl/buffers/backend/prioritized_buffer_backend.py b/vel/rl/buffer/backend/prioritized_buffer_backend.py similarity index 100% rename from vel/rl/buffers/backend/prioritized_buffer_backend.py rename to 
vel/rl/buffer/backend/prioritized_buffer_backend.py diff --git a/vel/rl/buffers/backend/prioritized_vec_buffer_backend.py b/vel/rl/buffer/backend/prioritized_vec_buffer_backend.py similarity index 100% rename from vel/rl/buffers/backend/prioritized_vec_buffer_backend.py rename to vel/rl/buffer/backend/prioritized_vec_buffer_backend.py diff --git a/vel/rl/buffers/backend/segment_tree.py b/vel/rl/buffer/backend/segment_tree.py similarity index 100% rename from vel/rl/buffers/backend/segment_tree.py rename to vel/rl/buffer/backend/segment_tree.py diff --git a/vel/rl/buffers/circular_replay_buffer.py b/vel/rl/buffer/circular_replay_buffer.py similarity index 100% rename from vel/rl/buffers/circular_replay_buffer.py rename to vel/rl/buffer/circular_replay_buffer.py diff --git a/vel/rl/buffers/prioritized_circular_replay_buffer.py b/vel/rl/buffer/prioritized_circular_replay_buffer.py similarity index 100% rename from vel/rl/buffers/prioritized_circular_replay_buffer.py rename to vel/rl/buffer/prioritized_circular_replay_buffer.py diff --git a/vel/rl/buffers/tests/__init__.py b/vel/rl/buffer/tests/__init__.py similarity index 100% rename from vel/rl/buffers/tests/__init__.py rename to vel/rl/buffer/tests/__init__.py diff --git a/vel/rl/buffers/tests/test_circular_buffer_backend.py b/vel/rl/buffer/tests/test_circular_buffer_backend.py similarity index 100% rename from vel/rl/buffers/tests/test_circular_buffer_backend.py rename to vel/rl/buffer/tests/test_circular_buffer_backend.py diff --git a/vel/rl/buffers/tests/test_circular_vec_env_buffer_backend.py b/vel/rl/buffer/tests/test_circular_vec_env_buffer_backend.py similarity index 100% rename from vel/rl/buffers/tests/test_circular_vec_env_buffer_backend.py rename to vel/rl/buffer/tests/test_circular_vec_env_buffer_backend.py diff --git a/vel/rl/buffers/tests/test_prioritized_circular_buffer_backend.py b/vel/rl/buffer/tests/test_prioritized_circular_buffer_backend.py similarity index 100% rename from 
vel/rl/buffers/tests/test_prioritized_circular_buffer_backend.py rename to vel/rl/buffer/tests/test_prioritized_circular_buffer_backend.py diff --git a/vel/rl/buffers/tests/test_prioritized_vec_buffer_backend.py b/vel/rl/buffer/tests/test_prioritized_vec_buffer_backend.py similarity index 100% rename from vel/rl/buffers/tests/test_prioritized_vec_buffer_backend.py rename to vel/rl/buffer/tests/test_prioritized_vec_buffer_backend.py diff --git a/vel/rl/commands/__init__.py b/vel/rl/command/__init__.py similarity index 100% rename from vel/rl/commands/__init__.py rename to vel/rl/command/__init__.py diff --git a/vel/rl/commands/enjoy.py b/vel/rl/command/enjoy.py similarity index 100% rename from vel/rl/commands/enjoy.py rename to vel/rl/command/enjoy.py diff --git a/vel/rl/commands/evaluate_env_command.py b/vel/rl/command/evaluate_env_command.py similarity index 100% rename from vel/rl/commands/evaluate_env_command.py rename to vel/rl/command/evaluate_env_command.py diff --git a/vel/rl/commands/record_movie_command.py b/vel/rl/command/record_movie_command.py similarity index 100% rename from vel/rl/commands/record_movie_command.py rename to vel/rl/command/record_movie_command.py diff --git a/vel/rl/commands/rl_train_command.py b/vel/rl/command/rl_train_command.py similarity index 100% rename from vel/rl/commands/rl_train_command.py rename to vel/rl/command/rl_train_command.py diff --git a/vel/rl/models/__init__.py b/vel/rl/model/__init__.py similarity index 100% rename from vel/rl/models/__init__.py rename to vel/rl/model/__init__.py diff --git a/vel/rl/models/backbone/__init__.py b/vel/rl/model/backbone/__init__.py similarity index 100% rename from vel/rl/models/backbone/__init__.py rename to vel/rl/model/backbone/__init__.py diff --git a/vel/rl/models/backbone/double_nature_cnn.py b/vel/rl/model/backbone/double_nature_cnn.py similarity index 100% rename from vel/rl/models/backbone/double_nature_cnn.py rename to vel/rl/model/backbone/double_nature_cnn.py diff --git 
a/vel/rl/models/backbone/double_noisy_nature_cnn.py b/vel/rl/model/backbone/double_noisy_nature_cnn.py similarity index 100% rename from vel/rl/models/backbone/double_noisy_nature_cnn.py rename to vel/rl/model/backbone/double_noisy_nature_cnn.py diff --git a/vel/rl/models/backbone/lstm.py b/vel/rl/model/backbone/lstm.py similarity index 100% rename from vel/rl/models/backbone/lstm.py rename to vel/rl/model/backbone/lstm.py diff --git a/vel/rl/models/backbone/mlp.py b/vel/rl/model/backbone/mlp.py similarity index 100% rename from vel/rl/models/backbone/mlp.py rename to vel/rl/model/backbone/mlp.py diff --git a/vel/rl/models/backbone/nature_cnn.py b/vel/rl/model/backbone/nature_cnn.py similarity index 100% rename from vel/rl/models/backbone/nature_cnn.py rename to vel/rl/model/backbone/nature_cnn.py diff --git a/vel/rl/models/backbone/nature_cnn_rnn.py b/vel/rl/model/backbone/nature_cnn_rnn.py similarity index 100% rename from vel/rl/models/backbone/nature_cnn_rnn.py rename to vel/rl/model/backbone/nature_cnn_rnn.py diff --git a/vel/rl/models/backbone/nature_cnn_small.py b/vel/rl/model/backbone/nature_cnn_small.py similarity index 100% rename from vel/rl/models/backbone/nature_cnn_small.py rename to vel/rl/model/backbone/nature_cnn_small.py diff --git a/vel/rl/models/backbone/noisy_nature_cnn.py b/vel/rl/model/backbone/noisy_nature_cnn.py similarity index 100% rename from vel/rl/models/backbone/noisy_nature_cnn.py rename to vel/rl/model/backbone/noisy_nature_cnn.py diff --git a/vel/rl/models/deterministic_policy_model.py b/vel/rl/model/deterministic_policy_model.py similarity index 100% rename from vel/rl/models/deterministic_policy_model.py rename to vel/rl/model/deterministic_policy_model.py diff --git a/vel/rl/models/q_distributional_model.py b/vel/rl/model/q_distributional_model.py similarity index 100% rename from vel/rl/models/q_distributional_model.py rename to vel/rl/model/q_distributional_model.py diff --git a/vel/rl/models/q_dueling_model.py 
b/vel/rl/model/q_dueling_model.py similarity index 100% rename from vel/rl/models/q_dueling_model.py rename to vel/rl/model/q_dueling_model.py diff --git a/vel/rl/models/q_model.py b/vel/rl/model/q_model.py similarity index 100% rename from vel/rl/models/q_model.py rename to vel/rl/model/q_model.py diff --git a/vel/rl/models/q_noisy_model.py b/vel/rl/model/q_noisy_model.py similarity index 100% rename from vel/rl/models/q_noisy_model.py rename to vel/rl/model/q_noisy_model.py diff --git a/vel/rl/models/q_rainbow_model.py b/vel/rl/model/q_rainbow_model.py similarity index 100% rename from vel/rl/models/q_rainbow_model.py rename to vel/rl/model/q_rainbow_model.py diff --git a/vel/rl/models/q_stochastic_policy_model.py b/vel/rl/model/q_stochastic_policy_model.py similarity index 100% rename from vel/rl/models/q_stochastic_policy_model.py rename to vel/rl/model/q_stochastic_policy_model.py diff --git a/vel/rl/models/stochastic_policy_model.py b/vel/rl/model/stochastic_policy_model.py similarity index 100% rename from vel/rl/models/stochastic_policy_model.py rename to vel/rl/model/stochastic_policy_model.py diff --git a/vel/rl/models/stochastic_policy_model_separate.py b/vel/rl/model/stochastic_policy_model_separate.py similarity index 100% rename from vel/rl/models/stochastic_policy_model_separate.py rename to vel/rl/model/stochastic_policy_model_separate.py diff --git a/vel/rl/models/stochastic_policy_rnn_model.py b/vel/rl/model/stochastic_policy_rnn_model.py similarity index 100% rename from vel/rl/models/stochastic_policy_rnn_model.py rename to vel/rl/model/stochastic_policy_rnn_model.py diff --git a/vel/rl/modules/__init__.py b/vel/rl/module/__init__.py similarity index 100% rename from vel/rl/modules/__init__.py rename to vel/rl/module/__init__.py diff --git a/vel/rl/modules/action_head.py b/vel/rl/module/action_head.py similarity index 100% rename from vel/rl/modules/action_head.py rename to vel/rl/module/action_head.py diff --git 
a/vel/rl/modules/deterministic_action_head.py b/vel/rl/module/deterministic_action_head.py similarity index 100% rename from vel/rl/modules/deterministic_action_head.py rename to vel/rl/module/deterministic_action_head.py diff --git a/vel/rl/modules/deterministic_critic_head.py b/vel/rl/module/deterministic_critic_head.py similarity index 100% rename from vel/rl/modules/deterministic_critic_head.py rename to vel/rl/module/deterministic_critic_head.py diff --git a/vel/rl/modules/noise/__init__.py b/vel/rl/module/noise/__init__.py similarity index 100% rename from vel/rl/modules/noise/__init__.py rename to vel/rl/module/noise/__init__.py diff --git a/vel/rl/modules/noise/eps_greedy.py b/vel/rl/module/noise/eps_greedy.py similarity index 100% rename from vel/rl/modules/noise/eps_greedy.py rename to vel/rl/module/noise/eps_greedy.py diff --git a/vel/rl/modules/noise/ou_noise.py b/vel/rl/module/noise/ou_noise.py similarity index 100% rename from vel/rl/modules/noise/ou_noise.py rename to vel/rl/module/noise/ou_noise.py diff --git a/vel/rl/modules/noisy_linear.py b/vel/rl/module/noisy_linear.py similarity index 100% rename from vel/rl/modules/noisy_linear.py rename to vel/rl/module/noisy_linear.py diff --git a/vel/rl/modules/q_distributional_head.py b/vel/rl/module/q_distributional_head.py similarity index 100% rename from vel/rl/modules/q_distributional_head.py rename to vel/rl/module/q_distributional_head.py diff --git a/vel/rl/modules/q_distributional_noisy_dueling_head.py b/vel/rl/module/q_distributional_noisy_dueling_head.py similarity index 100% rename from vel/rl/modules/q_distributional_noisy_dueling_head.py rename to vel/rl/module/q_distributional_noisy_dueling_head.py diff --git a/vel/rl/modules/q_dueling_head.py b/vel/rl/module/q_dueling_head.py similarity index 100% rename from vel/rl/modules/q_dueling_head.py rename to vel/rl/module/q_dueling_head.py diff --git a/vel/rl/modules/q_head.py b/vel/rl/module/q_head.py similarity index 100% rename from 
vel/rl/modules/q_head.py rename to vel/rl/module/q_head.py diff --git a/vel/rl/modules/q_noisy_head.py b/vel/rl/module/q_noisy_head.py similarity index 100% rename from vel/rl/modules/q_noisy_head.py rename to vel/rl/module/q_noisy_head.py diff --git a/vel/rl/modules/test/__init__.py b/vel/rl/module/test/__init__.py similarity index 100% rename from vel/rl/modules/test/__init__.py rename to vel/rl/module/test/__init__.py diff --git a/vel/rl/modules/test/test_action_head.py b/vel/rl/module/test/test_action_head.py similarity index 100% rename from vel/rl/modules/test/test_action_head.py rename to vel/rl/module/test/test_action_head.py diff --git a/vel/rl/modules/value_head.py b/vel/rl/module/value_head.py similarity index 100% rename from vel/rl/modules/value_head.py rename to vel/rl/module/value_head.py diff --git a/vel/rl/reinforcers/__init__.py b/vel/rl/reinforcer/__init__.py similarity index 100% rename from vel/rl/reinforcers/__init__.py rename to vel/rl/reinforcer/__init__.py diff --git a/vel/rl/reinforcers/buffered_mixed_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py similarity index 100% rename from vel/rl/reinforcers/buffered_mixed_policy_iteration_reinforcer.py rename to vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py diff --git a/vel/rl/reinforcers/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py similarity index 100% rename from vel/rl/reinforcers/buffered_off_policy_iteration_reinforcer.py rename to vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py diff --git a/vel/rl/reinforcers/on_policy_iteration_reinforcer.py b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py similarity index 100% rename from vel/rl/reinforcers/on_policy_iteration_reinforcer.py rename to vel/rl/reinforcer/on_policy_iteration_reinforcer.py From 3dcd0f23c6527c67180c6e04d4c9809ca8594f05 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 13 Jun 
2019 12:22:18 -0700 Subject: [PATCH 041/162] Fixed linter issues. --- .flake8 | 3 +++ Makefile | 7 +++++-- setup.py | 2 +- vel/api/__init__.py | 2 ++ vel/api/info.py | 9 +++++---- vel/api/learner.py | 10 ++++++---- vel/api/model_config.py | 10 +++++----- vel/api/source.py | 2 +- vel/augmentation/normalize.py | 5 ++--- vel/augmentation/random_horizontal_flip.py | 6 +++--- vel/augmentation/random_lighting.py | 7 ++++--- vel/augmentation/tta/train_tta.py | 3 ++- vel/command/phase_train_command.py | 1 - vel/data/__init__.py | 2 +- vel/data/image_op.py | 6 ++++-- vel/math/process.py | 5 ++++- vel/metric/value_metric.py | 1 - vel/model/imagenet/resnet34.py | 10 +++++----- .../multilayer_rnn_sequence_classification.py | 14 +++++++------- vel/model/rnn/multilayer_rnn_sequence_model.py | 11 ++++++----- vel/model/vision/cifar_resnext.py | 14 ++++++++------ vel/module/input/embedding.py | 8 ++++---- vel/module/layers.py | 1 - vel/module/resnet_v2.py | 4 +++- vel/module/resnext.py | 1 - vel/module/rnn_cell.py | 3 --- vel/module/rnn_layer.py | 3 --- vel/notebook/__init__.py | 4 ++-- vel/optimizer/sgd.py | 2 +- vel/phase/cycle.py | 9 +++++++-- vel/rl/algo/dqn.py | 8 ++++---- vel/rl/algo/policy_gradient/ddpg.py | 6 +++--- vel/rl/algo/policy_gradient/ppo.py | 8 ++++---- vel/rl/algo/policy_gradient/trpo.py | 16 +++++++++------- vel/rl/api/env_base.py | 1 - vel/rl/api/evaluator.py | 1 + vel/rl/api/rollout.py | 13 +++++++++---- .../backend/circular_vec_buffer_backend.py | 4 ++-- vel/rl/buffer/{tests => test}/__init__.py | 0 .../test_circular_buffer_backend.py | 0 .../test_circular_vec_env_buffer_backend.py | 0 .../test_prioritized_circular_buffer_backend.py | 0 .../test_prioritized_vec_buffer_backend.py | 4 ++-- vel/rl/command/evaluate_env_command.py | 7 +++++-- vel/rl/command/record_movie_command.py | 4 +++- .../env_roller/transition_replay_env_roller.py | 14 ++++++++------ vel/rl/model/backbone/lstm.py | 2 +- vel/rl/model/backbone/mlp.py | 4 ++-- 
vel/rl/model/backbone/nature_cnn_rnn.py | 8 ++++---- vel/rl/model/deterministic_policy_model.py | 8 ++++---- vel/rl/model/q_distributional_model.py | 8 ++++---- vel/rl/model/q_dueling_model.py | 8 ++++---- vel/rl/model/q_model.py | 6 +++--- vel/rl/model/q_noisy_model.py | 4 ++-- vel/rl/model/q_stochastic_policy_model.py | 8 ++++---- vel/rl/model/stochastic_policy_model.py | 8 ++++---- vel/rl/model/stochastic_policy_rnn_model.py | 8 ++++---- vel/rl/module/q_head.py | 1 - .../buffered_off_policy_iteration_reinforcer.py | 2 +- vel/schedule/linear.py | 1 - vel/scheduler/linear_batch_scaler.py | 2 -- vel/scheduler/multi_step.py | 3 +-- vel/scheduler/reduce_lr_on_plateau.py | 9 +++++---- vel/source/nlp/text_url.py | 4 +++- vel/source/vision/cifar10.py | 8 ++++---- vel/storage/backend/mongodb.py | 1 - vel/storage/strategy/checkpoint_strategy.py | 2 -- vel/storage/streaming/visdom.py | 2 +- vel/util/{intepolate.py => interpolate.py} | 1 - vel/util/module_util.py | 3 ++- vel/util/situational.py | 5 ++--- vel/util/summary.py | 4 ++-- vel/util/visdom.py | 1 - 73 files changed, 198 insertions(+), 174 deletions(-) create mode 100644 .flake8 rename vel/rl/buffer/{tests => test}/__init__.py (100%) rename vel/rl/buffer/{tests => test}/test_circular_buffer_backend.py (100%) rename vel/rl/buffer/{tests => test}/test_circular_vec_env_buffer_backend.py (100%) rename vel/rl/buffer/{tests => test}/test_prioritized_circular_buffer_backend.py (100%) rename vel/rl/buffer/{tests => test}/test_prioritized_vec_buffer_backend.py (98%) rename vel/util/{intepolate.py => interpolate.py} (99%) diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..25d4293b --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 120 +exclude = vel/openai, test, vel/api/__init__.py, vel/rl/api/__init__.py diff --git a/Makefile b/Makefile index fd79b924..23022e26 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: default test requpgrade +.PHONY: default test requpgrade lint 
default: test; @@ -40,4 +40,7 @@ requirements.txt: pip-compile requirements.in requpgrade: - pip-compile --upgrade + pip-compile --upgrade + +lint: + flake8 vel diff --git a/setup.py b/setup.py index f134a862..e6290b56 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ 'mongo': ['pymongo', 'dnspython'], 'gym': ['gym[atari,box2d,classic_control]'], 'mujoco': ['gym[mujoco,robotics]'], - 'dev': ['pytest', 'ipython', 'jupyter', 'pip-tools'], + 'dev': ['pytest', 'ipython', 'jupyter', 'pip-tools', 'flake8'], 'text': ['spacy'], 'all': ['visdom', 'pymongo', 'dnspython', 'gym[all]', 'pytest', 'spacy', 'ipython', 'jupyter'] }, diff --git a/vel/api/__init__.py b/vel/api/__init__.py index 47c4284a..06b8490c 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -1,3 +1,5 @@ +from .augmentation import Augmentation +from .dataflow import DataFlow from .callback import Callback from .info import BatchInfo, EpochInfo, TrainingInfo from .learner import Learner diff --git a/vel/api/info.py b/vel/api/info.py index e4372b4e..b5f978c3 100644 --- a/vel/api/info.py +++ b/vel/api/info.py @@ -5,7 +5,7 @@ import torch -from vel.exceptions import VelException +from vel.exception import VelException class TrainingHistory: @@ -33,7 +33,7 @@ class TrainingInfo(abc.MutableMapping): Data dict is any extra information processes may want to store """ - def __init__(self, start_epoch_idx=0, run_name: typing.Optional[str]=None, metrics=None, callbacks=None): + def __init__(self, start_epoch_idx=0, run_name: typing.Optional[str] = None, metrics=None, callbacks=None): self.data_dict = {} self.start_epoch_idx = start_epoch_idx @@ -162,7 +162,8 @@ class EpochInfo(abc.MutableMapping): """ def __init__(self, training_info: TrainingInfo, global_epoch_idx: int, batches_per_epoch: int, - optimizer: torch.optim.Optimizer=None, local_epoch_idx: int = None, callbacks: list=None): + optimizer: typing.Optional[torch.optim.Optimizer] = None, local_epoch_idx: int = None, + callbacks: typing.Optional[list] 
= None): self.training_info = training_info self.optimizer = optimizer self.batches_per_epoch = batches_per_epoch @@ -348,4 +349,4 @@ def __contains__(self, item): return item in self.data_dict def __repr__(self): - return f"[BatchInfo epoch:{self.epoch_info.global_epoch_idx} batch:{self.batch_number}/{self.batches_per_epoch}]" + return f"[BatchInfo epoch:{self.epoch_info.global_epoch_idx} batch:{self.batch_number}/{self.batches_per_epoch}]" # noqa diff --git a/vel/api/learner.py b/vel/api/learner.py index 6ebb1949..5dfd4393 100644 --- a/vel/api/learner.py +++ b/vel/api/learner.py @@ -1,15 +1,17 @@ import sys import torch +import torch.nn import tqdm import typing from .model import SupervisedModel from .info import BatchInfo, EpochInfo, TrainingInfo +from .source import Source class Learner: """ Manages training process of a single model """ - def __init__(self, device: torch.device, model: SupervisedModel, max_grad_norm: typing.Optional[float]=None): + def __init__(self, device: torch.device, model: SupervisedModel, max_grad_norm: typing.Optional[float] = None): self.device = device self.model = model.to(device) self.max_grad_norm = max_grad_norm @@ -41,7 +43,7 @@ def initialize_training(self, training_info: TrainingInfo, model_state=None, hid else: self.model.load_state_dict(model_state) - def run_epoch(self, epoch_info: EpochInfo, source: 'vel.api.Source'): + def run_epoch(self, epoch_info: EpochInfo, source: 'Source'): """ Run full epoch of learning """ epoch_info.on_epoch_begin() @@ -56,7 +58,7 @@ def run_epoch(self, epoch_info: EpochInfo, source: 'vel.api.Source'): epoch_info.on_epoch_end() - def train_epoch(self, epoch_info, source: 'vel.api.Source', interactive=True): + def train_epoch(self, epoch_info, source: 'Source', interactive=True): """ Run a single training epoch """ self.train() @@ -74,7 +76,7 @@ def train_epoch(self, epoch_info, source: 'vel.api.Source', interactive=True): 
iterator.set_postfix(loss=epoch_info.result_accumulator.intermediate_value('loss')) - def validation_epoch(self, epoch_info, source: 'vel.api.Source'): + def validation_epoch(self, epoch_info, source: 'Source'): """ Run a single evaluation epoch """ self.eval() diff --git a/vel/api/model_config.py b/vel/api/model_config.py index e2f98cd2..23488916 100644 --- a/vel/api/model_config.py +++ b/vel/api/model_config.py @@ -2,9 +2,9 @@ import os.path import typing -from vel.exceptions import VelInitializationException -from vel.internals.parser import Parser -from vel.internals.provider import Provider +from vel.exception import VelInitializationException +from vel.internal.parser import Parser +from vel.internal.provider import Provider from .info import TrainingInfo @@ -196,8 +196,8 @@ def banner(self, command_name) -> None: device = self.torch_device() print("=" * 80) - print(f"Pytorch version: {torch.__version__} cuda version {torch.version.cuda} cudnn version {torch.backends.cudnn.version()}") - print("Running model {}, run {} -- command {} -- device {}".format(self._model_name, self.run_number, command_name, self.device)) + print(f"Pytorch version: {torch.__version__} cuda version {torch.version.cuda} cudnn version {torch.backends.cudnn.version()}") # noqa + print("Running model {}, run {} -- command {} -- device {}".format(self._model_name, self.run_number, command_name, self.device)) # noqa if device.type == 'cuda': device_idx = 0 if device.index is None else device.index print(f"CUDA Device name {torch.cuda.get_device_name(device_idx)}") diff --git a/vel/api/source.py b/vel/api/source.py index 9396b487..be1d864e 100644 --- a/vel/api/source.py +++ b/vel/api/source.py @@ -1,6 +1,6 @@ import torch.utils.data as data -from vel.data import DataFlow +from .dataflow import DataFlow class Source: diff --git a/vel/augmentation/normalize.py b/vel/augmentation/normalize.py index 36ca29fd..b0b787a6 100644 --- a/vel/augmentation/normalize.py +++ 
b/vel/augmentation/normalize.py @@ -1,9 +1,9 @@ import numpy as np -import vel.data as data +import vel.api as api -class Normalize(data.Augmentation): +class Normalize(api.Augmentation): """ Normalize input mean and standard deviation """ def __init__(self, mean, std, mode='x', tags=None): @@ -22,4 +22,3 @@ def denormalize(self, x_data): def create(mean, std, mode='x', tags=None): """ Vel factory function """ return Normalize(mean=mean, std=std, mode=mode, tags=tags) - diff --git a/vel/augmentation/random_horizontal_flip.py b/vel/augmentation/random_horizontal_flip.py index 2d8bdafd..e4d6c142 100644 --- a/vel/augmentation/random_horizontal_flip.py +++ b/vel/augmentation/random_horizontal_flip.py @@ -1,10 +1,10 @@ import random import numpy as np -import vel.data as data +import vel.api as api -class RandomHorizontalFlip(data.Augmentation): +class RandomHorizontalFlip(api.Augmentation): """ Apply a horizontal flip randomly to input images """ def __init__(self, p=0.5, mode='x', tags=None): @@ -28,4 +28,4 @@ def __repr__(self): def create(p=0.5): - return RandomHorizontalFlip(p) \ No newline at end of file + return RandomHorizontalFlip(p) diff --git a/vel/augmentation/random_lighting.py b/vel/augmentation/random_lighting.py index 82fc9cb1..d85c450c 100644 --- a/vel/augmentation/random_lighting.py +++ b/vel/augmentation/random_lighting.py @@ -1,9 +1,10 @@ import random +import vel.api as api import vel.data as data -class RandomLighting(data.Augmentation): +class RandomLighting(api.Augmentation): """ Apply a horizontal flip randomly to input images """ def __init__(self, b, c, mode='x', tags=None): @@ -14,11 +15,11 @@ def __call__(self, img): """ Adjust lighting """ rand_b = random.uniform(-self.b, self.b) rand_c = random.uniform(-self.c, self.c) - rand_c = -1/(rand_c-1) if rand_c<0 else rand_c+1 + rand_c = -1/(rand_c-1) if rand_c < 0 else rand_c+1 return data.lighting(img, rand_b, rand_c) def __repr__(self): - return self.__class__.__name__ + '(p={})'.format(self.p) 
+ return self.__class__.__name__ + '(b={}, c={})'.format(self.b, self.c) def create(b, c, mode='x', tags=None): diff --git a/vel/augmentation/tta/train_tta.py b/vel/augmentation/tta/train_tta.py index 212937da..78621428 100644 --- a/vel/augmentation/tta/train_tta.py +++ b/vel/augmentation/tta/train_tta.py @@ -70,7 +70,8 @@ # if self.index == (1 + self.n_augmentations): # new_output = torch.mean(torch.stack(self.accumulated_output, dim=-1), dim=-1) # new_context = { -# k: torch.mean(torch.stack([c[k] for c in self.accumulated_context], dim=-1), dim=-1) for k in context.keys() +# k: torch.mean(torch.stack([c[k] for c in self.accumulated_context], dim=-1), dim=-1) +# for k in context.keys() # } # # self.metric_accumulator.calculate(self.data, self.target, new_output, new_context) diff --git a/vel/command/phase_train_command.py b/vel/command/phase_train_command.py index fc541286..e80768b2 100644 --- a/vel/command/phase_train_command.py +++ b/vel/command/phase_train_command.py @@ -1,4 +1,3 @@ -import torch import numpy as np import bisect import typing diff --git a/vel/data/__init__.py b/vel/data/__init__.py index 806cbe66..dd02c4a5 100644 --- a/vel/data/__init__.py +++ b/vel/data/__init__.py @@ -1 +1 @@ -from .image_ops import * \ No newline at end of file +from .image_op import * # noqa diff --git a/vel/data/image_op.py b/vel/data/image_op.py index 6bf45d78..9b6833f2 100644 --- a/vel/data/image_op.py +++ b/vel/data/image_op.py @@ -17,7 +17,8 @@ def center_crop(im, min_sz=None): """ Returns a center crop of an image""" # return F.center_crop(im, min_sz) r, c, *_ = im.shape - if min_sz is None: min_sz = min(r, c) + if min_sz is None: + min_sz = min(r, c) start_r = math.ceil((r - min_sz) / 2) start_c = math.ceil((c - min_sz) / 2) return crop_square(im, start_r, start_c, min_sz) @@ -78,6 +79,7 @@ def mode_to_cv2(mode='constant'): def lighting(im, b, c): """ Adjusts image's balance and contrast. 
""" - if b == 0 and c == 1: return im + if b == 0 and c == 1: + return im mu = np.average(im) return np.clip((im - mu) * c + mu + b, 0., 1.).astype(np.float32) diff --git a/vel/math/process.py b/vel/math/process.py index cd176b83..923cad44 100644 --- a/vel/math/process.py +++ b/vel/math/process.py @@ -15,7 +15,10 @@ def __init__(self, mu, sigma, theta=.15, dt=1e-2, x0=None): self.reset() def __call__(self): - x = self.x_prev + self.theta * (self.mu - self.x_prev) * self.dt + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape) + x = ( + self.x_prev + self.theta * (self.mu - self.x_prev) * self.dt + + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape) + ) self.x_prev = x return x diff --git a/vel/metric/value_metric.py b/vel/metric/value_metric.py index ac17adbf..309e9e57 100644 --- a/vel/metric/value_metric.py +++ b/vel/metric/value_metric.py @@ -23,4 +23,3 @@ def value(self): def _value_function(self, batch_info): raise NotImplementedError - diff --git a/vel/model/imagenet/resnet34.py b/vel/model/imagenet/resnet34.py index fc819a2a..a4a78f86 100644 --- a/vel/model/imagenet/resnet34.py +++ b/vel/model/imagenet/resnet34.py @@ -2,7 +2,7 @@ import torch.nn as nn import torch.nn.functional as F -import vel.modules.layers as l +import vel.module.layers as layers import vel.util.module_util as mu from vel.api import LossFunctionModel, ModelFactory @@ -35,8 +35,8 @@ def __init__(self, fc_layers=None, dropout=None, pretrained=True): valid_children = list(backbone.children())[:-2] valid_children.extend([ - l.AdaptiveConcatPool2d(), - l.Flatten() + layers.AdaptiveConcatPool2d(), + layers.Flatten() ]) layer_inputs = [NET_OUTPUT] + fc_layers[:-1] @@ -94,8 +94,8 @@ def loss_value(self, x_data, y_true, y_pred): def metrics(self): """ Set of metrics for this model """ - from vel.metrics.loss_metric import Loss - from vel.metrics.accuracy import Accuracy + from vel.metric.loss_metric import Loss + from vel.metric.accuracy import Accuracy return 
[Loss(), Accuracy()] diff --git a/vel/model/rnn/multilayer_rnn_sequence_classification.py b/vel/model/rnn/multilayer_rnn_sequence_classification.py index 0d3e7439..db5da9f0 100644 --- a/vel/model/rnn/multilayer_rnn_sequence_classification.py +++ b/vel/model/rnn/multilayer_rnn_sequence_classification.py @@ -5,17 +5,17 @@ import torch.nn as nn from vel.api import LossFunctionModel, ModelFactory, LinearBackboneModel -from vel.metrics.accuracy import Accuracy -from vel.metrics.loss_metric import Loss -from vel.modules.rnn_layer import RnnLayer +from vel.metric.accuracy import Accuracy +from vel.metric.loss_metric import Loss +from vel.module.rnn_layer import RnnLayer class MultilayerRnnSequenceClassification(LossFunctionModel): """ Multilayer GRU network for sequence modeling (n:1) """ def __init__(self, input_block: LinearBackboneModel, rnn_type: str, output_dim: int, - rnn_layers: typing.List[int], rnn_dropout: float=0.0, bidirectional: bool=False, - linear_layers: typing.List[int]=None, linear_dropout: float=0.0): + rnn_layers: typing.List[int], rnn_dropout: float = 0.0, bidirectional: bool = False, + linear_layers: typing.List[int] = None, linear_dropout: float = 0.0): super().__init__() self.output_dim = output_dim @@ -144,8 +144,8 @@ def metrics(self) -> list: def create(input_block: ModelFactory, rnn_type: str, output_dim: int, - rnn_layers: typing.List[int], rnn_dropout: float=0.0, bidirectional: bool=False, - linear_layers: typing.List[int]=None, linear_dropout: float=0.0): + rnn_layers: typing.List[int], rnn_dropout: float = 0.0, bidirectional: bool = False, + linear_layers: typing.List[int] = None, linear_dropout: float = 0.0): """ Vel factory function """ if linear_layers is None: linear_layers = [] diff --git a/vel/model/rnn/multilayer_rnn_sequence_model.py b/vel/model/rnn/multilayer_rnn_sequence_model.py index 9352ef10..2e90c2d3 100644 --- a/vel/model/rnn/multilayer_rnn_sequence_model.py +++ b/vel/model/rnn/multilayer_rnn_sequence_model.py @@ -4,15 +4,15 
@@ import torch.nn.functional as F import torch.nn as nn -from vel.api import RnnSupervisedModel, ModelFactory, LinearBackboneModel -from vel.modules.rnn_layer import RnnLayer +from vel.api import LossFunctionModel, ModelFactory, LinearBackboneModel +from vel.module.rnn_layer import RnnLayer -class MultilayerRnnSequenceModel(RnnSupervisedModel): +class MultilayerRnnSequenceModel(LossFunctionModel): """ Multilayer GRU network for sequence modeling (n:n) """ def __init__(self, input_block: LinearBackboneModel, rnn_type: str, hidden_layers: typing.List[int], - output_dim: int, dropout: float=0.0): + output_dim: int, dropout: float = 0.0): super().__init__() self.output_dim = output_dim @@ -115,7 +115,8 @@ def create(input_block: ModelFactory, rnn_type: str, hidden_layers: typing.List[ """ Vel factory function """ def instantiate(**_): return MultilayerRnnSequenceModel( - input_block.instantiate(), rnn_type=rnn_type, hidden_layers=hidden_layers, output_dim=output_dim, dropout=dropout + input_block.instantiate(), rnn_type=rnn_type, hidden_layers=hidden_layers, output_dim=output_dim, + dropout=dropout ) return ModelFactory.generic(instantiate) diff --git a/vel/model/vision/cifar_resnext.py b/vel/model/vision/cifar_resnext.py index ffa0fc76..edb6d8a2 100644 --- a/vel/model/vision/cifar_resnext.py +++ b/vel/model/vision/cifar_resnext.py @@ -7,7 +7,7 @@ import torch.nn.functional as F from vel.api import LossFunctionModel, ModelFactory -from vel.modules.resnext import ResNeXtBottleneck +from vel.module.resnext import ResNeXtBottleneck class ResNeXt(LossFunctionModel): @@ -44,8 +44,7 @@ def __init__(self, block, layers, inplanes, image_features, cardinality=4, divis nn.init.constant_(m.bias, 0.0) def _make_layer(self, block, in_channels, out_channels, blocks, stride=1): - layers = [] - layers.append(block(in_channels, out_channels, self.cardinality, self.divisor, stride=stride)) + layers = [block(in_channels, out_channels, self.cardinality, self.divisor, stride=stride)] for 
i in range(1, blocks): layers.append(block(out_channels, out_channels, self.cardinality, self.divisor, stride=1)) @@ -74,8 +73,8 @@ def loss_value(self, x_data, y_true, y_pred): def metrics(self): """ Set of metrics for this model """ - from vel.metrics.loss_metric import Loss - from vel.metrics.accuracy import Accuracy + from vel.metric.loss_metric import Loss + from vel.metric.accuracy import Accuracy return [Loss(), Accuracy()] @@ -87,6 +86,9 @@ def create(blocks, mode='basic', inplanes=64, cardinality=4, image_features=64, } def instantiate(**_): - return ResNeXt(block_dict[mode], blocks, inplanes=inplanes, image_features=image_features, cardinality=cardinality, divisor=divisor, num_classes=num_classes) + return ResNeXt( + block_dict[mode], blocks, inplanes=inplanes, image_features=image_features, + cardinality=cardinality, divisor=divisor, num_classes=num_classes + ) return ModelFactory.generic(instantiate) diff --git a/vel/module/input/embedding.py b/vel/module/input/embedding.py index ab4aee1c..37d3387b 100644 --- a/vel/module/input/embedding.py +++ b/vel/module/input/embedding.py @@ -6,8 +6,8 @@ class EmbeddingInput(LinearBackboneModel): """ Learnable Embedding input layer """ - def __init__(self, alphabet_size: int, output_dim: int, pretrained: bool=False, frozen: bool=False, - source: SupervisedTextData=None): + def __init__(self, alphabet_size: int, output_dim: int, pretrained: bool = False, frozen: bool = False, + source: SupervisedTextData = None): super().__init__() self._output_dim = output_dim @@ -34,7 +34,8 @@ def forward(self, input_data): return self.layer(input_data) -def create(alphabet_size: int, output_dim: int, pretrained: bool=False, frozen: bool=False, source: SupervisedTextData=None): +def create(alphabet_size: int, output_dim: int, pretrained: bool = False, frozen: bool = False, + source: SupervisedTextData = None): """ Vel factory function """ def instantiate(**_): return EmbeddingInput(alphabet_size, output_dim, pretrained=pretrained, 
frozen=frozen, source=source) @@ -44,4 +45,3 @@ def instantiate(**_): # Scripting interface EmbeddingInputFactory = create - diff --git a/vel/module/layers.py b/vel/module/layers.py index 9a95e31d..08fadb0a 100644 --- a/vel/module/layers.py +++ b/vel/module/layers.py @@ -58,4 +58,3 @@ def __init__(self, num_classes): def forward(self, x): return one_hot_encoding(x, self.num_classes) - diff --git a/vel/module/resnet_v2.py b/vel/module/resnet_v2.py index 31445f35..8a7e2518 100644 --- a/vel/module/resnet_v2.py +++ b/vel/module/resnet_v2.py @@ -81,7 +81,9 @@ def __init__(self, in_channels, out_channels, stride=1, divisor=4): self.conv1 = nn.Conv2d(in_channels, self.bottleneck_channels, kernel_size=1, bias=False) self.bn2 = nn.BatchNorm2d(self.bottleneck_channels) - self.conv2 = nn.Conv2d(self.bottleneck_channels, self.bottleneck_channels, kernel_size=3, stride=stride, padding=1, bias=False) + self.conv2 = nn.Conv2d( + self.bottleneck_channels, self.bottleneck_channels, kernel_size=3, stride=stride, padding=1, bias=False + ) self.bn3 = nn.BatchNorm2d(self.bottleneck_channels) self.conv3 = nn.Conv2d(self.bottleneck_channels, out_channels, kernel_size=1, bias=False) diff --git a/vel/module/resnext.py b/vel/module/resnext.py index 3ae26e49..7e943402 100644 --- a/vel/module/resnext.py +++ b/vel/module/resnext.py @@ -40,7 +40,6 @@ def __init__(self, in_channels, out_channels, cardinality, divisor, stride=1): self.conv_expand = nn.Conv2d(D * C, out_channels, kernel_size=1, stride=1, padding=0, bias=False) self.bn_expand = nn.BatchNorm2d(out_channels) - def forward(self, x): bottleneck = self.conv_reduce(x) bottleneck = F.relu(self.bn_reduce(bottleneck), inplace=True) diff --git a/vel/module/rnn_cell.py b/vel/module/rnn_cell.py index 08d9582e..5b62a046 100644 --- a/vel/module/rnn_cell.py +++ b/vel/module/rnn_cell.py @@ -55,6 +55,3 @@ def forward(self, input_data, state): else: new_hidden_state = self.rnn_cell(input_data, state) return new_hidden_state, new_hidden_state - - - 
diff --git a/vel/module/rnn_layer.py b/vel/module/rnn_layer.py index 7533dc35..54d61904 100644 --- a/vel/module/rnn_layer.py +++ b/vel/module/rnn_layer.py @@ -73,6 +73,3 @@ def forward(self, input_data, state=None): return output, new_state else: return self.rnn_cell(input_data, state) - - - diff --git a/vel/notebook/__init__.py b/vel/notebook/__init__.py index 68058abb..a8655a0b 100644 --- a/vel/notebook/__init__.py +++ b/vel/notebook/__init__.py @@ -1,2 +1,2 @@ -from .loader import load_config, script -from .defaults import reasonable_notbook_defaults +from .loader import load_config, script # noqa +from .defaults import reasonable_notbook_defaults # noqa diff --git a/vel/optimizer/sgd.py b/vel/optimizer/sgd.py index 93272f0b..128ccd1e 100644 --- a/vel/optimizer/sgd.py +++ b/vel/optimizer/sgd.py @@ -8,7 +8,7 @@ class SgdFactory(OptimizerFactory): """ SGD optimizer factory """ - def __init__(self, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, layer_groups: bool=False): + def __init__(self, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, layer_groups: bool = False): self.lr = lr self.momentum = momentum self.dampening = dampening diff --git a/vel/phase/cycle.py b/vel/phase/cycle.py index 1c89915c..b1862323 100644 --- a/vel/phase/cycle.py +++ b/vel/phase/cycle.py @@ -57,7 +57,9 @@ def on_batch_begin(self, batch_info: BatchInfo): cycle_length = self.cycle_lengths[batch_info.local_epoch_number - 1] cycle_start = self.cycle_starts[batch_info.local_epoch_number - 1] - numerator = (batch_info.local_epoch_number - cycle_start - 1) * batch_info.batches_per_epoch + batch_info.batch_number + numerator = ( + (batch_info.local_epoch_number - cycle_start - 1) * batch_info.batches_per_epoch + batch_info.batch_number + ) denominator = cycle_length * batch_info.batches_per_epoch interpolation_number = numerator / denominator @@ -66,7 +68,10 @@ def on_batch_begin(self, batch_info: BatchInfo): lr = self.init_lr else: if isinstance(self.max_lr, list): - lr 
= [interp.interpolate_single(max_lr, min_lr, interpolation_number, how=self.interpolate) for max_lr, min_lr in zip(self.max_lr, self.min_lr)] + lr = [ + interp.interpolate_single(max_lr, min_lr, interpolation_number, how=self.interpolate) + for max_lr, min_lr in zip(self.max_lr, self.min_lr) + ] else: lr = interp.interpolate_single(self.max_lr, self.min_lr, interpolation_number, how=self.interpolate) diff --git a/vel/rl/algo/dqn.py b/vel/rl/algo/dqn.py index f3de2eaa..1437a062 100644 --- a/vel/rl/algo/dqn.py +++ b/vel/rl/algo/dqn.py @@ -3,15 +3,15 @@ import torch.nn.utils from vel.api import ModelFactory -from vel.api.metrics.averaging_metric import AveragingNamedMetric +from vel.metric.averaging_metric import AveragingNamedMetric from vel.rl.api import OptimizerAlgoBase class DeepQLearning(OptimizerAlgoBase): """ Deep Q-Learning algorithm """ - def __init__(self, model_factory: ModelFactory, discount_factor: float, double_dqn: bool, target_update_frequency: int, - max_grad_norm: float): + def __init__(self, model_factory: ModelFactory, discount_factor: float, double_dqn: bool, + target_update_frequency: int, max_grad_norm: float): super().__init__(max_grad_norm) self.model_factory = model_factory @@ -91,7 +91,7 @@ def metrics(self) -> list: def create(model: ModelFactory, discount_factor: float, target_update_frequency: int, - max_grad_norm: float, double_dqn: bool=False): + max_grad_norm: float, double_dqn: bool = False): """ Vel factory function """ return DeepQLearning( model_factory=model, diff --git a/vel/rl/algo/policy_gradient/ddpg.py b/vel/rl/algo/policy_gradient/ddpg.py index 72560f80..2150cab4 100644 --- a/vel/rl/algo/policy_gradient/ddpg.py +++ b/vel/rl/algo/policy_gradient/ddpg.py @@ -4,13 +4,13 @@ import torch.nn.functional as F from vel.rl.api import OptimizerAlgoBase -from vel.api.metrics.averaging_metric import AveragingNamedMetric +from vel.metric.averaging_metric import AveragingNamedMetric class 
DeepDeterministicPolicyGradient(OptimizerAlgoBase): """ Deep Deterministic Policy Gradient (DDPG) - policy gradient calculations """ - def __init__(self, model_factory, discount_factor: float, tau: float, max_grad_norm: typing.Optional[float]=None): + def __init__(self, model_factory, discount_factor: float, tau: float, max_grad_norm: typing.Optional[float] = None): super().__init__(max_grad_norm) self.model_factory = model_factory @@ -84,7 +84,7 @@ def metrics(self) -> list: ] -def create(model, discount_factor: float, tau: float, max_grad_norm: float=None): +def create(model, discount_factor: float, tau: float, max_grad_norm: float = None): """ Vel factory function """ return DeepDeterministicPolicyGradient( tau=tau, diff --git a/vel/rl/algo/policy_gradient/ppo.py b/vel/rl/algo/policy_gradient/ppo.py index 5f0b3538..d835cf6a 100644 --- a/vel/rl/algo/policy_gradient/ppo.py +++ b/vel/rl/algo/policy_gradient/ppo.py @@ -2,17 +2,17 @@ import numbers -from vel.api.metrics.averaging_metric import AveragingNamedMetric -from vel.math.functions import explained_variance +from vel.math.function import explained_variance +from vel.metric.averaging_metric import AveragingNamedMetric from vel.rl.api import OptimizerAlgoBase, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae -from vel.schedules.constant import ConstantSchedule +from vel.schedule.constant import ConstantSchedule class PpoPolicyGradient(OptimizerAlgoBase): """ Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 """ def __init__(self, entropy_coefficient, value_coefficient, cliprange, max_grad_norm, discount_factor: float, - normalize_advantage: bool=True, gae_lambda: float=1.0): + normalize_advantage: bool = True, gae_lambda: float = 1.0): super().__init__(max_grad_norm) self.entropy_coefficient = entropy_coefficient diff --git a/vel/rl/algo/policy_gradient/trpo.py b/vel/rl/algo/policy_gradient/trpo.py index b73f3a67..d97d1bf3 100644 --- 
a/vel/rl/algo/policy_gradient/trpo.py +++ b/vel/rl/algo/policy_gradient/trpo.py @@ -4,8 +4,8 @@ import torch.nn.functional as F import torch.nn.utils -from vel.api.metrics.averaging_metric import AveragingNamedMetric -from vel.math.functions import explained_variance +from vel.metric.averaging_metric import AveragingNamedMetric +from vel.math.function import explained_variance from vel.rl.api import AlgoBase, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae @@ -30,11 +30,11 @@ def conjugate_gradient_method(matrix_vector_operator, loss_gradient, nsteps, rdo rdotr = torch.dot(r, r) for i in range(nsteps): - Avp = matrix_vector_operator(p) - alpha = rdotr / torch.dot(p, Avp) + avp = matrix_vector_operator(p) + alpha = rdotr / torch.dot(p, avp) x += alpha * p - r -= alpha * Avp + r -= alpha * avp new_rdotr = torch.dot(r, r) betta = new_rdotr / rdotr @@ -122,8 +122,10 @@ def optimizer_step(self, batch_info, device, model, rollout): expected_improvement = (-policy_grad) @ full_step original_parameter_vec = p2v(model.policy_parameters()).detach_() - policy_optimization_success, ratio, policy_loss_improvement, new_policy_loss, kl_divergence_step = self.line_search( - model, rollout, policy_loss, policy_params, original_parameter_vec, full_step, expected_improvement + (policy_optimization_success, ratio, policy_loss_improvement, new_policy_loss, kl_divergence_step) = ( + self.line_search( + model, rollout, policy_loss, policy_params, original_parameter_vec, full_step, expected_improvement + ) ) gradient_norms = [] diff --git a/vel/rl/api/env_base.py b/vel/rl/api/env_base.py index 07e0be1a..eb0c6fba 100644 --- a/vel/rl/api/env_base.py +++ b/vel/rl/api/env_base.py @@ -25,4 +25,3 @@ def instantiate(self, parallel_envs, seed=0, preset='default') -> VecEnv: def instantiate_single(self, seed=0, preset='default') -> VecEnv: """ Create a new VecEnv instance - single """ raise NotImplementedError - diff --git a/vel/rl/api/evaluator.py 
b/vel/rl/api/evaluator.py index dd5cb9ec..c8a98307 100644 --- a/vel/rl/api/evaluator.py +++ b/vel/rl/api/evaluator.py @@ -108,6 +108,7 @@ def is_provided(self, name): return True elif name.startswith('rollout:'): rollout_name = name[8:] + return self.is_provided(rollout_name) else: return False diff --git a/vel/rl/api/rollout.py b/vel/rl/api/rollout.py index 01720b97..4acf8cbb 100644 --- a/vel/rl/api/rollout.py +++ b/vel/rl/api/rollout.py @@ -100,7 +100,8 @@ class Trajectories(Rollout): transition_tensors - tensors that have a row (multidimensional) per each transition. E.g. state, reward, done rollout_tensors - tensors that have a row (multidimensional) per whole rollout. E.g. final_value, initial rnn state """ - def __init__(self, num_steps, num_envs, environment_information, transition_tensors, rollout_tensors, extra_data=None): + def __init__(self, num_steps, num_envs, environment_information, transition_tensors, rollout_tensors, + extra_data=None): self.num_steps = num_steps self.num_envs = num_envs self.environment_information = environment_information @@ -111,11 +112,15 @@ def __init__(self, num_steps, num_envs, environment_information, transition_tens def to_transitions(self) -> 'Transitions': """ Convert given rollout to Transitions """ # No need to propagate 'rollout_tensors' as they won't mean anything + + if self.environment_information is not None: + env_info = [ei for l in self.environment_information for ei in l] + else: + env_info = None + return Transitions( size=self.num_steps * self.num_envs, - environment_information= - [ei for l in self.environment_information for ei in l] - if self.environment_information is not None else None, + environment_information=env_info, transition_tensors={ name: tensor_util.merge_first_two_dims(t) for name, t in self.transition_tensors.items() }, diff --git a/vel/rl/buffer/backend/circular_vec_buffer_backend.py b/vel/rl/buffer/backend/circular_vec_buffer_backend.py index 074700bc..3b30fb6f 100644 --- 
a/vel/rl/buffer/backend/circular_vec_buffer_backend.py +++ b/vel/rl/buffer/backend/circular_vec_buffer_backend.py @@ -1,7 +1,7 @@ import gym import numpy as np -from vel.exceptions import VelException +from vel.exception import VelException def take_along_axis(large_array, indexes): @@ -22,7 +22,7 @@ class CircularVecEnvBufferBackend: """ def __init__(self, buffer_capacity: int, num_envs: int, observation_space: gym.Space, action_space: gym.Space, - frame_stack_compensation: bool=False, frame_history: int=1): + frame_stack_compensation: bool = False, frame_history: int = 1): # Maximum number of items in the buffer self.buffer_capacity = buffer_capacity diff --git a/vel/rl/buffer/tests/__init__.py b/vel/rl/buffer/test/__init__.py similarity index 100% rename from vel/rl/buffer/tests/__init__.py rename to vel/rl/buffer/test/__init__.py diff --git a/vel/rl/buffer/tests/test_circular_buffer_backend.py b/vel/rl/buffer/test/test_circular_buffer_backend.py similarity index 100% rename from vel/rl/buffer/tests/test_circular_buffer_backend.py rename to vel/rl/buffer/test/test_circular_buffer_backend.py diff --git a/vel/rl/buffer/tests/test_circular_vec_env_buffer_backend.py b/vel/rl/buffer/test/test_circular_vec_env_buffer_backend.py similarity index 100% rename from vel/rl/buffer/tests/test_circular_vec_env_buffer_backend.py rename to vel/rl/buffer/test/test_circular_vec_env_buffer_backend.py diff --git a/vel/rl/buffer/tests/test_prioritized_circular_buffer_backend.py b/vel/rl/buffer/test/test_prioritized_circular_buffer_backend.py similarity index 100% rename from vel/rl/buffer/tests/test_prioritized_circular_buffer_backend.py rename to vel/rl/buffer/test/test_prioritized_circular_buffer_backend.py diff --git a/vel/rl/buffer/tests/test_prioritized_vec_buffer_backend.py b/vel/rl/buffer/test/test_prioritized_vec_buffer_backend.py similarity index 98% rename from vel/rl/buffer/tests/test_prioritized_vec_buffer_backend.py rename to 
vel/rl/buffer/test/test_prioritized_vec_buffer_backend.py index cd90ebe6..5fb21f3b 100644 --- a/vel/rl/buffer/tests/test_prioritized_vec_buffer_backend.py +++ b/vel/rl/buffer/test/test_prioritized_vec_buffer_backend.py @@ -5,8 +5,8 @@ import numpy.testing as nt import pytest -from vel.exceptions import VelException -from vel.rl.buffers.backend.prioritized_vec_buffer_backend import PrioritizedCircularVecEnvBufferBackend +from vel.exception import VelException +from vel.rl.buffer.backend.prioritized_vec_buffer_backend import PrioritizedCircularVecEnvBufferBackend def get_halfempty_buffer_with_dones(frame_history=1): diff --git a/vel/rl/command/evaluate_env_command.py b/vel/rl/command/evaluate_env_command.py index e9d7c2e4..33f7f4dc 100644 --- a/vel/rl/command/evaluate_env_command.py +++ b/vel/rl/command/evaluate_env_command.py @@ -11,7 +11,8 @@ class EvaluateEnvCommand: """ Record environment playthrough as a game """ def __init__(self, model_config: ModelConfig, env_factory: VecEnvFactory, model_factory: ModelFactory, - storage: Storage, parallel_envs: int, action_noise: typing.Optional[ModelFactory], takes: int, sample_args: dict = None): + storage: Storage, parallel_envs: int, action_noise: typing.Optional[ModelFactory], takes: int, + sample_args: dict = None): self.model_config = model_config self.model_factory = model_factory self.env_factory = env_factory @@ -26,7 +27,9 @@ def __init__(self, model_config: ModelConfig, env_factory: VecEnvFactory, model_ def run(self): device = self.model_config.torch_device() - env = self.env_factory.instantiate(parallel_envs=self.parallel_envs, preset='record', seed=self.model_config.seed) + env = self.env_factory.instantiate( + parallel_envs=self.parallel_envs, preset='record', seed=self.model_config.seed + ) model = self.model_factory.instantiate(action_space=env.action_space).to(device) if self.action_noise_factory is not None: diff --git a/vel/rl/command/record_movie_command.py b/vel/rl/command/record_movie_command.py index 
78e60d5f..6b6f3c4c 100644 --- a/vel/rl/command/record_movie_command.py +++ b/vel/rl/command/record_movie_command.py @@ -78,7 +78,9 @@ def record_take(self, model, env_instance, device, take_number): # End of an episode break - takename = self.model_config.output_dir('videos', self.model_config.run_name, self.videoname.format(take_number)) + takename = self.model_config.output_dir( + 'videos', self.model_config.run_name, self.videoname.format(take_number) + ) pathlib.Path(os.path.dirname(takename)).mkdir(parents=True, exist_ok=True) fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') diff --git a/vel/rl/env_roller/transition_replay_env_roller.py b/vel/rl/env_roller/transition_replay_env_roller.py index d64628ae..d0ed933b 100644 --- a/vel/rl/env_roller/transition_replay_env_roller.py +++ b/vel/rl/env_roller/transition_replay_env_roller.py @@ -18,8 +18,9 @@ class TransitionReplayEnvRoller(ReplayEnvRollerBase): Samples transitions from the replay buffer (individual frame transitions) """ - def __init__(self, environment, device, replay_buffer: ReplayBuffer, discount_factor: typing.Optional[float]=None, - normalize_returns: bool=False, forward_steps: int=1, action_noise: typing.Optional[nn.Module]=None): + def __init__(self, environment, device, replay_buffer: ReplayBuffer, discount_factor: typing.Optional[float] = None, + normalize_returns: bool = False, forward_steps: int = 1, + action_noise: typing.Optional[nn.Module] = None): self._environment = environment self.device = device self.replay_buffer = replay_buffer @@ -156,8 +157,9 @@ def update(self, rollout, batch_info): class TransitionReplayEnvRollerFactory(ReplayEnvRollerFactoryBase): """ Factory for the ReplayEnvRoller """ - def __init__(self, replay_buffer_factory: ReplayBufferFactory, discount_factor: typing.Optional[float]=None, - normalize_returns: bool=False, forward_steps: int=1, action_noise: typing.Optional[ModelFactory]=None): + def __init__(self, replay_buffer_factory: ReplayBufferFactory, 
discount_factor: typing.Optional[float] = None, + normalize_returns: bool = False, forward_steps: int = 1, + action_noise: typing.Optional[ModelFactory] = None): self.replay_buffer_factory = replay_buffer_factory self.normalize_returns = normalize_returns self.forward_steps = forward_steps @@ -183,8 +185,8 @@ def instantiate(self, environment, device): ) -def create(replay_buffer, discount_factor: typing.Optional[float]=None, normalize_returns: bool=False, - forward_steps: int=1, action_noise: typing.Optional[ModelFactory]=None): +def create(replay_buffer, discount_factor: typing.Optional[float] = None, normalize_returns: bool = False, + forward_steps: int = 1, action_noise: typing.Optional[ModelFactory] = None): """ Vel factory function """ return TransitionReplayEnvRollerFactory( replay_buffer_factory=replay_buffer, diff --git a/vel/rl/model/backbone/lstm.py b/vel/rl/model/backbone/lstm.py index 874f7ca6..50356d07 100644 --- a/vel/rl/model/backbone/lstm.py +++ b/vel/rl/model/backbone/lstm.py @@ -1,4 +1,4 @@ -from vel.api import LinearBackboneModel, ModelFactory +from vel.api import LinearBackboneModel class LstmBackbone(LinearBackboneModel): diff --git a/vel/rl/model/backbone/mlp.py b/vel/rl/model/backbone/mlp.py index cc87e9e9..f4e03ae3 100644 --- a/vel/rl/model/backbone/mlp.py +++ b/vel/rl/model/backbone/mlp.py @@ -17,8 +17,8 @@ class MLP(LinearBackboneModel): """ Simple Multi-Layer-Perceptron network """ - def __init__(self, input_length: int, hidden_layers: typing.List[int], activation: str='tanh', - normalization: typing.Optional[str]=None): + def __init__(self, input_length: int, hidden_layers: typing.List[int], activation: str = 'tanh', + normalization: typing.Optional[str] = None): super().__init__() self.input_length = input_length diff --git a/vel/rl/model/backbone/nature_cnn_rnn.py b/vel/rl/model/backbone/nature_cnn_rnn.py index 832926e4..9662a444 100644 --- a/vel/rl/model/backbone/nature_cnn_rnn.py +++ b/vel/rl/model/backbone/nature_cnn_rnn.py @@ -1,6 
+1,6 @@ from vel.api import LinearBackboneModel, ModelFactory -from vel.rl.models.backbone.nature_cnn import NatureCnn -from vel.modules.rnn_cell import RnnCell +from vel.rl.model.backbone.nature_cnn import NatureCnn +from vel.module.rnn_cell import RnnCell class NatureCnnRnnBackbone(LinearBackboneModel): @@ -8,8 +8,8 @@ class NatureCnnRnnBackbone(LinearBackboneModel): Long-Short-Term Memory rnn cell together with DeepMind-style 'Nature' cnn preprocessing """ - def __init__(self, input_width: int, input_height: int, input_channels: int, rnn_type='lstm', - cnn_output_dim: int=512, hidden_units: int=128): + def __init__(self, input_width: int, input_height: int, input_channels: int, rnn_type: str = 'lstm', + cnn_output_dim: int = 512, hidden_units: int = 128): super().__init__() self.hidden_units = hidden_units diff --git a/vel/rl/model/deterministic_policy_model.py b/vel/rl/model/deterministic_policy_model.py index 6be633ce..da7b31d0 100644 --- a/vel/rl/model/deterministic_policy_model.py +++ b/vel/rl/model/deterministic_policy_model.py @@ -4,10 +4,10 @@ import typing from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.modules.input.identity import IdentityFactory +from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, Evaluator, RlModel -from vel.rl.modules.deterministic_action_head import DeterministicActionHead -from vel.rl.modules.deterministic_critic_head import DeterministicCriticHead +from vel.rl.module.deterministic_action_head import DeterministicActionHead +from vel.rl.module.deterministic_critic_head import DeterministicCriticHead class DeterministicPolicyEvaluator(Evaluator): @@ -154,7 +154,7 @@ def instantiate(self, **extra_args): def create(policy_backbone: ModelFactory, value_backbone: ModelFactory, - input_block: typing.Optional[ModelFactory]=None): + input_block: typing.Optional[ModelFactory] = None): """ Vel factory function """ if input_block is None: input_block = IdentityFactory() diff 
--git a/vel/rl/model/q_distributional_model.py b/vel/rl/model/q_distributional_model.py index a769e741..209b002b 100644 --- a/vel/rl/model/q_distributional_model.py +++ b/vel/rl/model/q_distributional_model.py @@ -2,9 +2,9 @@ import typing from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.modules.input.identity import IdentityFactory +from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, RlModel, Evaluator -from vel.rl.modules.q_distributional_head import QDistributionalHead +from vel.rl.module.q_distributional_head import QDistributionalHead class QDistributionalModelEvaluator(Evaluator): @@ -60,7 +60,7 @@ class QDistributionalModel(RlModel): Supports only discrete action spaces (ones that can be enumerated) """ def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, action_space: gym.Space, - vmin: float, vmax: float, atoms: int=1): + vmin: float, vmax: float, atoms: int = 1): super().__init__() self.action_space = action_space @@ -131,7 +131,7 @@ def instantiate(self, **extra_args): def create(backbone: ModelFactory, vmin: float, vmax: float, atoms: int, - input_block: typing.Optional[ModelFactory]=None): + input_block: typing.Optional[ModelFactory] = None): """ Vel factory function """ if input_block is None: input_block = IdentityFactory() diff --git a/vel/rl/model/q_dueling_model.py b/vel/rl/model/q_dueling_model.py index 09d8518b..74fff35a 100644 --- a/vel/rl/model/q_dueling_model.py +++ b/vel/rl/model/q_dueling_model.py @@ -2,10 +2,10 @@ import typing from vel.api import LinearBackboneModel, Model, ModelFactory, BackboneModel -from vel.modules.input.identity import IdentityFactory +from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, Evaluator -from vel.rl.modules.q_dueling_head import QDuelingHead -from vel.rl.models.q_model import QModelEvaluator +from vel.rl.module.q_dueling_head import QDuelingHead +from vel.rl.model.q_model import 
QModelEvaluator class QDuelingModel(Model): @@ -65,7 +65,7 @@ def instantiate(self, **extra_args): return QDuelingModel(input_block, backbone, extra_args['action_space']) -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory]=None): +def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): """ Vel factory function """ if input_block is None: input_block = IdentityFactory() diff --git a/vel/rl/model/q_model.py b/vel/rl/model/q_model.py index 208137e9..7472e0bb 100644 --- a/vel/rl/model/q_model.py +++ b/vel/rl/model/q_model.py @@ -2,9 +2,9 @@ import typing from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.modules.input.identity import IdentityFactory +from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, RlModel, Evaluator -from vel.rl.modules.q_head import QHead +from vel.rl.module.q_head import QHead class QModelEvaluator(Evaluator): @@ -89,7 +89,7 @@ def instantiate(self, **extra_args): return QModel(input_block, backbone, extra_args['action_space']) -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory]=None): +def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): """ Vel factory function """ if input_block is None: input_block = IdentityFactory() diff --git a/vel/rl/model/q_noisy_model.py b/vel/rl/model/q_noisy_model.py index cfc3e491..9dc73e6e 100644 --- a/vel/rl/model/q_noisy_model.py +++ b/vel/rl/model/q_noisy_model.py @@ -75,8 +75,8 @@ def instantiate(self, **extra_args): ) -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory]=None, initial_std_dev=0.4, - factorized_noise=True): +def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None, initial_std_dev: float = 0.4, + factorized_noise: bool = True): """ Vel factory function """ if input_block is None: input_block = IdentityFactory() diff --git a/vel/rl/model/q_stochastic_policy_model.py 
b/vel/rl/model/q_stochastic_policy_model.py index 929dc3b9..5cf97893 100644 --- a/vel/rl/model/q_stochastic_policy_model.py +++ b/vel/rl/model/q_stochastic_policy_model.py @@ -3,10 +3,10 @@ import typing from vel.api import LinearBackboneModel, Model, ModelFactory, BackboneModel -from vel.modules.input.identity import IdentityFactory +from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, Evaluator -from vel.rl.modules.action_head import ActionHead -from vel.rl.modules.q_head import QHead +from vel.rl.module.action_head import ActionHead +from vel.rl.module.q_head import QHead class QStochasticPolicyEvaluator(Evaluator): @@ -120,7 +120,7 @@ def instantiate(self, **extra_args): return QStochasticPolicyModel(input_block, backbone, extra_args['action_space']) -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory]=None): +def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): """ Vel factory function """ if input_block is None: input_block = IdentityFactory() diff --git a/vel/rl/model/stochastic_policy_model.py b/vel/rl/model/stochastic_policy_model.py index 27a7c4c2..cee084b6 100644 --- a/vel/rl/model/stochastic_policy_model.py +++ b/vel/rl/model/stochastic_policy_model.py @@ -2,10 +2,10 @@ import typing from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.modules.input.identity import IdentityFactory +from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, Evaluator, RlModel -from vel.rl.modules.action_head import ActionHead -from vel.rl.modules.value_head import ValueHead +from vel.rl.module.action_head import ActionHead +from vel.rl.module.value_head import ValueHead class StochasticPolicyEvaluator(Evaluator): @@ -115,7 +115,7 @@ def instantiate(self, **extra_args): return StochasticPolicyModel(input_block, backbone, extra_args['action_space']) -def create(backbone: ModelFactory, input_block: 
typing.Optional[ModelFactory]=None): +def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): """ Vel factory function """ if input_block is None: input_block = IdentityFactory() diff --git a/vel/rl/model/stochastic_policy_rnn_model.py b/vel/rl/model/stochastic_policy_rnn_model.py index cda7d69a..7147a381 100644 --- a/vel/rl/model/stochastic_policy_rnn_model.py +++ b/vel/rl/model/stochastic_policy_rnn_model.py @@ -3,10 +3,10 @@ import typing from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.modules.input.identity import IdentityFactory +from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, Trajectories, Evaluator, RlRnnModel -from vel.rl.modules.action_head import ActionHead -from vel.rl.modules.value_head import ValueHead +from vel.rl.module.action_head import ActionHead +from vel.rl.module.value_head import ValueHead class StochasticPolicyRnnEvaluator(Evaluator): @@ -142,7 +142,7 @@ def instantiate(self, **extra_args): return StochasticPolicyRnnModel(input_block, backbone, extra_args['action_space']) -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory]=None): +def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): """ Vel factory function """ if input_block is None: input_block = IdentityFactory() diff --git a/vel/rl/module/q_head.py b/vel/rl/module/q_head.py index 52abaaeb..00431c2f 100644 --- a/vel/rl/module/q_head.py +++ b/vel/rl/module/q_head.py @@ -26,4 +26,3 @@ def forward(self, input_data): def sample(self, q_values): """ Sample from epsilon-greedy strategy with given q-values """ return q_values.argmax(dim=1) - diff --git a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py index d9f873b3..d3ce3349 100644 --- a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py +++ 
b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py @@ -190,7 +190,7 @@ def instantiate(self, device: torch.device) -> BufferedOffPolicyIterationReinfor def create(model_config, vec_env, model, algo, env_roller, parallel_envs: int, - rollout_steps: int, training_steps: int, training_rounds: int=1): + rollout_steps: int, training_steps: int, training_rounds: int = 1): """ Vel factory function """ settings = BufferedOffPolicyIterationReinforcerSettings( rollout_steps=rollout_steps, diff --git a/vel/schedule/linear.py b/vel/schedule/linear.py index ef74b03c..a3f88c4f 100644 --- a/vel/schedule/linear.py +++ b/vel/schedule/linear.py @@ -18,4 +18,3 @@ def value(self, progress_indicator): def create(initial_value, final_value): """ Vel factory function """ return LinearSchedule(initial_value, final_value) - diff --git a/vel/scheduler/linear_batch_scaler.py b/vel/scheduler/linear_batch_scaler.py index 59088401..584251f4 100644 --- a/vel/scheduler/linear_batch_scaler.py +++ b/vel/scheduler/linear_batch_scaler.py @@ -32,5 +32,3 @@ def instantiate(self, optimizer, last_epoch=-1) -> LinearBatchScaler: def create(): """ Vel factory function """ return LinearBatchScalerFactory() - - diff --git a/vel/scheduler/multi_step.py b/vel/scheduler/multi_step.py index 172f7ee7..eeeb735d 100644 --- a/vel/scheduler/multi_step.py +++ b/vel/scheduler/multi_step.py @@ -1,5 +1,4 @@ -import torch.optim.lr_scheduler as scheduler - +# import torch.optim.lr_scheduler as scheduler # class MultiStepScheduler: # def __init__(self, optimizer, milestones, gamma, last_epoch): diff --git a/vel/scheduler/reduce_lr_on_plateau.py b/vel/scheduler/reduce_lr_on_plateau.py index daf97999..98dc67a3 100644 --- a/vel/scheduler/reduce_lr_on_plateau.py +++ b/vel/scheduler/reduce_lr_on_plateau.py @@ -1,8 +1,9 @@ -import torch.optim.lr_scheduler as scheduler +# import torch.optim.lr_scheduler as scheduler # class ReduceLrOnPlateau: -# def __init__(self, optimizer, metric_name, mode, factor, patience, 
threshold, threshold_mode, cooldown, min_lr, epsilon): +# def __init__(self, optimizer, metric_name, mode, factor, patience, threshold, threshold_mode, +# cooldown, min_lr, epsilon): # self.metric_name = metric_name # self.scheduler = scheduler.ReduceLROnPlateau( # optimizer, @@ -30,7 +31,7 @@ # min_lr=0, epsilon=1e-8): # """ Create a scheduler that lowers the LR on metric plateau """ # def scheduler_fn(optimizer): -# return ReduceLrOnPlateau(optimizer, metric_name, mode, factor, patience, threshold, threshold_mode, cooldown, min_lr, epsilon) +# return ReduceLrOnPlateau(optimizer, metric_name, mode, factor, patience, threshold, threshold_mode, +# cooldown, min_lr, epsilon) # # return scheduler_fn - diff --git a/vel/source/nlp/text_url.py b/vel/source/nlp/text_url.py index e1b2095d..5478837c 100644 --- a/vel/source/nlp/text_url.py +++ b/vel/source/nlp/text_url.py @@ -18,7 +18,9 @@ def __init__(self, padded_sequence, sequence_length, batch_size, alphabet_size, self.alphabet_size = alphabet_size self.padded_sequence = padded_sequence[:-1].reshape(self.num_batches * self.batch_size, self.sequence_length) - self.padded_sequence_next = padded_sequence[1:].reshape(self.num_batches * self.batch_size, self.sequence_length) + self.padded_sequence_next = padded_sequence[1:].reshape( + self.num_batches * self.batch_size, self.sequence_length + ) self.sequence_indices = np.arange(self.num_batches * self.batch_size) diff --git a/vel/source/vision/cifar10.py b/vel/source/vision/cifar10.py index 4d8c02ee..53ec1080 100644 --- a/vel/source/vision/cifar10.py +++ b/vel/source/vision/cifar10.py @@ -2,9 +2,9 @@ from vel.api import SupervisedTrainingData -from vel.augmentations.normalize import Normalize -from vel.augmentations.to_tensor import ToTensor -from vel.augmentations.to_array import ToArray +from vel.augmentation.normalize import Normalize +from vel.augmentation.to_tensor import ToTensor +from vel.augmentation.to_array import ToArray def create(model_config, batch_size, 
normalize=True, num_workers=0, augmentations=None): @@ -18,7 +18,7 @@ def create(model_config, batch_size, normalize=True, num_workers=0, augmentation test_dataset = datasets.CIFAR10(path, train=False, download=True) augmentations = [ToArray()] + (augmentations if augmentations is not None else []) - + if normalize: train_data = train_dataset.data mean_value = (train_data / 255).mean(axis=(0, 1, 2)) diff --git a/vel/storage/backend/mongodb.py b/vel/storage/backend/mongodb.py index d9d197eb..ff663737 100644 --- a/vel/storage/backend/mongodb.py +++ b/vel/storage/backend/mongodb.py @@ -48,4 +48,3 @@ def store(self, metrics): def create(model_config, uri, database): """ Vel factory function """ return MongoDbBackend(model_config, uri, database) - diff --git a/vel/storage/strategy/checkpoint_strategy.py b/vel/storage/strategy/checkpoint_strategy.py index a2d245c4..272b3681 100644 --- a/vel/storage/strategy/checkpoint_strategy.py +++ b/vel/storage/strategy/checkpoint_strategy.py @@ -22,5 +22,3 @@ def current_best_checkpoint_idx(self) -> typing.Union[int, None]: def write_state_dict(self, hidden_state_dict): pass def restore(self, hidden_state_dict): pass - - diff --git a/vel/storage/streaming/visdom.py b/vel/storage/streaming/visdom.py index 917bb390..a32bf83b 100644 --- a/vel/storage/streaming/visdom.py +++ b/vel/storage/streaming/visdom.py @@ -36,7 +36,7 @@ def on_batch_end(self, batch_info): float(batch_info.epoch_number) + float(batch_info.batch_number) / batch_info.batches_per_epoch ) - + lr = batch_info.optimizer.param_groups[-1]['lr'] metrics_df = pd.DataFrame([lr], index=[iteration_idx], columns=['lr']) diff --git a/vel/util/intepolate.py b/vel/util/interpolate.py similarity index 99% rename from vel/util/intepolate.py rename to vel/util/interpolate.py index 29eb72a4..985773c0 100644 --- a/vel/util/intepolate.py +++ b/vel/util/interpolate.py @@ -53,4 +53,3 @@ def interpolate_series(start, end, steps, how='linear'): def interpolate_single(start, end, coefficient, 
how='linear'): """ Interpolate single value between start and end in given number of steps """ return INTERP_SINGLE_DICT[how](start, end, coefficient) - diff --git a/vel/util/module_util.py b/vel/util/module_util.py index e08e9eda..1c501336 100644 --- a/vel/util/module_util.py +++ b/vel/util/module_util.py @@ -38,7 +38,8 @@ def module_broadcast(m, broadcast_fn, *args, **kwargs): def set_train_mode(module): # Only fix ones which we don't want to "train" - if hasattr(module, 'running_mean') and (getattr(module, 'bn_freeze', False) or not getattr(module, 'trainable', True)): + if hasattr(module, 'running_mean') and (getattr(module, 'bn_freeze', False) or + not getattr(module, 'trainable', True)): module.eval() elif getattr(module, 'drop_freeze', False) and hasattr(module, 'p') and ('drop' in type(module).__name__.lower()): module.eval() diff --git a/vel/util/situational.py b/vel/util/situational.py index d57924f1..56bb25a8 100644 --- a/vel/util/situational.py +++ b/vel/util/situational.py @@ -1,8 +1,8 @@ import typing -def process_environment_settings(default_dictionary: dict, settings: typing.Optional[dict]=None, - presets: typing.Optional[dict]=None): +def process_environment_settings(default_dictionary: dict, settings: typing.Optional[dict] = None, + presets: typing.Optional[dict] = None): """ Process a dictionary of env settings """ settings = settings if settings is not None else {} presets = presets if presets is not None else {} @@ -25,4 +25,3 @@ def process_environment_settings(default_dictionary: dict, settings: typing.Opti result_dict[key] = new_dict return result_dict - diff --git a/vel/util/summary.py b/vel/util/summary.py index b5809f39..d7608601 100644 --- a/vel/util/summary.py +++ b/vel/util/summary.py @@ -76,7 +76,7 @@ def hook(module, input, output): '{0:,}'.format(summary[layer]['nb_params'])) total_params += summary[layer]['nb_params'] if 'trainable' in summary[layer]: - if summary[layer]['trainable'] == True: + if summary[layer]['trainable']: 
trainable_params += summary[layer]['nb_params'] print(line_new) print('================================================================') @@ -84,4 +84,4 @@ def hook(module, input, output): print('Trainable params: {0:,}'.format(trainable_params)) print('Non-trainable params: {0:,}'.format(total_params - trainable_params)) print('----------------------------------------------------------------') - # return summary \ No newline at end of file + # return summary diff --git a/vel/util/visdom.py b/vel/util/visdom.py index 38877987..6d30369a 100644 --- a/vel/util/visdom.py +++ b/vel/util/visdom.py @@ -118,4 +118,3 @@ def visdom_append_metrics(vis, metrics, first_epoch=False): }, update=update ) - From 2e9d926e2461e9ee96a011f3e0b9703c1ddf7b64 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 13 Jun 2019 12:37:08 -0700 Subject: [PATCH 042/162] Fixed tests after the refactoring. --- Makefile | 5 ++- .../rl/mujoco/ddpg/half_cheetah_ddpg.py | 14 +++---- setup.py | 2 +- vel/api/model.py | 2 +- vel/api/model_factory.py | 2 +- vel/command/lr_find_command.py | 4 +- vel/command/train_command.py | 2 +- vel/internal/parser.py | 2 +- vel/internal/provider.py | 4 +- vel/internal/test/test_parser.py | 2 +- vel/internal/test/test_provider.py | 36 +++++++++--------- vel/launcher.py | 2 +- vel/metric/__init__.py | 3 ++ vel/metric/accuracy.py | 2 +- vel/metric/loss_metric.py | 2 +- .../autoencoder/mnist_cnn_autoencoder.py | 4 +- vel/model/autoencoder/mnist_cnn_vae.py | 6 +-- vel/model/vision/cifar10_cnn_01.py | 4 +- vel/model/vision/cifar_resnet_v1.py | 4 +- vel/model/vision/cifar_resnet_v2.py | 10 +---- vel/model/vision/mnist_cnn_01.py | 4 +- vel/phase/cycle.py | 2 +- vel/rl/algo/distributional_dqn.py | 2 +- vel/rl/algo/policy_gradient/a2c.py | 4 +- vel/rl/algo/policy_gradient/acer.py | 2 +- .../buffer/backend/circular_buffer_backend.py | 2 +- .../test/test_circular_buffer_backend.py | 4 +- .../test_circular_vec_env_buffer_backend.py | 4 +- 
...est_prioritized_circular_buffer_backend.py | 4 +- vel/rl/command/rl_train_command.py | 2 +- vel/rl/metrics.py | 2 +- .../model/backbone/double_noisy_nature_cnn.py | 2 +- vel/rl/model/backbone/noisy_nature_cnn.py | 2 +- vel/rl/model/q_noisy_model.py | 6 +-- vel/rl/model/q_rainbow_model.py | 6 +-- .../model/stochastic_policy_model_separate.py | 8 ++-- vel/rl/module/noise/eps_greedy.py | 4 +- vel/rl/module/noise/ou_noise.py | 4 +- .../q_distributional_noisy_dueling_head.py | 2 +- vel/rl/module/q_noisy_head.py | 2 +- vel/rl/module/test/test_action_head.py | 2 +- vel/rl/test/test_integration.py | 38 +++++++++---------- vel/schedule/linear.py | 2 +- vel/schedule/linear_and_constant.py | 2 +- 44 files changed, 112 insertions(+), 112 deletions(-) diff --git a/Makefile b/Makefile index 23022e26..3f2fc3d3 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: default test requpgrade lint +.PHONY: default test partest requpgrade lint default: test; @@ -36,6 +36,9 @@ serve-visdom: test: pytest . +partestc: + pytest -n 4 . 
+ requirements.txt: pip-compile requirements.in diff --git a/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py b/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py index 6b0e200b..9c0a679b 100644 --- a/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py +++ b/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py @@ -2,22 +2,22 @@ import torch.optim from vel.api import TrainingInfo, EpochInfo -from vel.modules.input.normalize_observations import NormalizeObservationsFactory -from vel.rl.buffers.circular_replay_buffer import CircularReplayBuffer +from vel.module.input.normalize_observations import NormalizeObservationsFactory +from vel.rl.buffer.circular_replay_buffer import CircularReplayBuffer from vel.rl.env_roller.transition_replay_env_roller import TransitionReplayEnvRoller from vel.rl.metrics import EpisodeRewardMetric -from vel.rl.modules.noise.ou_noise import OuNoise +from vel.rl.module.noise.ou_noise import OuNoise from vel.storage.streaming.stdout import StdoutStreaming from vel.util.random import set_seed from vel.rl.env.mujoco import MujocoEnv -from vel.rl.models.deterministic_policy_model import DeterministicPolicyModelFactory -from vel.rl.models.backbone.mlp import MLPFactory -from vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer import ( +from vel.rl.model.deterministic_policy_model import DeterministicPolicyModelFactory +from vel.rl.model.backbone.mlp import MLPFactory +from vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer import ( BufferedOffPolicyIterationReinforcer, BufferedOffPolicyIterationReinforcerSettings ) from vel.rl.algo.policy_gradient.ddpg import DeepDeterministicPolicyGradient from vel.rl.vecenv.dummy import DummyVecEnvWrapper -from vel.optimizers.adam import AdamFactory +from vel.optimizer.adam import AdamFactory def half_cheetah_ddpg(): diff --git a/setup.py b/setup.py index e6290b56..8a8fa5b9 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ 'mongo': ['pymongo', 'dnspython'], 'gym': 
['gym[atari,box2d,classic_control]'], 'mujoco': ['gym[mujoco,robotics]'], - 'dev': ['pytest', 'ipython', 'jupyter', 'pip-tools', 'flake8'], + 'dev': ['pytest', 'ipython', 'jupyter', 'pip-tools', 'flake8', 'pytest-xdist'], 'text': ['spacy'], 'all': ['visdom', 'pymongo', 'dnspython', 'gym[all]', 'pytest', 'spacy', 'ipython', 'jupyter'] }, diff --git a/vel/api/model.py b/vel/api/model.py index 699d8a45..9164442f 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -3,7 +3,7 @@ import vel.util.module_util as mu -from vel.metrics.loss_metric import Loss +from vel.metric.loss_metric import Loss from vel.util.summary import summary diff --git a/vel/api/model_factory.py b/vel/api/model_factory.py index e2e61896..eeb533a0 100644 --- a/vel/api/model_factory.py +++ b/vel/api/model_factory.py @@ -1,5 +1,5 @@ from .model import Model -from vel.internals.generic_factory import GenericFactory +from vel.internal.generic_factory import GenericFactory class ModelFactory: diff --git a/vel/command/lr_find_command.py b/vel/command/lr_find_command.py index f1de1e20..32544c5d 100644 --- a/vel/command/lr_find_command.py +++ b/vel/command/lr_find_command.py @@ -6,10 +6,10 @@ import numpy as np import tqdm -import vel.util.intepolate as interp +import vel.util.interpolate as interp from vel.api import Learner, TrainingInfo, EpochInfo, BatchInfo -from vel.api.metrics.averaging_metric import AveragingNamedMetric +from vel.metric.averaging_metric import AveragingNamedMetric class LrFindCommand: diff --git a/vel/command/train_command.py b/vel/command/train_command.py index f9b6afd2..2e708706 100644 --- a/vel/command/train_command.py +++ b/vel/command/train_command.py @@ -2,7 +2,7 @@ import vel.api as api -from vel.callbacks.time_tracker import TimeTracker +from vel.callback.time_tracker import TimeTracker class SimpleTrainCommand: diff --git a/vel/internal/parser.py b/vel/internal/parser.py index 6c79e10d..d26f000a 100644 --- a/vel/internal/parser.py +++ b/vel/internal/parser.py @@ -1,7 +1,7 
@@ import os import yaml -from vel.exceptions import VelException +from vel.exception import VelException class Dummy: diff --git a/vel/internal/provider.py b/vel/internal/provider.py index 526181cc..d694ad24 100644 --- a/vel/internal/provider.py +++ b/vel/internal/provider.py @@ -1,8 +1,8 @@ import importlib import inspect -from vel.internals.parser import Variable -from vel.internals.generic_factory import GenericFactory +from vel.internal.parser import Variable +from vel.internal.generic_factory import GenericFactory class Provider: diff --git a/vel/internal/test/test_parser.py b/vel/internal/test/test_parser.py index 67fdf2d5..d2fc7053 100644 --- a/vel/internal/test/test_parser.py +++ b/vel/internal/test/test_parser.py @@ -1,6 +1,6 @@ import pytest -import vel.internals.parser as v +import vel.internal.parser as v @pytest.fixture diff --git a/vel/internal/test/test_provider.py b/vel/internal/test/test_provider.py index 4f49e675..7428756f 100644 --- a/vel/internal/test/test_provider.py +++ b/vel/internal/test/test_provider.py @@ -1,9 +1,9 @@ import os import pytest -import vel.internals.provider as v -import vel.internals.parser as p -import vel.exceptions as e +import vel.internal.provider as v +import vel.internal.parser as p +import vel.exception as e def data_function(a, b): @@ -37,17 +37,17 @@ def test_simple_injection(): 'a': 1, 'b': 2, 'one': { - 'name': 'vel.internals.tests.fixture_a' + 'name': 'vel.internal.test.fixture_a' }, 'two': { - 'name': 'vel.internals.tests.fixture_a', + 'name': 'vel.internal.test.fixture_a', 'a': 5, 'b': 6 }, 'three': { - 'name': 'vel.internals.tests.fixture_b', + 'name': 'vel.internal.test.fixture_b', 'd': 'd' } }) @@ -78,20 +78,20 @@ def test_parameter_resolution(): 'a': 1, 'b': p.Parameter("xxx"), 'one': { - 'name': 'vel.internals.tests.fixture_a' + 'name': 'vel.internal.test.fixture_a' }, 'two': { - 'name': 'vel.internals.tests.fixture_a', + 'name': 'vel.internal.test.fixture_a', 'b': p.Parameter('yyy') }, 'three': { - 
'name': 'vel.internals.tests.fixture_a', + 'name': 'vel.internal.test.fixture_a', 'b': p.Parameter('yyy', 7) }, 'four': { - 'name': 'vel.internals.tests.fixture_a', + 'name': 'vel.internal.test.fixture_a', 'b': p.EnvironmentVariable('TEST_VAR') }, @@ -120,20 +120,20 @@ def test_render_configuration(): 'a': 1, 'b': p.Parameter("xxx"), 'one': { - 'name': 'vel.internals.tests.fixture_a' + 'name': 'vel.internal.test.fixture_a' }, 'two': { - 'name': 'vel.internals.tests.fixture_a', + 'name': 'vel.internal.test.fixture_a', 'b': p.Parameter('yyy', 5) }, 'three': { - 'name': 'vel.internals.tests.fixture_a', + 'name': 'vel.internal.test.fixture_a', 'b': p.Parameter('yyy', 7) }, 'four': { - 'name': 'vel.internals.tests.fixture_a', + 'name': 'vel.internal.test.fixture_a', 'b': p.EnvironmentVariable('TEST_VAR') }, @@ -145,20 +145,20 @@ def test_render_configuration(): 'a': 1, 'b': 5, 'one': { - 'name': 'vel.internals.tests.fixture_a' + 'name': 'vel.internal.test.fixture_a' }, 'two': { - 'name': 'vel.internals.tests.fixture_a', + 'name': 'vel.internal.test.fixture_a', 'b': 5 }, 'three': { - 'name': 'vel.internals.tests.fixture_a', + 'name': 'vel.internal.test.fixture_a', 'b': 7 }, 'four': { - 'name': 'vel.internals.tests.fixture_a', + 'name': 'vel.internal.test.fixture_a', 'b': '10' }, } diff --git a/vel/launcher.py b/vel/launcher.py index e4c68114..3f800638 100644 --- a/vel/launcher.py +++ b/vel/launcher.py @@ -4,7 +4,7 @@ import sys from vel.api.model_config import ModelConfig -from vel.internals.parser import Parser +from vel.internal.parser import Parser def main(): diff --git a/vel/metric/__init__.py b/vel/metric/__init__.py index e69de29b..7bb2fe79 100644 --- a/vel/metric/__init__.py +++ b/vel/metric/__init__.py @@ -0,0 +1,3 @@ +from .base_metric import BaseMetric # noqa +from .averaging_metric import AveragingMetric, AveragingNamedMetric, AveragingSupervisedMetric # noqa +from .value_metric import ValueMetric # noqa diff --git a/vel/metric/accuracy.py 
b/vel/metric/accuracy.py index 8cb332bf..442f8470 100644 --- a/vel/metric/accuracy.py +++ b/vel/metric/accuracy.py @@ -1,4 +1,4 @@ -from vel.api.metrics.averaging_metric import AveragingSupervisedMetric +from vel.metric.averaging_metric import AveragingSupervisedMetric class Accuracy(AveragingSupervisedMetric): diff --git a/vel/metric/loss_metric.py b/vel/metric/loss_metric.py index 8de3707d..1e02ce4d 100644 --- a/vel/metric/loss_metric.py +++ b/vel/metric/loss_metric.py @@ -1,4 +1,4 @@ -from vel.api.metrics.averaging_metric import AveragingMetric +from vel.metric.averaging_metric import AveragingMetric class Loss(AveragingMetric): diff --git a/vel/model/autoencoder/mnist_cnn_autoencoder.py b/vel/model/autoencoder/mnist_cnn_autoencoder.py index fa90e4d5..0bb3197e 100644 --- a/vel/model/autoencoder/mnist_cnn_autoencoder.py +++ b/vel/model/autoencoder/mnist_cnn_autoencoder.py @@ -7,8 +7,8 @@ import vel.util.network as net_util from vel.api import LossFunctionModel, ModelFactory -from vel.metrics.loss_metric import Loss -from vel.modules.layers import Flatten, Reshape +from vel.metric.loss_metric import Loss +from vel.module.layers import Flatten, Reshape class MnistCnnAutoencoder(LossFunctionModel): diff --git a/vel/model/autoencoder/mnist_cnn_vae.py b/vel/model/autoencoder/mnist_cnn_vae.py index 6765c874..b678a9de 100644 --- a/vel/model/autoencoder/mnist_cnn_vae.py +++ b/vel/model/autoencoder/mnist_cnn_vae.py @@ -8,9 +8,9 @@ import vel.util.network as net_util from vel.api import SupervisedModel, ModelFactory -from vel.api.metrics import AveragingNamedMetric -from vel.metrics.loss_metric import Loss -from vel.modules.layers import Flatten, Reshape +from vel.metric.averaging_metric import AveragingNamedMetric +from vel.metric.loss_metric import Loss +from vel.module.layers import Flatten, Reshape class MnistCnnVAE(SupervisedModel): diff --git a/vel/model/vision/cifar10_cnn_01.py b/vel/model/vision/cifar10_cnn_01.py index 50dc1328..3f3551af 100644 --- 
a/vel/model/vision/cifar10_cnn_01.py +++ b/vel/model/vision/cifar10_cnn_01.py @@ -9,8 +9,8 @@ import torch.nn.functional as F from vel.api import LossFunctionModel, ModelFactory -from vel.metrics.loss_metric import Loss -from vel.metrics.accuracy import Accuracy +from vel.metric.loss_metric import Loss +from vel.metric.accuracy import Accuracy class Net(LossFunctionModel): diff --git a/vel/model/vision/cifar_resnet_v1.py b/vel/model/vision/cifar_resnet_v1.py index fef562c8..2a19ffa8 100644 --- a/vel/model/vision/cifar_resnet_v1.py +++ b/vel/model/vision/cifar_resnet_v1.py @@ -74,8 +74,8 @@ def loss_value(self, x_data, y_true, y_pred): def metrics(self): """ Set of metrics for this model """ - from vel.metrics.loss_metric import Loss - from vel.metrics.accuracy import Accuracy + from vel.metric.loss_metric import Loss + from vel.metric.accuracy import Accuracy return [Loss(), Accuracy()] diff --git a/vel/model/vision/cifar_resnet_v2.py b/vel/model/vision/cifar_resnet_v2.py index 3bc03b52..2d44ab01 100644 --- a/vel/model/vision/cifar_resnet_v2.py +++ b/vel/model/vision/cifar_resnet_v2.py @@ -76,16 +76,10 @@ def loss_value(self, x_data, y_true, y_pred): def metrics(self): """ Set of metrics for this model """ - from vel.metrics.loss_metric import Loss - from vel.metrics.accuracy import Accuracy + from vel.metric.loss_metric import Loss + from vel.metric.accuracy import Accuracy return [Loss(), Accuracy()] - def summary(self): - """ Print model summary """ - # import torchsummary - # torchsummary.summary(self, input_size=(3, 32, 32)) - print(self) - def create(blocks, mode='basic', inplanes=16, divisor=4, num_classes=1000): """ Vel factory function """ diff --git a/vel/model/vision/mnist_cnn_01.py b/vel/model/vision/mnist_cnn_01.py index 08472f4d..513f33c0 100644 --- a/vel/model/vision/mnist_cnn_01.py +++ b/vel/model/vision/mnist_cnn_01.py @@ -10,8 +10,8 @@ from vel.api import LossFunctionModel, ModelFactory -from vel.metrics.loss_metric import Loss -from 
vel.metrics.accuracy import Accuracy +from vel.metric.loss_metric import Loss +from vel.metric.accuracy import Accuracy class Net(LossFunctionModel): diff --git a/vel/phase/cycle.py b/vel/phase/cycle.py index b1862323..9b38b8be 100644 --- a/vel/phase/cycle.py +++ b/vel/phase/cycle.py @@ -1,6 +1,6 @@ import numpy as np -import vel.util.intepolate as interp +import vel.util.interpolate as interp from vel.api import BatchInfo, EpochInfo, TrainingInfo, Callback, TrainPhase diff --git a/vel/rl/algo/distributional_dqn.py b/vel/rl/algo/distributional_dqn.py index dfa050ec..4b05ecf2 100644 --- a/vel/rl/algo/distributional_dqn.py +++ b/vel/rl/algo/distributional_dqn.py @@ -2,7 +2,7 @@ import torch.nn.utils from vel.api import ModelFactory -from vel.api.metrics.averaging_metric import AveragingNamedMetric +from vel.metric.averaging_metric import AveragingNamedMetric from vel.rl.api import OptimizerAlgoBase diff --git a/vel/rl/algo/policy_gradient/a2c.py b/vel/rl/algo/policy_gradient/a2c.py index 783f954a..86485184 100644 --- a/vel/rl/algo/policy_gradient/a2c.py +++ b/vel/rl/algo/policy_gradient/a2c.py @@ -1,8 +1,8 @@ import torch import torch.nn.functional as F -from vel.api.metrics.averaging_metric import AveragingNamedMetric -from vel.math.functions import explained_variance +from vel.metric.averaging_metric import AveragingNamedMetric +from vel.math.function import explained_variance from vel.rl.api import OptimizerAlgoBase, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae diff --git a/vel/rl/algo/policy_gradient/acer.py b/vel/rl/algo/policy_gradient/acer.py index 82cd3d9a..9426957d 100644 --- a/vel/rl/algo/policy_gradient/acer.py +++ b/vel/rl/algo/policy_gradient/acer.py @@ -1,7 +1,7 @@ import torch import torch.nn.functional as F -from vel.api.metrics.averaging_metric import AveragingNamedMetric +from vel.metric.averaging_metric import AveragingNamedMetric from vel.rl.api import Trajectories, OptimizerAlgoBase diff --git 
a/vel/rl/buffer/backend/circular_buffer_backend.py b/vel/rl/buffer/backend/circular_buffer_backend.py index a328d7fa..7be43246 100644 --- a/vel/rl/buffer/backend/circular_buffer_backend.py +++ b/vel/rl/buffer/backend/circular_buffer_backend.py @@ -1,7 +1,7 @@ import gym import numpy as np -from vel.exceptions import VelException +from vel.exception import VelException class CircularBufferBackend: diff --git a/vel/rl/buffer/test/test_circular_buffer_backend.py b/vel/rl/buffer/test/test_circular_buffer_backend.py index 53031de8..6cf7c9fc 100644 --- a/vel/rl/buffer/test/test_circular_buffer_backend.py +++ b/vel/rl/buffer/test/test_circular_buffer_backend.py @@ -4,8 +4,8 @@ import numpy.testing as nt import pytest -from vel.exceptions import VelException -from vel.rl.buffers.backend.circular_buffer_backend import CircularBufferBackend +from vel.exception import VelException +from vel.rl.buffer.backend.circular_buffer_backend import CircularBufferBackend def get_half_filled_buffer(): diff --git a/vel/rl/buffer/test/test_circular_vec_env_buffer_backend.py b/vel/rl/buffer/test/test_circular_vec_env_buffer_backend.py index 7b45a77e..53336980 100644 --- a/vel/rl/buffer/test/test_circular_vec_env_buffer_backend.py +++ b/vel/rl/buffer/test/test_circular_vec_env_buffer_backend.py @@ -4,8 +4,8 @@ import numpy.testing as nt import pytest -from vel.exceptions import VelException -from vel.rl.buffers.circular_replay_buffer import CircularVecEnvBufferBackend +from vel.exception import VelException +from vel.rl.buffer.circular_replay_buffer import CircularVecEnvBufferBackend def get_half_filled_buffer(frame_history=1): diff --git a/vel/rl/buffer/test/test_prioritized_circular_buffer_backend.py b/vel/rl/buffer/test/test_prioritized_circular_buffer_backend.py index d9907ebc..f603d65e 100644 --- a/vel/rl/buffer/test/test_prioritized_circular_buffer_backend.py +++ b/vel/rl/buffer/test/test_prioritized_circular_buffer_backend.py @@ -5,8 +5,8 @@ import numpy.testing as nt import pytest 
-from vel.exceptions import VelException -from vel.rl.buffers.backend.prioritized_buffer_backend import PrioritizedCircularBufferBackend +from vel.exception import VelException +from vel.rl.buffer.backend.prioritized_buffer_backend import PrioritizedCircularBufferBackend def get_halfempty_buffer_with_dones(): diff --git a/vel/rl/command/rl_train_command.py b/vel/rl/command/rl_train_command.py index e1c0d9fb..0e852826 100644 --- a/vel/rl/command/rl_train_command.py +++ b/vel/rl/command/rl_train_command.py @@ -2,7 +2,7 @@ from vel.api import ModelConfig, EpochInfo, TrainingInfo, BatchInfo, OptimizerFactory, Storage, Callback from vel.rl.api import ReinforcerFactory -from vel.callbacks.time_tracker import TimeTracker +from vel.callback.time_tracker import TimeTracker import vel.openai.baselines.logger as openai_logger diff --git a/vel/rl/metrics.py b/vel/rl/metrics.py index 3f5be7b2..d41cf25a 100644 --- a/vel/rl/metrics.py +++ b/vel/rl/metrics.py @@ -4,7 +4,7 @@ import torch from vel.api import BatchInfo -from vel.api.metrics import BaseMetric, AveragingMetric, ValueMetric +from vel.metric import BaseMetric, AveragingMetric, ValueMetric class FramesMetric(ValueMetric): diff --git a/vel/rl/model/backbone/double_noisy_nature_cnn.py b/vel/rl/model/backbone/double_noisy_nature_cnn.py index ca6626b5..a55fc8ed 100644 --- a/vel/rl/model/backbone/double_noisy_nature_cnn.py +++ b/vel/rl/model/backbone/double_noisy_nature_cnn.py @@ -13,7 +13,7 @@ import vel.util.network as net_util from vel.api import LinearBackboneModel, ModelFactory -from vel.rl.modules.noisy_linear import NoisyLinear +from vel.rl.module.noisy_linear import NoisyLinear class DoubleNoisyNatureCnn(LinearBackboneModel): diff --git a/vel/rl/model/backbone/noisy_nature_cnn.py b/vel/rl/model/backbone/noisy_nature_cnn.py index 7f5e3b64..d258543e 100644 --- a/vel/rl/model/backbone/noisy_nature_cnn.py +++ b/vel/rl/model/backbone/noisy_nature_cnn.py @@ -13,7 +13,7 @@ import vel.util.network as net_util from vel.api 
import LinearBackboneModel, ModelFactory -from vel.rl.modules.noisy_linear import NoisyLinear +from vel.rl.module.noisy_linear import NoisyLinear class NoisyNatureCnn(LinearBackboneModel): diff --git a/vel/rl/model/q_noisy_model.py b/vel/rl/model/q_noisy_model.py index 9dc73e6e..b2d747bb 100644 --- a/vel/rl/model/q_noisy_model.py +++ b/vel/rl/model/q_noisy_model.py @@ -2,10 +2,10 @@ import typing from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.modules.input.identity import IdentityFactory +from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, RlModel, Evaluator -from vel.rl.models.q_model import QModelEvaluator -from vel.rl.modules.q_noisy_head import QNoisyHead +from vel.rl.model.q_model import QModelEvaluator +from vel.rl.module.q_noisy_head import QNoisyHead class NoisyQModel(RlModel): diff --git a/vel/rl/model/q_rainbow_model.py b/vel/rl/model/q_rainbow_model.py index 3e5aea33..d9b9dfbf 100644 --- a/vel/rl/model/q_rainbow_model.py +++ b/vel/rl/model/q_rainbow_model.py @@ -2,10 +2,10 @@ import typing from vel.api import LinearBackboneModel, Model, ModelFactory, BackboneModel -from vel.modules.input.identity import IdentityFactory +from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, Evaluator -from vel.rl.models.q_distributional_model import QDistributionalModelEvaluator -from vel.rl.modules.q_distributional_noisy_dueling_head import QDistributionalNoisyDuelingHead +from vel.rl.model.q_distributional_model import QDistributionalModelEvaluator +from vel.rl.module.q_distributional_noisy_dueling_head import QDistributionalNoisyDuelingHead class QRainbowModel(Model): diff --git a/vel/rl/model/stochastic_policy_model_separate.py b/vel/rl/model/stochastic_policy_model_separate.py index 50ab5ffd..7612fde3 100644 --- a/vel/rl/model/stochastic_policy_model_separate.py +++ b/vel/rl/model/stochastic_policy_model_separate.py @@ -3,11 +3,11 @@ import typing from vel.api import 
LinearBackboneModel, ModelFactory, BackboneModel -from vel.modules.input.identity import IdentityFactory +from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, RlModel, Evaluator -from vel.rl.modules.action_head import ActionHead -from vel.rl.modules.value_head import ValueHead -from vel.rl.models.stochastic_policy_model import StochasticPolicyEvaluator +from vel.rl.module.action_head import ActionHead +from vel.rl.module.value_head import ValueHead +from vel.rl.model.stochastic_policy_model import StochasticPolicyEvaluator class StochasticPolicyModelSeparate(RlModel): diff --git a/vel/rl/module/noise/eps_greedy.py b/vel/rl/module/noise/eps_greedy.py index b5e6f0c9..5764a489 100644 --- a/vel/rl/module/noise/eps_greedy.py +++ b/vel/rl/module/noise/eps_greedy.py @@ -4,8 +4,8 @@ import torch.nn as nn from vel.api import Schedule -from vel.internals.generic_factory import GenericFactory -from vel.schedules.constant import ConstantSchedule +from vel.internal.generic_factory import GenericFactory +from vel.schedule.constant import ConstantSchedule class EpsGreedy(nn.Module): diff --git a/vel/rl/module/noise/ou_noise.py b/vel/rl/module/noise/ou_noise.py index be6ea0d8..a87f9786 100644 --- a/vel/rl/module/noise/ou_noise.py +++ b/vel/rl/module/noise/ou_noise.py @@ -2,8 +2,8 @@ import numpy as np import torch.nn as nn -from vel.math.processes import OrnsteinUhlenbeckNoiseProcess -from vel.internals.generic_factory import GenericFactory +from vel.math.process import OrnsteinUhlenbeckNoiseProcess +from vel.internal.generic_factory import GenericFactory class OuNoise(nn.Module): diff --git a/vel/rl/module/q_distributional_noisy_dueling_head.py b/vel/rl/module/q_distributional_noisy_dueling_head.py index 4c5c30bb..3e0f2794 100644 --- a/vel/rl/module/q_distributional_noisy_dueling_head.py +++ b/vel/rl/module/q_distributional_noisy_dueling_head.py @@ -5,7 +5,7 @@ import torch.nn.functional as F -from vel.rl.modules.noisy_linear import NoisyLinear 
+from vel.rl.module.noisy_linear import NoisyLinear class QDistributionalNoisyDuelingHead(nn.Module): diff --git a/vel/rl/module/q_noisy_head.py b/vel/rl/module/q_noisy_head.py index 63f510d6..8b171e1c 100644 --- a/vel/rl/module/q_noisy_head.py +++ b/vel/rl/module/q_noisy_head.py @@ -2,7 +2,7 @@ import gym.spaces as spaces -from vel.rl.modules.noisy_linear import NoisyLinear +from vel.rl.module.noisy_linear import NoisyLinear class QNoisyHead(nn.Module): diff --git a/vel/rl/module/test/test_action_head.py b/vel/rl/module/test/test_action_head.py index 5e3ff74c..6dc22e06 100644 --- a/vel/rl/module/test/test_action_head.py +++ b/vel/rl/module/test/test_action_head.py @@ -7,7 +7,7 @@ import torch.nn.functional as F import torch.distributions as d -from vel.rl.modules.action_head import DiagGaussianActionHead, CategoricalActionHead +from vel.rl.module.action_head import DiagGaussianActionHead, CategoricalActionHead def test_sample_diag_gaussian(): diff --git a/vel/rl/test/test_integration.py b/vel/rl/test/test_integration.py index 42aac5e3..2f51b419 100644 --- a/vel/rl/test/test_integration.py +++ b/vel/rl/test/test_integration.py @@ -1,19 +1,19 @@ import torch import torch.optim as optim -from vel.modules.input.image_to_tensor import ImageToTensorFactory -from vel.modules.input.normalize_observations import NormalizeObservationsFactory -from vel.rl.buffers.circular_replay_buffer import CircularReplayBuffer -from vel.rl.buffers.prioritized_circular_replay_buffer import PrioritizedCircularReplayBuffer -from vel.rl.commands.rl_train_command import FrameTracker +from vel.module.input.image_to_tensor import ImageToTensorFactory +from vel.module.input.normalize_observations import NormalizeObservationsFactory +from vel.rl.buffer.circular_replay_buffer import CircularReplayBuffer +from vel.rl.buffer.prioritized_circular_replay_buffer import PrioritizedCircularReplayBuffer +from vel.rl.command.rl_train_command import FrameTracker from vel.rl.env_roller.step_env_roller import 
StepEnvRoller from vel.rl.env_roller.trajectory_replay_env_roller import TrajectoryReplayEnvRoller from vel.rl.env_roller.transition_replay_env_roller import TransitionReplayEnvRoller from vel.rl.metrics import EpisodeRewardMetric -from vel.rl.modules.noise.eps_greedy import EpsGreedy -from vel.rl.modules.noise.ou_noise import OuNoise -from vel.schedules.linear import LinearSchedule -from vel.schedules.linear_and_constant import LinearAndConstantSchedule +from vel.rl.module.noise.eps_greedy import EpsGreedy +from vel.rl.module.noise.ou_noise import OuNoise +from vel.schedule.linear import LinearSchedule +from vel.schedule.linear_and_constant import LinearAndConstantSchedule from vel.util.random import set_seed from vel.rl.env.classic_atari import ClassicAtariEnv @@ -21,24 +21,24 @@ from vel.rl.vecenv.subproc import SubprocVecEnvWrapper from vel.rl.vecenv.dummy import DummyVecEnvWrapper -from vel.rl.models.stochastic_policy_model import StochasticPolicyModelFactory -from vel.rl.models.q_stochastic_policy_model import QStochasticPolicyModelFactory -from vel.rl.models.q_model import QModelFactory -from vel.rl.models.deterministic_policy_model import DeterministicPolicyModelFactory -from vel.rl.models.stochastic_policy_model_separate import StochasticPolicyModelSeparateFactory +from vel.rl.model.stochastic_policy_model import StochasticPolicyModelFactory +from vel.rl.model.q_stochastic_policy_model import QStochasticPolicyModelFactory +from vel.rl.model.q_model import QModelFactory +from vel.rl.model.deterministic_policy_model import DeterministicPolicyModelFactory +from vel.rl.model.stochastic_policy_model_separate import StochasticPolicyModelSeparateFactory -from vel.rl.models.backbone.nature_cnn import NatureCnnFactory -from vel.rl.models.backbone.mlp import MLPFactory +from vel.rl.model.backbone.nature_cnn import NatureCnnFactory +from vel.rl.model.backbone.mlp import MLPFactory -from vel.rl.reinforcers.on_policy_iteration_reinforcer import ( +from 
vel.rl.reinforcer.on_policy_iteration_reinforcer import ( OnPolicyIterationReinforcer, OnPolicyIterationReinforcerSettings ) -from vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer import ( +from vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer import ( BufferedOffPolicyIterationReinforcer, BufferedOffPolicyIterationReinforcerSettings ) -from vel.rl.reinforcers.buffered_mixed_policy_iteration_reinforcer import ( +from vel.rl.reinforcer.buffered_mixed_policy_iteration_reinforcer import ( BufferedMixedPolicyIterationReinforcer, BufferedMixedPolicyIterationReinforcerSettings ) diff --git a/vel/schedule/linear.py b/vel/schedule/linear.py index a3f88c4f..58ca5a23 100644 --- a/vel/schedule/linear.py +++ b/vel/schedule/linear.py @@ -1,4 +1,4 @@ -import vel.util.intepolate as interpolate +import vel.util.interpolate as interpolate from vel.api import Schedule diff --git a/vel/schedule/linear_and_constant.py b/vel/schedule/linear_and_constant.py index f04b9e4a..fecf5d19 100644 --- a/vel/schedule/linear_and_constant.py +++ b/vel/schedule/linear_and_constant.py @@ -1,4 +1,4 @@ -import vel.util.intepolate as interpolate +import vel.util.interpolate as interpolate from vel.api import Schedule From 9150db17d094a3fb4638f70a4accd4c3034f4ab6 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Sat, 15 Jun 2019 21:25:06 -0700 Subject: [PATCH 043/162] Renaming models to policies. 
--- .velproject.yaml | 12 +- .../rl/atari/a2c/airraid_a2c.yaml | 69 -------- .../rl/atari/a2c/breakout_a2c.yaml | 69 -------- .../rl/atari/a2c/freeway_a2c.yaml | 69 -------- examples-configs/rl/atari/a2c/pacman_a2c.yaml | 69 -------- .../rl/atari/a2c/pitfall_a2c.yaml | 69 -------- .../rl/atari/a2c/pong_a2c_lstm.yaml | 71 -------- examples-configs/rl/atari/a2c/qbert_a2c.yaml | 70 -------- .../rl/atari/a2c/space_invaders_a2c.yaml | 69 -------- .../rl/atari/acer/breakout_acer.yaml | 82 --------- .../acer/breakout_acer_trust_region.yaml | 83 ---------- .../acer/seaquest_acer_trust_region.yaml | 83 ---------- .../rl/atari/acer/space_invaders_acer.yaml | 82 --------- .../space_invaders_acer_trust_region.yaml | 83 ---------- .../{a2c/pong_a2c.yaml => atari_a2c.yaml} | 25 +-- ...kout_a2c_lstm.yaml => atari_a2c_lstm.yaml} | 4 +- ...rmsprop.yaml => atari_a2c_tf_rmsprop.yaml} | 4 +- .../beam_rider_acer.yaml => atari_acer.yaml} | 4 +- ...gion.yaml => atari_acer_trust_region.yaml} | 4 +- .../{ppo/breakout_ppo.yaml => atari_ppo.yaml} | 27 +-- ...eakout_ppo_gru.yaml => atari_ppo_gru.yaml} | 22 +-- .../breakout_trpo.yaml => atari_trpo.yaml} | 5 +- .../{breakout_ddqn.yaml => atari_ddqn.yaml} | 4 +- ...nal.yaml => atari_dqn_distributional.yaml} | 4 +- ...eakout_dqn_raw.yaml => atari_dqn_raw.yaml} | 4 +- ...ling_ddqn.yaml => atari_dueling_ddqn.yaml} | 4 +- ...ml => atari_dueling_ddqn_prioritized.yaml} | 4 +- .../dqn/seaquest_dqn_distributional.yaml | 90 ---------- .../rl/atari/dqn/seaquest_dqn_raw.yaml | 86 ---------- .../dqn_rainbow_param/asterix_rp_dqn_raw.yaml | 89 ---------- .../atari_rainbow.yaml} | 4 +- ....yaml => atari_rp_dqn_distributional.yaml} | 4 +- ...isynet.yaml => atari_rp_dqn_noisynet.yaml} | 5 +- ...dqn_nstep.yaml => atari_rp_dqn_nstep.yaml} | 5 +- ..._rp_dqn_raw.yaml => atari_rp_dqn_raw.yaml} | 5 +- .../atlantis_rp_dqn_raw.yaml | 88 ---------- examples-configs/rl/atari/ppo/enduro_ppo.yaml | 85 ---------- examples-configs/rl/atari/ppo/qbert_ppo.yaml | 81 --------- 
examples-scripts/rl/atari/a2c/breakout_a2c.py | 10 +- .../rl/atari/a2c/breakout_a2c_evaluate.py | 2 +- .../rl/mujoco/ddpg/half_cheetah_ddpg.py | 2 +- vel/api/model.py | 4 + vel/model/vision/cifar_resnet_v1.py | 2 +- vel/model/vision/cifar_resnet_v2.py | 2 +- vel/module/input/one_hot_encoding.py | 2 +- vel/module/rnn_cell.py | 4 + vel/rl/algo/policy_gradient/trpo.py | 2 +- vel/rl/api/__init__.py | 2 +- vel/rl/api/algo_base.py | 4 +- vel/rl/api/env_roller.py | 15 +- vel/rl/api/model.py | 50 ------ vel/rl/api/policy.py | 22 +++ vel/rl/api/rollout.py | 2 +- vel/rl/{model => backbone}/__init__.py | 0 .../{model => }/backbone/double_nature_cnn.py | 0 .../backbone/double_noisy_nature_cnn.py | 0 vel/rl/{model => }/backbone/lstm.py | 0 vel/rl/{model => }/backbone/mlp.py | 0 vel/rl/{model => }/backbone/nature_cnn.py | 0 vel/rl/{model => }/backbone/nature_cnn_rnn.py | 11 +- .../{model => }/backbone/nature_cnn_small.py | 0 .../{model => }/backbone/noisy_nature_cnn.py | 0 vel/rl/env_roller/step_env_roller.py | 42 ++--- ...tion_head.py => stochastic_action_head.py} | 11 +- vel/rl/{model/backbone => policy}/__init__.py | 0 vel/rl/policy/purgatory/__init__.py | 0 .../purgatory/deterministic_policy.py} | 0 .../purgatory/q_distributional_policy.py} | 0 .../purgatory/q_dueling_policy.py} | 0 vel/rl/{model => policy/purgatory}/q_model.py | 0 .../purgatory}/q_noisy_model.py | 0 .../purgatory}/q_rainbow_model.py | 0 .../purgatory}/q_stochastic_policy_model.py | 8 +- .../purgatory/stochastic_policy.py} | 8 +- .../stochastic_policy_model_separate.py | 8 +- .../purgatory/stochastic_rnn_policy.py} | 8 +- vel/rl/policy/stochastic_policy.py | 121 ++++++++++++++ vel/rl/policy/stochastic_rnn_policy.py | 156 ++++++++++++++++++ ...fered_mixed_policy_iteration_reinforcer.py | 4 +- ...uffered_off_policy_iteration_reinforcer.py | 2 +- .../on_policy_iteration_reinforcer.py | 26 +-- vel/rl/test/test_integration.py | 4 +- vel/rl/util/actor.py | 36 ++++ vel/storage/streaming/tensorboard.py | 40 +++++ 
vel/util/tensor_util.py | 14 ++ 85 files changed, 562 insertions(+), 1699 deletions(-) delete mode 100644 examples-configs/rl/atari/a2c/airraid_a2c.yaml delete mode 100644 examples-configs/rl/atari/a2c/breakout_a2c.yaml delete mode 100644 examples-configs/rl/atari/a2c/freeway_a2c.yaml delete mode 100644 examples-configs/rl/atari/a2c/pacman_a2c.yaml delete mode 100644 examples-configs/rl/atari/a2c/pitfall_a2c.yaml delete mode 100644 examples-configs/rl/atari/a2c/pong_a2c_lstm.yaml delete mode 100644 examples-configs/rl/atari/a2c/qbert_a2c.yaml delete mode 100644 examples-configs/rl/atari/a2c/space_invaders_a2c.yaml delete mode 100644 examples-configs/rl/atari/acer/breakout_acer.yaml delete mode 100644 examples-configs/rl/atari/acer/breakout_acer_trust_region.yaml delete mode 100644 examples-configs/rl/atari/acer/seaquest_acer_trust_region.yaml delete mode 100644 examples-configs/rl/atari/acer/space_invaders_acer.yaml delete mode 100644 examples-configs/rl/atari/acer/space_invaders_acer_trust_region.yaml rename examples-configs/rl/atari/{a2c/pong_a2c.yaml => atari_a2c.yaml} (65%) rename examples-configs/rl/atari/{a2c/breakout_a2c_lstm.yaml => atari_a2c_lstm.yaml} (95%) rename examples-configs/rl/atari/{a2c/breakout_a2c_tf_rmsprop.yaml => atari_a2c_tf_rmsprop.yaml} (94%) rename examples-configs/rl/atari/{acer/beam_rider_acer.yaml => atari_acer.yaml} (96%) rename examples-configs/rl/atari/{acer/beam_rider_acer_trust_region.yaml => atari_acer_trust_region.yaml} (96%) rename examples-configs/rl/atari/{ppo/breakout_ppo.yaml => atari_ppo.yaml} (70%) rename examples-configs/rl/atari/{ppo/breakout_ppo_gru.yaml => atari_ppo_gru.yaml} (74%) rename examples-configs/rl/atari/{trpo/breakout_trpo.yaml => atari_trpo.yaml} (96%) rename examples-configs/rl/atari/dqn/{breakout_ddqn.yaml => atari_ddqn.yaml} (96%) rename examples-configs/rl/atari/dqn/{breakout_dqn_distributional.yaml => atari_dqn_distributional.yaml} (96%) rename examples-configs/rl/atari/dqn/{breakout_dqn_raw.yaml => 
atari_dqn_raw.yaml} (96%) rename examples-configs/rl/atari/dqn/{breakout_dueling_ddqn.yaml => atari_dueling_ddqn.yaml} (96%) rename examples-configs/rl/atari/dqn/{breakout_dueling_ddqn_prioritized.yaml => atari_dueling_ddqn_prioritized.yaml} (96%) delete mode 100644 examples-configs/rl/atari/dqn/seaquest_dqn_distributional.yaml delete mode 100644 examples-configs/rl/atari/dqn/seaquest_dqn_raw.yaml delete mode 100644 examples-configs/rl/atari/dqn_rainbow_param/asterix_rp_dqn_raw.yaml rename examples-configs/rl/atari/{rainbow/breakout_rainbow.yaml => dqn_rainbow_param/atari_rainbow.yaml} (97%) rename examples-configs/rl/atari/dqn_rainbow_param/{asterix_rp_dqn_distributional.yaml => atari_rp_dqn_distributional.yaml} (96%) rename examples-configs/rl/atari/dqn_rainbow_param/{asteroids_rp_dqn_noisynet.yaml => atari_rp_dqn_noisynet.yaml} (96%) rename examples-configs/rl/atari/dqn_rainbow_param/{atlantis_rp_dqn_nstep.yaml => atari_rp_dqn_nstep.yaml} (96%) rename examples-configs/rl/atari/dqn_rainbow_param/{asteroids_rp_dqn_raw.yaml => atari_rp_dqn_raw.yaml} (96%) delete mode 100644 examples-configs/rl/atari/dqn_rainbow_param/atlantis_rp_dqn_raw.yaml delete mode 100644 examples-configs/rl/atari/ppo/enduro_ppo.yaml delete mode 100644 examples-configs/rl/atari/ppo/qbert_ppo.yaml delete mode 100644 vel/rl/api/model.py create mode 100644 vel/rl/api/policy.py rename vel/rl/{model => backbone}/__init__.py (100%) rename vel/rl/{model => }/backbone/double_nature_cnn.py (100%) rename vel/rl/{model => }/backbone/double_noisy_nature_cnn.py (100%) rename vel/rl/{model => }/backbone/lstm.py (100%) rename vel/rl/{model => }/backbone/mlp.py (100%) rename vel/rl/{model => }/backbone/nature_cnn.py (100%) rename vel/rl/{model => }/backbone/nature_cnn_rnn.py (83%) rename vel/rl/{model => }/backbone/nature_cnn_small.py (100%) rename vel/rl/{model => }/backbone/noisy_nature_cnn.py (100%) rename vel/rl/module/{action_head.py => stochastic_action_head.py} (96%) rename vel/rl/{model/backbone => 
policy}/__init__.py (100%) create mode 100644 vel/rl/policy/purgatory/__init__.py rename vel/rl/{model/deterministic_policy_model.py => policy/purgatory/deterministic_policy.py} (100%) rename vel/rl/{model/q_distributional_model.py => policy/purgatory/q_distributional_policy.py} (100%) rename vel/rl/{model/q_dueling_model.py => policy/purgatory/q_dueling_policy.py} (100%) rename vel/rl/{model => policy/purgatory}/q_model.py (100%) rename vel/rl/{model => policy/purgatory}/q_noisy_model.py (100%) rename vel/rl/{model => policy/purgatory}/q_rainbow_model.py (100%) rename vel/rl/{model => policy/purgatory}/q_stochastic_policy_model.py (94%) rename vel/rl/{model/stochastic_policy_model.py => policy/purgatory/stochastic_policy.py} (94%) rename vel/rl/{model => policy/purgatory}/stochastic_policy_model_separate.py (94%) rename vel/rl/{model/stochastic_policy_rnn_model.py => policy/purgatory/stochastic_rnn_policy.py} (95%) create mode 100644 vel/rl/policy/stochastic_policy.py create mode 100644 vel/rl/policy/stochastic_rnn_policy.py create mode 100644 vel/rl/util/actor.py create mode 100644 vel/storage/streaming/tensorboard.py diff --git a/.velproject.yaml b/.velproject.yaml index 5921e596..2b6bbabd 100644 --- a/.velproject.yaml +++ b/.velproject.yaml @@ -2,12 +2,16 @@ storage: name: vel.storage.classic backend: - name: vel.storage.backend.mongodb - uri: 'mongodb://localhost:27017/' - database: deep_learning + name: vel.storage.backend.dummy + +# Other potential setting +# name: vel.storage.backend.mongodb +# uri: 'mongodb://localhost:27017/' +# database: deep_learning streaming: - - name: vel.storage.streaming.visdom + - name: vel.storage.streaming.tensorboard +# - name: vel.storage.streaming.visdom - name: vel.storage.streaming.stdout diff --git a/examples-configs/rl/atari/a2c/airraid_a2c.yaml b/examples-configs/rl/atari/a2c/airraid_a2c.yaml deleted file mode 100644 index 6869f6ad..00000000 --- a/examples-configs/rl/atari/a2c/airraid_a2c.yaml +++ /dev/null @@ -1,69 +0,0 
@@ -name: 'airraid_a2c' - - -env: - name: vel.rl.env.classic_atari - game: 'AirRaidNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 - - env_roller: - name: vel.rl.env_roller.step_env_roller - - number_of_steps: 5 # How many environment steps go into a single batch - parallel_envs: 16 # How many environments to run in parallel - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 100 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'airraid_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 16 # How many environments to run in parallel - - takes: 20 - - visdom: - name: vel.commands.vis_store_command diff --git a/examples-configs/rl/atari/a2c/breakout_a2c.yaml b/examples-configs/rl/atari/a2c/breakout_a2c.yaml deleted file mode 100644 index 77b7593b..00000000 --- a/examples-configs/rl/atari/a2c/breakout_a2c.yaml +++ /dev/null @@ -1,69 +0,0 @@ -name: 'breakout_a2c' - - -env: - name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: 
vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 - - env_roller: - name: vel.rl.env_roller.step_env_roller - - number_of_steps: 5 # How many environment steps go into a single batch - parallel_envs: 16 # How many environments to run in parallel - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 100 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'breakout_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 16 # How many environments to run in parallel - - takes: 20 - - visdom: - name: vel.commands.vis_store_command diff --git a/examples-configs/rl/atari/a2c/freeway_a2c.yaml b/examples-configs/rl/atari/a2c/freeway_a2c.yaml deleted file mode 100644 index d8f7ce2c..00000000 --- a/examples-configs/rl/atari/a2c/freeway_a2c.yaml +++ /dev/null @@ -1,69 +0,0 @@ -name: 'freeway_a2c' - - -env: - name: vel.rl.env.classic_atari - game: 'FreewayNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 - - env_roller: - 
name: vel.rl.env_roller.step_env_roller - - number_of_steps: 5 # How many environment steps go into a single batch - parallel_envs: 16 # How many environments to run in parallel - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 100 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'freeway_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 16 # How many environments to run in parallel - - takes: 20 - - visdom: - name: vel.commands.vis_store_command diff --git a/examples-configs/rl/atari/a2c/pacman_a2c.yaml b/examples-configs/rl/atari/a2c/pacman_a2c.yaml deleted file mode 100644 index 3ad255b1..00000000 --- a/examples-configs/rl/atari/a2c/pacman_a2c.yaml +++ /dev/null @@ -1,69 +0,0 @@ -name: 'pacman_a2c' - - -env: - name: vel.rl.env.classic_atari - game: 'MsPacmanNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 - - env_roller: - name: vel.rl.env_roller.step_env_roller - - number_of_steps: 5 # How many environment steps go into a single batch - parallel_envs: 16 # How many environments to run in parallel - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - 
batches_per_epoch: 100 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'pacman_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 16 # How many environments to run in parallel - - takes: 20 - - visdom: - name: vel.commands.vis_store_command diff --git a/examples-configs/rl/atari/a2c/pitfall_a2c.yaml b/examples-configs/rl/atari/a2c/pitfall_a2c.yaml deleted file mode 100644 index fa9b467e..00000000 --- a/examples-configs/rl/atari/a2c/pitfall_a2c.yaml +++ /dev/null @@ -1,69 +0,0 @@ -name: 'pitfall_a2c' - - -env: - name: vel.rl.env.classic_atari - game: 'PitfallNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 - - env_roller: - name: vel.rl.env_roller.step_env_roller - - number_of_steps: 5 # How many environment steps go into a single batch - parallel_envs: 16 # How many environments to run in parallel - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 100 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'pitfall_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 16 # How many environments to run in parallel - - takes: 20 - - visdom: - name: vel.commands.vis_store_command diff --git 
a/examples-configs/rl/atari/a2c/pong_a2c_lstm.yaml b/examples-configs/rl/atari/a2c/pong_a2c_lstm.yaml deleted file mode 100644 index e7023fd6..00000000 --- a/examples-configs/rl/atari/a2c/pong_a2c_lstm.yaml +++ /dev/null @@ -1,71 +0,0 @@ -name: 'pong_a2c_lstm' - - -env: - name: vel.rl.env.classic_atari - game: 'PongNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.stochastic_policy_rnn_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn_rnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 - - env_roller: - name: vel.rl.env_roller.step_env_roller - - number_of_steps: 5 # How many environment steps go into a single batch - parallel_envs: 16 # How many environments to run in parallel - - shuffle_transitions: off # Required for RNN policies - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 100 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'pong_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 16 # How many environments to run in parallel - - takes: 20 - - visdom: - name: vel.commands.vis_store_command diff --git a/examples-configs/rl/atari/a2c/qbert_a2c.yaml b/examples-configs/rl/atari/a2c/qbert_a2c.yaml deleted file mode 100644 index 32c97af6..00000000 --- a/examples-configs/rl/atari/a2c/qbert_a2c.yaml +++ /dev/null @@ -1,70 +0,0 @@ -name: 'qbert_a2c' - - -env: - name: vel.rl.env.classic_atari - game: 
'QbertNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - rnn_type: 'lstm' - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 - - env_roller: - name: vel.rl.env_roller.step_env_roller - - number_of_steps: 5 # How many environment steps go into a single batch - parallel_envs: 16 # How many environments to run in parallel - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 100 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'qbert_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 16 # How many environments to run in parallel - - takes: 20 - - visdom: - name: vel.commands.vis_store_command diff --git a/examples-configs/rl/atari/a2c/space_invaders_a2c.yaml b/examples-configs/rl/atari/a2c/space_invaders_a2c.yaml deleted file mode 100644 index 6333f1d2..00000000 --- a/examples-configs/rl/atari/a2c/space_invaders_a2c.yaml +++ /dev/null @@ -1,69 +0,0 @@ -name: 'space_invaders_a2c' - - -env: - name: vel.rl.env.classic_atari - game: 'SpaceInvadersNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: 
vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 - - env_roller: - name: vel.rl.env_roller.step_env_roller - - number_of_steps: 5 # How many environment steps go into a single batch - parallel_envs: 16 # How many environments to run in parallel - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 100 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'space_invaders_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 16 # How many environments to run in parallel - - takes: 20 - - visdom: - name: vel.commands.vis_store_command diff --git a/examples-configs/rl/atari/acer/breakout_acer.yaml b/examples-configs/rl/atari/acer/breakout_acer.yaml deleted file mode 100644 index 5d4d787e..00000000 --- a/examples-configs/rl/atari/acer/breakout_acer.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: 'breakout_acer_notr' - - -env: - name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_mixed_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.trajectory_replay_env_roller - - replay_buffer: - name: 
vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 1_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 50_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - algo: - name: vel.rl.algo.policy_gradient.acer - entropy_coefficient: 0.01 - q_coefficient: 0.5 - rho_cap: 10.0 - retrace_rho_cap: 1.0 - - max_grad_norm: 10.0 - discount_factor: 0.99 - - trust_region: false - - parallel_envs: 12 # How many environments to run in parallel - number_of_steps: 20 # How many environment steps go into a single batch - experience_replay: 4 - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 -# epsilon: 1.0e-5 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 30 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'breakout_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 - parallel_envs: 12 # How many environments to run in parallel diff --git a/examples-configs/rl/atari/acer/breakout_acer_trust_region.yaml b/examples-configs/rl/atari/acer/breakout_acer_trust_region.yaml deleted file mode 100644 index 7fc824ab..00000000 --- a/examples-configs/rl/atari/acer/breakout_acer_trust_region.yaml +++ /dev/null @@ -1,83 +0,0 @@ -name: 'breakout_acer_trust_region' - - -env: - name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same 
as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_mixed_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.trajectory_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 1_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 50_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - algo: - name: vel.rl.algo.policy_gradient.acer - entropy_coefficient: 0.01 - q_coefficient: 0.5 - rho_cap: 10.0 - retrace_rho_cap: 1.0 - - max_grad_norm: 10.0 - discount_factor: 0.99 - - trust_region: true - trust_region_delta: 1.0 - - parallel_envs: 12 # How many environments to run in parallel - number_of_steps: 20 # How many environment steps go into a single batch - experience_replay: 4 - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - # epsilon: 1.0e-5 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 30 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'breakout_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 - parallel_envs: 12 # How many environments to run in parallel diff --git a/examples-configs/rl/atari/acer/seaquest_acer_trust_region.yaml b/examples-configs/rl/atari/acer/seaquest_acer_trust_region.yaml deleted file mode 100644 index 4fab32f0..00000000 --- a/examples-configs/rl/atari/acer/seaquest_acer_trust_region.yaml +++ /dev/null @@ -1,83 +0,0 @@ -name: 'seaquest_acer' - - -env: - name: vel.rl.env.classic_atari - game: 'SeaquestNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: 
vel.rl.models.q_stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_mixed_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.trajectory_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 1_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 50_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - algo: - name: vel.rl.algo.policy_gradient.acer - entropy_coefficient: 0.01 - q_coefficient: 0.5 - rho_cap: 10.0 - retrace_rho_cap: 1.0 - - max_grad_norm: 10.0 - discount_factor: 0.99 - - trust_region: true - trust_region_delta: 1.0 - - parallel_envs: 12 # How many environments to run in parallel - number_of_steps: 20 # How many environment steps go into a single batch - experience_replay: 4 - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - # epsilon: 1.0e-5 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 10 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'seaquest_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 - parallel_envs: 12 # How many environments to run in parallel diff --git a/examples-configs/rl/atari/acer/space_invaders_acer.yaml b/examples-configs/rl/atari/acer/space_invaders_acer.yaml deleted file mode 100644 index b1dd8001..00000000 --- a/examples-configs/rl/atari/acer/space_invaders_acer.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: 'spaceinvaders_acer_notr' - - -env: - 
name: vel.rl.env.classic_atari - game: 'SpaceInvadersNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_mixed_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.trajectory_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 1_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 50_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - algo: - name: vel.rl.algo.policy_gradient.acer - entropy_coefficient: 0.01 - q_coefficient: 0.5 - rho_cap: 10.0 - retrace_rho_cap: 1.0 - - max_grad_norm: 10.0 - discount_factor: 0.99 - - trust_region: false - - parallel_envs: 12 # How many environments to run in parallel - number_of_steps: 20 # How many environment steps go into a single batch - experience_replay: 4 - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - # epsilon: 1.0e-5 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 30 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'spaceinvaders_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 - parallel_envs: 12 # How many environments to run in parallel diff --git a/examples-configs/rl/atari/acer/space_invaders_acer_trust_region.yaml 
b/examples-configs/rl/atari/acer/space_invaders_acer_trust_region.yaml deleted file mode 100644 index 0564d229..00000000 --- a/examples-configs/rl/atari/acer/space_invaders_acer_trust_region.yaml +++ /dev/null @@ -1,83 +0,0 @@ -name: 'spaceinvaders_acer' - - -env: - name: vel.rl.env.classic_atari - game: 'SpaceInvadersNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_mixed_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.trajectory_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 1_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 50_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - algo: - name: vel.rl.algo.policy_gradient.acer - entropy_coefficient: 0.01 - q_coefficient: 0.5 - rho_cap: 10.0 - retrace_rho_cap: 1.0 - - max_grad_norm: 10.0 - discount_factor: 0.99 - - trust_region: true - trust_region_delta: 1.0 - - parallel_envs: 12 # How many environments to run in parallel - number_of_steps: 20 # How many environment steps go into a single batch - experience_replay: 4 - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - # epsilon: 1.0e-5 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 10 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 
'spaceinvaders_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 - parallel_envs: 12 # How many environments to run in parallel diff --git a/examples-configs/rl/atari/a2c/pong_a2c.yaml b/examples-configs/rl/atari/atari_a2c.yaml similarity index 65% rename from examples-configs/rl/atari/a2c/pong_a2c.yaml rename to examples-configs/rl/atari/atari_a2c.yaml index 8b15fb6b..1d15f2dd 100644 --- a/examples-configs/rl/atari/a2c/pong_a2c.yaml +++ b/examples-configs/rl/atari/atari_a2c.yaml @@ -1,9 +1,9 @@ -name: 'pong_a2c' +name: 'atari_a2c' env: name: vel.rl.env.classic_atari - game: 'PongNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: @@ -12,20 +12,21 @@ vec_env: model: - name: vel.rl.models.stochastic_policy_model + name: vel.rl.policy.stochastic_policy input_block: - name: vel.modules.input.image_to_tensor + name: vel.module.input.image_to_tensor backbone: - name: vel.rl.models.backbone.nature_cnn + name: vel.rl.backbone.nature_cnn + input_width: 84 input_height: 84 input_channels: 4 # The same as frame_history reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer + name: vel.rl.reinforcer.on_policy_iteration_reinforcer algo: name: vel.rl.algo.policy_gradient.a2c @@ -42,7 +43,7 @@ reinforcer: optimizer: - name: vel.optimizers.rmsprop + name: vel.optimizer.rmsprop lr: 7.0e-4 alpha: 0.99 epsilon: 1.0e-3 @@ -50,20 +51,20 @@ optimizer: commands: train: - name: vel.rl.commands.rl_train_command + name: vel.rl.command.rl_train_command total_frames: 1.1e7 batches_per_epoch: 100 record: - name: vel.rl.commands.record_movie_command + name: vel.rl.command.record_movie_command takes: 10 - videoname: 'pong_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' evaluate: - name: vel.rl.commands.evaluate_env_command + name: vel.rl.command.evaluate_env_command parallel_envs: 16 # How many environments to run in parallel takes: 20 visdom: - name: vel.commands.vis_store_command + name: vel.command.vis_store_command 
diff --git a/examples-configs/rl/atari/a2c/breakout_a2c_lstm.yaml b/examples-configs/rl/atari/atari_a2c_lstm.yaml similarity index 95% rename from examples-configs/rl/atari/a2c/breakout_a2c_lstm.yaml rename to examples-configs/rl/atari/atari_a2c_lstm.yaml index 8593be1c..36947571 100644 --- a/examples-configs/rl/atari/a2c/breakout_a2c_lstm.yaml +++ b/examples-configs/rl/atari/atari_a2c_lstm.yaml @@ -1,9 +1,9 @@ -name: 'breakout_a2c_lstm' +name: 'atari_a2c_lstm' env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: diff --git a/examples-configs/rl/atari/a2c/breakout_a2c_tf_rmsprop.yaml b/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml similarity index 94% rename from examples-configs/rl/atari/a2c/breakout_a2c_tf_rmsprop.yaml rename to examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml index a3acb3f2..3fa29e5b 100644 --- a/examples-configs/rl/atari/a2c/breakout_a2c_tf_rmsprop.yaml +++ b/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml @@ -1,9 +1,9 @@ -name: 'breakout_a2c_tf_rmsprop' +name: 'atari_a2c_tf_rmsprop' env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: diff --git a/examples-configs/rl/atari/acer/beam_rider_acer.yaml b/examples-configs/rl/atari/atari_acer.yaml similarity index 96% rename from examples-configs/rl/atari/acer/beam_rider_acer.yaml rename to examples-configs/rl/atari/atari_acer.yaml index 123c3c0b..c488883e 100644 --- a/examples-configs/rl/atari/acer/beam_rider_acer.yaml +++ b/examples-configs/rl/atari/atari_acer.yaml @@ -1,9 +1,9 @@ -name: 'beamrider_acer_notr' +name: 'atari_acer' env: name: vel.rl.env.classic_atari - game: 'BeamRiderNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: diff --git a/examples-configs/rl/atari/acer/beam_rider_acer_trust_region.yaml b/examples-configs/rl/atari/atari_acer_trust_region.yaml similarity index 96% rename from 
examples-configs/rl/atari/acer/beam_rider_acer_trust_region.yaml rename to examples-configs/rl/atari/atari_acer_trust_region.yaml index 3758d9ea..99bae873 100644 --- a/examples-configs/rl/atari/acer/beam_rider_acer_trust_region.yaml +++ b/examples-configs/rl/atari/atari_acer_trust_region.yaml @@ -1,9 +1,9 @@ -name: 'beamrider_acer' +name: 'atari_acer_trust_region' env: name: vel.rl.env.classic_atari - game: 'BeamRiderNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: diff --git a/examples-configs/rl/atari/ppo/breakout_ppo.yaml b/examples-configs/rl/atari/atari_ppo.yaml similarity index 70% rename from examples-configs/rl/atari/ppo/breakout_ppo.yaml rename to examples-configs/rl/atari/atari_ppo.yaml index 1a1c4e50..4850edba 100644 --- a/examples-configs/rl/atari/ppo/breakout_ppo.yaml +++ b/examples-configs/rl/atari/atari_ppo.yaml @@ -1,9 +1,9 @@ -name: 'breakout_ppo' +name: 'atari_ppo' env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: @@ -12,20 +12,20 @@ vec_env: model: - name: vel.rl.models.stochastic_policy_model + name: vel.rl.policy.stochastic_policy input_block: - name: vel.modules.input.image_to_tensor + name: vel.module.input.image_to_tensor backbone: - name: vel.rl.models.backbone.nature_cnn + name: vel.rl.backbone.nature_cnn input_width: 84 input_height: 84 input_channels: 4 # The same as frame_history reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer + name: vel.rl.reinforcer.on_policy_iteration_reinforcer algo: name: vel.rl.algo.policy_gradient.ppo @@ -39,7 +39,7 @@ reinforcer: max_grad_norm: 0.5 # Gradient clipping parameter cliprange: - name: vel.schedules.linear + name: vel.schedule.linear initial_value: 0.1 final_value: 0.0 @@ -53,7 +53,7 @@ reinforcer: optimizer: - name: vel.optimizers.adam + name: vel.optimizer.adam lr: 2.5e-4 epsilon: 1.0e-5 @@ -64,17 +64,20 @@ scheduler: commands: train: - name: vel.rl.commands.rl_train_command + 
name: vel.rl.command.rl_train_command total_frames: 1.1e7 batches_per_epoch: 10 record: - name: vel.rl.commands.record_movie_command + name: vel.rl.command.record_movie_command takes: 10 - videoname: 'breakout_ppo_vid_{:04}.avi' + videoname: 'atari_ppo_vid_{:04}.avi' evaluate: - name: vel.rl.commands.evaluate_env_command + name: vel.rl.command.evaluate_env_command parallel_envs: 16 # How many environments to run in parallel takes: 20 + + visdom: + name: vel.command.vis_store_command diff --git a/examples-configs/rl/atari/ppo/breakout_ppo_gru.yaml b/examples-configs/rl/atari/atari_ppo_gru.yaml similarity index 74% rename from examples-configs/rl/atari/ppo/breakout_ppo_gru.yaml rename to examples-configs/rl/atari/atari_ppo_gru.yaml index 1a7aa669..0f8c2e6a 100644 --- a/examples-configs/rl/atari/ppo/breakout_ppo_gru.yaml +++ b/examples-configs/rl/atari/atari_ppo_gru.yaml @@ -1,9 +1,9 @@ -name: 'breakout_ppo_gru' +name: 'atari_ppo_gru' env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: @@ -11,13 +11,13 @@ vec_env: model: - name: vel.rl.models.stochastic_policy_rnn_model + name: vel.rl.policy.stochastic_rnn_policy input_block: - name: vel.modules.input.image_to_tensor + name: vel.module.input.image_to_tensor backbone: - name: vel.rl.models.backbone.nature_cnn_rnn + name: vel.rl.backbone.nature_cnn_rnn rnn_type: 'gru' hidden_units: 512 @@ -27,7 +27,7 @@ model: reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer + name: vel.rl.reinforcer.on_policy_iteration_reinforcer algo: name: vel.rl.algo.policy_gradient.ppo @@ -41,7 +41,7 @@ reinforcer: max_grad_norm: 0.5 # Gradient clipping parameter cliprange: - name: vel.schedules.linear + name: vel.schedule.linear initial_value: 0.1 final_value: 0.0 @@ -57,7 +57,7 @@ reinforcer: optimizer: - name: vel.optimizers.adam + name: vel.optimizer.adam lr: 2.5e-4 epsilon: 1.0e-5 @@ -68,17 +68,17 @@ scheduler: commands: train: - name: 
vel.rl.commands.rl_train_command + name: vel.rl.command.rl_train_command total_frames: 1.1e7 batches_per_epoch: 10 record: - name: vel.rl.commands.record_movie_command + name: vel.rl.command.record_movie_command takes: 10 videoname: 'breakout_ppo_gru_vid_{:04}.avi' evaluate: - name: vel.rl.commands.evaluate_env_command + name: vel.rl.command.evaluate_env_command parallel_envs: 16 # How many environments to run in parallel takes: 20 diff --git a/examples-configs/rl/atari/trpo/breakout_trpo.yaml b/examples-configs/rl/atari/atari_trpo.yaml similarity index 96% rename from examples-configs/rl/atari/trpo/breakout_trpo.yaml rename to examples-configs/rl/atari/atari_trpo.yaml index 9c29af38..e54e6d07 100644 --- a/examples-configs/rl/atari/trpo/breakout_trpo.yaml +++ b/examples-configs/rl/atari/atari_trpo.yaml @@ -1,8 +1,9 @@ -name: 'breakout_trpo' +name: 'atari_trpo' + env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: diff --git a/examples-configs/rl/atari/dqn/breakout_ddqn.yaml b/examples-configs/rl/atari/dqn/atari_ddqn.yaml similarity index 96% rename from examples-configs/rl/atari/dqn/breakout_ddqn.yaml rename to examples-configs/rl/atari/dqn/atari_ddqn.yaml index fb6fb2bf..667ce429 100644 --- a/examples-configs/rl/atari/dqn/breakout_ddqn.yaml +++ b/examples-configs/rl/atari/dqn/atari_ddqn.yaml @@ -1,9 +1,9 @@ -name: 'breakout_ddqn' +name: 'atari_ddqn' env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: diff --git a/examples-configs/rl/atari/dqn/breakout_dqn_distributional.yaml b/examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml similarity index 96% rename from examples-configs/rl/atari/dqn/breakout_dqn_distributional.yaml rename to examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml index a0c6e219..b605a75d 100644 --- a/examples-configs/rl/atari/dqn/breakout_dqn_distributional.yaml +++ 
b/examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml @@ -1,9 +1,9 @@ -name: 'breakout_dqn_distributional' +name: 'atari_dqn_distributional' env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: diff --git a/examples-configs/rl/atari/dqn/breakout_dqn_raw.yaml b/examples-configs/rl/atari/dqn/atari_dqn_raw.yaml similarity index 96% rename from examples-configs/rl/atari/dqn/breakout_dqn_raw.yaml rename to examples-configs/rl/atari/dqn/atari_dqn_raw.yaml index 5422f454..31e81b00 100644 --- a/examples-configs/rl/atari/dqn/breakout_dqn_raw.yaml +++ b/examples-configs/rl/atari/dqn/atari_dqn_raw.yaml @@ -1,9 +1,9 @@ -name: 'breakout_dqn_raw' +name: 'atari_dqn_raw' env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: diff --git a/examples-configs/rl/atari/dqn/breakout_dueling_ddqn.yaml b/examples-configs/rl/atari/dqn/atari_dueling_ddqn.yaml similarity index 96% rename from examples-configs/rl/atari/dqn/breakout_dueling_ddqn.yaml rename to examples-configs/rl/atari/dqn/atari_dueling_ddqn.yaml index fb6088e0..a5a225a9 100644 --- a/examples-configs/rl/atari/dqn/breakout_dueling_ddqn.yaml +++ b/examples-configs/rl/atari/dqn/atari_dueling_ddqn.yaml @@ -1,9 +1,9 @@ -name: 'breakout_dueling_ddqn' +name: 'atari_dueling_ddqn' env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: diff --git a/examples-configs/rl/atari/dqn/breakout_dueling_ddqn_prioritized.yaml b/examples-configs/rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml similarity index 96% rename from examples-configs/rl/atari/dqn/breakout_dueling_ddqn_prioritized.yaml rename to examples-configs/rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml index 017a0c6e..99127352 100644 --- a/examples-configs/rl/atari/dqn/breakout_dueling_ddqn_prioritized.yaml +++ 
b/examples-configs/rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml @@ -1,9 +1,9 @@ -name: 'breakout_dueling_ddqn_prioritized' +name: 'atari_dueling_ddqn_prioritized' env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' vec_env: diff --git a/examples-configs/rl/atari/dqn/seaquest_dqn_distributional.yaml b/examples-configs/rl/atari/dqn/seaquest_dqn_distributional.yaml deleted file mode 100644 index 9d068b5c..00000000 --- a/examples-configs/rl/atari/dqn/seaquest_dqn_distributional.yaml +++ /dev/null @@ -1,90 +0,0 @@ -name: 'seaquest_dqn_distributional' - - -env: - name: vel.rl.env.classic_atari - game: 'SeaquestNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.distributional_q_model - - atoms: 51 # 51 bins for Distributional DQN - vmin: -10.0 - vmax: 10.0 - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 30_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 250_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.distributional_dqn - - target_update_frequency: 10_000 # After how 
many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: vel.optimizers.rmsprop - lr: 2.5e-4 - alpha: 0.95 - momentum: 0.95 - epsilon: 1.0e-1 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 5.0e7 # 11M - batches_per_epoch: 2500 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'breakout_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 diff --git a/examples-configs/rl/atari/dqn/seaquest_dqn_raw.yaml b/examples-configs/rl/atari/dqn/seaquest_dqn_raw.yaml deleted file mode 100644 index a6e19031..00000000 --- a/examples-configs/rl/atari/dqn/seaquest_dqn_raw.yaml +++ /dev/null @@ -1,86 +0,0 @@ -name: 'seaquest_dqn_raw' - - -env: - name: vel.rl.env.classic_atari - game: 'SeaquestNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 30_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 250_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - 
frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.dqn - - target_update_frequency: 10_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: vel.optimizers.rmsprop - lr: 2.5e-4 - alpha: 0.95 - momentum: 0.95 - epsilon: 1.0e-1 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 # 11M - batches_per_epoch: 2500 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'seaquest_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 diff --git a/examples-configs/rl/atari/dqn_rainbow_param/asterix_rp_dqn_raw.yaml b/examples-configs/rl/atari/dqn_rainbow_param/asterix_rp_dqn_raw.yaml deleted file mode 100644 index 4717e900..00000000 --- a/examples-configs/rl/atari/dqn_rainbow_param/asterix_rp_dqn_raw.yaml +++ /dev/null @@ -1,89 +0,0 @@ -name: 'asterix_rp_dqn_raw' - - -env: - name: vel.rl.env.classic_atari - game: 'AsterixNoFrameskip-v4' - - settings: - max_episode_frames: 108_000 - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - 
env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 80_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 1_000_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.dqn - - target_update_frequency: 32_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: vel.optimizers.adam - lr: 6.25e-05 - epsilon: 1.5e-4 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 5.0e7 # 50M - batches_per_epoch: 2500 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'asterix_vid_{:04}.avi' - fps: 15 - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 12 - takes: 20 diff --git a/examples-configs/rl/atari/rainbow/breakout_rainbow.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rainbow.yaml similarity index 97% rename from examples-configs/rl/atari/rainbow/breakout_rainbow.yaml rename to examples-configs/rl/atari/dqn_rainbow_param/atari_rainbow.yaml index 1e0cbeec..af99acd8 100644 --- a/examples-configs/rl/atari/rainbow/breakout_rainbow.yaml +++ b/examples-configs/rl/atari/dqn_rainbow_param/atari_rainbow.yaml @@ -1,9 +1,9 @@ -name: 
'breakout_rainbow' +name: 'atari_rainbow' env: name: vel.rl.env.classic_atari - game: 'BreakoutNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' settings: max_episode_frames: 108_000 diff --git a/examples-configs/rl/atari/dqn_rainbow_param/asterix_rp_dqn_distributional.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml similarity index 96% rename from examples-configs/rl/atari/dqn_rainbow_param/asterix_rp_dqn_distributional.yaml rename to examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml index 432cb64e..57e4fe91 100644 --- a/examples-configs/rl/atari/dqn_rainbow_param/asterix_rp_dqn_distributional.yaml +++ b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml @@ -1,9 +1,9 @@ -name: 'asterix_dqn_distributional' +name: 'atari_dqn_distributional' env: name: vel.rl.env.classic_atari - game: 'AsterixNoFrameskip-v4' + game: !param game = 'BreakoutNoFrameskip-v4' settings: max_episode_frames: 108_000 diff --git a/examples-configs/rl/atari/dqn_rainbow_param/asteroids_rp_dqn_noisynet.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml similarity index 96% rename from examples-configs/rl/atari/dqn_rainbow_param/asteroids_rp_dqn_noisynet.yaml rename to examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml index b71afb46..e8174023 100644 --- a/examples-configs/rl/atari/dqn_rainbow_param/asteroids_rp_dqn_noisynet.yaml +++ b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml @@ -1,10 +1,9 @@ -name: 'asteroids_rp_dqn_noisynet' +name: 'atari_rp_dqn_noisynet' env: name: vel.rl.env.classic_atari - game: 'AsteroidsNoFrameskip-v4' - + game: !param game = 'BreakoutNoFrameskip-v4' settings: max_episode_frames: 108_000 diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atlantis_rp_dqn_nstep.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml similarity index 96% rename from 
examples-configs/rl/atari/dqn_rainbow_param/atlantis_rp_dqn_nstep.yaml rename to examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml index d6558654..89629ac7 100644 --- a/examples-configs/rl/atari/dqn_rainbow_param/atlantis_rp_dqn_nstep.yaml +++ b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml @@ -1,10 +1,9 @@ -name: 'atlantis_rp_dqn_nstep' +name: 'atari_rp_dqn_nstep' env: name: vel.rl.env.classic_atari - game: 'AtlantisNoFrameskip-v4' - + game: !param game = 'BreakoutNoFrameskip-v4' settings: max_episode_frames: 108_000 diff --git a/examples-configs/rl/atari/dqn_rainbow_param/asteroids_rp_dqn_raw.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml similarity index 96% rename from examples-configs/rl/atari/dqn_rainbow_param/asteroids_rp_dqn_raw.yaml rename to examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml index 9622d200..5786002b 100644 --- a/examples-configs/rl/atari/dqn_rainbow_param/asteroids_rp_dqn_raw.yaml +++ b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml @@ -1,10 +1,9 @@ -name: 'asteroids_rp_dqn_raw' +name: 'atari_rp_dqn_raw' env: name: vel.rl.env.classic_atari - game: 'AsteroidsNoFrameskip-v4' - + game: !param game = 'BreakoutNoFrameskip-v4' settings: max_episode_frames: 108_000 diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atlantis_rp_dqn_raw.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atlantis_rp_dqn_raw.yaml deleted file mode 100644 index d30461e2..00000000 --- a/examples-configs/rl/atari/dqn_rainbow_param/atlantis_rp_dqn_raw.yaml +++ /dev/null @@ -1,88 +0,0 @@ -name: 'atlantis_rp_dqn_raw' - - -env: - name: vel.rl.env.classic_atari - game: 'AtlantisNoFrameskip-v4' - - settings: - max_episode_frames: 108_000 - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - 
name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 80_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 1_000_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.dqn - - target_update_frequency: 32_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: vel.optimizers.adam - lr: 6.25e-05 - epsilon: 1.5e-4 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 5.0e7 # 50M - batches_per_epoch: 2500 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'atlantis_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 12 - takes: 100 diff --git a/examples-configs/rl/atari/ppo/enduro_ppo.yaml b/examples-configs/rl/atari/ppo/enduro_ppo.yaml deleted file mode 100644 index afc6d1cc..00000000 --- a/examples-configs/rl/atari/ppo/enduro_ppo.yaml +++ /dev/null @@ -1,85 +0,0 @@ -name: 'enduro_ppo' - -env: - name: 
vel.rl.env.classic_atari - game: 'EnduroNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.ppo - - entropy_coefficient: 0.01 - value_coefficient: 0.5 - - discount_factor: 0.99 # Discount factor for the rewards - gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter - - max_grad_norm: 0.5 # Gradient clipping parameter - - cliprange: - name: vel.schedules.linear - initial_value: 0.1 - final_value: 0.0 - - env_roller: - name: vel.rl.env_roller.step_env_roller - - parallel_envs: 8 # How many environments to run in parallel - number_of_steps: 128 # How many environment steps go into a single batch - batch_size: 256 # How many samples can go into the model once - experience_replay: 4 # How many times to replay the experience - - discount_factor: 0.99 # Discount factor for the rewards - - -optimizer: - name: vel.optimizers.adam - lr: 2.5e-4 - epsilon: 1.0e-5 - - -scheduler: - name: vel.scheduler.linear_batch_scaler - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 10 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'enduro_ppo_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 16 # How many environments to run in parallel - - takes: 20 - - enjoy: - name: vel.rl.commands.enjoy - fps: 15 diff --git a/examples-configs/rl/atari/ppo/qbert_ppo.yaml b/examples-configs/rl/atari/ppo/qbert_ppo.yaml deleted file mode 100644 index b4e012b0..00000000 --- 
a/examples-configs/rl/atari/ppo/qbert_ppo.yaml +++ /dev/null @@ -1,81 +0,0 @@ -name: 'qbert_ppo_simple' - -env: - name: vel.rl.env.classic_atari - game: 'QbertNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.ppo - - entropy_coefficient: 0.01 - value_coefficient: 0.5 - - discount_factor: 0.99 # Discount factor for the rewards - gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter - - max_grad_norm: 0.5 # Gradient clipping parameter - - cliprange: - name: vel.schedules.linear - initial_value: 0.1 - final_value: 0.0 - - env_roller: - name: vel.rl.env_roller.step_env_roller - - parallel_envs: 8 # How many environments to run in parallel - number_of_steps: 128 # How many environment steps go into a single batch - batch_size: 256 # How many samples can go into the model once - experience_replay: 4 # How many times to replay the experience - - discount_factor: 0.99 # Discount factor for the rewards - - -optimizer: - name: vel.optimizers.adam - lr: 2.5e-4 - epsilon: 1.0e-5 - - -scheduler: - name: vel.scheduler.linear_batch_scaler - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - batches_per_epoch: 10 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'qbert_ppo_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 16 # How many environments to run in parallel - - takes: 20 diff --git a/examples-scripts/rl/atari/a2c/breakout_a2c.py b/examples-scripts/rl/atari/a2c/breakout_a2c.py index 
1f855788..4cb9560a 100644 --- a/examples-scripts/rl/atari/a2c/breakout_a2c.py +++ b/examples-scripts/rl/atari/a2c/breakout_a2c.py @@ -8,12 +8,12 @@ from vel.rl.env.classic_atari import ClassicAtariEnv from vel.rl.vecenv.subproc import SubprocVecEnvWrapper -from vel.modules.input.image_to_tensor import ImageToTensorFactory -from vel.rl.models.stochastic_policy_model import StochasticPolicyModelFactory -from vel.rl.models.backbone.nature_cnn import NatureCnnFactory +from vel.module.input.image_to_tensor import ImageToTensorFactory +from vel.rl.policy.stochastic_policy import StochasticPolicy +from vel.rl.backbone.nature_cnn import NatureCnnFactory -from vel.rl.reinforcers.on_policy_iteration_reinforcer import ( +from vel.rl.reinforcer.on_policy_iteration_reinforcer import ( OnPolicyIterationReinforcer, OnPolicyIterationReinforcerSettings ) @@ -39,7 +39,7 @@ def breakout_a2c(): # Again, use a helper to create a model # But because model is owned by the reinforcer, model should not be accessed using this variable # but from reinforcer.model property - model = StochasticPolicyModelFactory( + model = StochasticPolicy( input_block=ImageToTensorFactory(), backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) ).instantiate(action_space=vec_env.action_space) diff --git a/examples-scripts/rl/atari/a2c/breakout_a2c_evaluate.py b/examples-scripts/rl/atari/a2c/breakout_a2c_evaluate.py index 2912d46e..3c31f6ab 100644 --- a/examples-scripts/rl/atari/a2c/breakout_a2c_evaluate.py +++ b/examples-scripts/rl/atari/a2c/breakout_a2c_evaluate.py @@ -51,7 +51,7 @@ def record_take(model, env_instance, device): while True: observation_array = np.expand_dims(np.array(observation), axis=0) observation_tensor = torch.from_numpy(observation_array).to(device) - actions = model.step(observation_tensor, argmax_sampling=True)['actions'] + actions = model.step(observation_tensor, deterministic=True)['actions'] observation, reward, done, epinfo = 
env_instance.step(actions.item()) diff --git a/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py b/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py index 9c0a679b..49900c0e 100644 --- a/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py +++ b/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py @@ -11,7 +11,7 @@ from vel.util.random import set_seed from vel.rl.env.mujoco import MujocoEnv from vel.rl.model.deterministic_policy_model import DeterministicPolicyModelFactory -from vel.rl.model.backbone.mlp import MLPFactory +from vel.rl.backbone.mlp import MLPFactory from vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer import ( BufferedOffPolicyIterationReinforcer, BufferedOffPolicyIterationReinforcerSettings ) diff --git a/vel/api/model.py b/vel/api/model.py index 9164442f..be09907b 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -58,6 +58,10 @@ def is_stateful(self) -> bool: """ If the model has a state that needs to be fed between individual observations """ return False + def zero_state(self, batch_size): + """ Potential state for the model """ + return None + class SupervisedModel(Model): """ Model for a supervised learning problem """ diff --git a/vel/model/vision/cifar_resnet_v1.py b/vel/model/vision/cifar_resnet_v1.py index 2a19ffa8..ab8fac25 100644 --- a/vel/model/vision/cifar_resnet_v1.py +++ b/vel/model/vision/cifar_resnet_v1.py @@ -7,7 +7,7 @@ import torch.nn.functional as F from vel.api import LossFunctionModel, ModelFactory -from vel.modules.resnet_v1 import Bottleneck, BasicBlock +from vel.module.resnet_v1 import Bottleneck, BasicBlock class ResNetV1(LossFunctionModel): diff --git a/vel/model/vision/cifar_resnet_v2.py b/vel/model/vision/cifar_resnet_v2.py index 2d44ab01..66e96fb6 100644 --- a/vel/model/vision/cifar_resnet_v2.py +++ b/vel/model/vision/cifar_resnet_v2.py @@ -7,7 +7,7 @@ import torch.nn.functional as F from vel.api import LossFunctionModel, ModelFactory -from vel.modules.resnet_v2 import Bottleneck, BasicBlock 
+from vel.module.resnet_v2 import Bottleneck, BasicBlock class ResNetV2(LossFunctionModel): diff --git a/vel/module/input/one_hot_encoding.py b/vel/module/input/one_hot_encoding.py index 1e351721..eaee642c 100644 --- a/vel/module/input/one_hot_encoding.py +++ b/vel/module/input/one_hot_encoding.py @@ -1,5 +1,5 @@ from vel.api import LinearBackboneModel, ModelFactory -from vel.modules.layers import OneHotEncode +from vel.module.layers import OneHotEncode class OneHotEncodingInput(LinearBackboneModel): diff --git a/vel/module/rnn_cell.py b/vel/module/rnn_cell.py index 5b62a046..5ce58867 100644 --- a/vel/module/rnn_cell.py +++ b/vel/module/rnn_cell.py @@ -46,6 +46,10 @@ def state_dim(self) -> int: else: return self.hidden_size + def zero_state(self, batch_size): + """ Potential state for the model """ + return torch.zeros(batch_size, self.state_dim) + def forward(self, input_data, state): if self.rnn_type == 'lstm': hidden_state, cell_state = torch.split(state, self.hidden_size, 1) diff --git a/vel/rl/algo/policy_gradient/trpo.py b/vel/rl/algo/policy_gradient/trpo.py index d97d1bf3..2922e128 100644 --- a/vel/rl/algo/policy_gradient/trpo.py +++ b/vel/rl/algo/policy_gradient/trpo.py @@ -84,7 +84,7 @@ def process_rollout(self, batch_info, rollout: Rollout): return rollout - def optimizer_step(self, batch_info, device, model, rollout): + def optimize(self, batch_info, device, model, rollout): """ Single optimization step for a model """ rollout = rollout.to_transitions() diff --git a/vel/rl/api/__init__.py b/vel/rl/api/__init__.py index a48ee85a..4e80755c 100644 --- a/vel/rl/api/__init__.py +++ b/vel/rl/api/__init__.py @@ -2,7 +2,7 @@ from .env_base import EnvFactory, VecEnvFactory from .env_roller import EnvRollerBase, ReplayEnvRollerBase, EnvRollerFactoryBase, ReplayEnvRollerFactoryBase from .evaluator import Evaluator -from .model import RlModel, RlRnnModel +from .policy import Policy from .reinforcer_base import ReinforcerBase, ReinforcerFactory from .replay_buffer 
import ReplayBuffer, ReplayBufferFactory from .rollout import Rollout, Trajectories, Transitions diff --git a/vel/rl/api/algo_base.py b/vel/rl/api/algo_base.py index d9e6c8c4..305e2624 100644 --- a/vel/rl/api/algo_base.py +++ b/vel/rl/api/algo_base.py @@ -25,7 +25,7 @@ def process_rollout(self, batch_info, rollout): """ Process rollout for ALGO before any chunking/shuffling """ return rollout - def optimizer_step(self, batch_info, device, model, rollout): + def optimize(self, batch_info, device, model, rollout): """ Single optimization step for a model """ raise NotImplementedError @@ -48,7 +48,7 @@ def post_optimization_step(self, batch_info, device, model, rollout): """ Steps to take after optimization has been done""" pass - def optimizer_step(self, batch_info, device, model, rollout): + def optimize(self, batch_info, device, model, rollout): """ Single optimization step for a model """ batch_info.optimizer.zero_grad() diff --git a/vel/rl/api/env_roller.py b/vel/rl/api/env_roller.py index 3457104d..f868fe90 100644 --- a/vel/rl/api/env_roller.py +++ b/vel/rl/api/env_roller.py @@ -1,20 +1,19 @@ import typing -import gym -from vel.rl.api.rollout import Rollout -from vel.api import BatchInfo, Model +from vel.api import BatchInfo from vel.openai.baselines.common.vec_env import VecEnv +from vel.rl.api.rollout import Rollout class EnvRollerBase: """ Class generating environment rollouts """ @property - def environment(self) -> typing.Union[gym.Env, VecEnv]: + def environment(self) -> VecEnv: """ Reference to environment being evaluated """ raise NotImplementedError - def rollout(self, batch_info: BatchInfo, model: Model, number_of_steps: int) -> Rollout: + def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Roll-out the environment and return it """ raise NotImplementedError @@ -27,7 +26,7 @@ def metrics(self) -> list: class ReplayEnvRollerBase(EnvRollerBase): """ Class generating environment rollouts with experience replay """ - def 
sample(self, batch_info: BatchInfo, model: Model, number_of_steps: int) -> Rollout: + def sample(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Sample experience from replay buffer and return a batch """ raise NotImplementedError @@ -47,7 +46,7 @@ def update(self, rollout, batch_info): class EnvRollerFactoryBase: """ Factory for env rollers """ - def instantiate(self, environment, device) -> EnvRollerBase: + def instantiate(self, environment, policy, device) -> EnvRollerBase: """ Instantiate env roller """ raise NotImplementedError @@ -55,6 +54,6 @@ def instantiate(self, environment, device) -> EnvRollerBase: class ReplayEnvRollerFactoryBase(EnvRollerFactoryBase): """ Factory for env rollers """ - def instantiate(self, environment, device) -> ReplayEnvRollerBase: + def instantiate(self, environment, policy, device) -> ReplayEnvRollerBase: """ Instantiate env roller """ raise NotImplementedError diff --git a/vel/rl/api/model.py b/vel/rl/api/model.py deleted file mode 100644 index fb5a691c..00000000 --- a/vel/rl/api/model.py +++ /dev/null @@ -1,50 +0,0 @@ -import torch - -from vel.api import Model - -from .rollout import Rollout -from .evaluator import Evaluator - - -class RlModel(Model): - """ Reinforcement learning model """ - - def step(self, observations) -> dict: - """ - Evaluate environment on given observations, return actions and potentially some extra information - in a dictionary. - """ - raise NotImplementedError - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - raise NotImplementedError - - -class RlRnnModel(Model): - """ Reinforcement learning recurrent model """ - - @property - def is_stateful(self) -> bool: - """ If the network is recurrent and needs to be fed previous state """ - return True - - def step(self, observations, state) -> dict: - """ - Evaluate environment on given observations, return actions and potentially some extra information - in a dictionary. 
- """ - raise NotImplementedError - - @property - def state_dim(self) -> int: - """ Dimension of model state """ - raise NotImplementedError - - def zero_state(self, batch_size): - """ Initial state of the network """ - return torch.zeros(batch_size, self.state_dim) - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - raise NotImplementedError diff --git a/vel/rl/api/policy.py b/vel/rl/api/policy.py new file mode 100644 index 00000000..34082364 --- /dev/null +++ b/vel/rl/api/policy.py @@ -0,0 +1,22 @@ +import torch +from vel.api import Model + + +class Policy(Model): + """ Base class for reinforcement learning policies """ + + def act(self, observation, state=None, deterministic=False) -> dict: + """ Make an action based on the observation from the environment. """ + raise NotImplementedError + + def value(self, observation, state=None) -> torch.tensor: + """ Return the expected reward from current state """ + return self.act(observation=observation, state=state)['value'] + + def reset_state(self, state, dones): + """ Reset the state after the episode has been terminated """ + raise NotImplementedError + + def evaluate(self, rollout) -> object: + """ Return an evaluator object evaluating given rollout that may be used for gradient computations etc. 
""" + raise NotImplementedError diff --git a/vel/rl/api/rollout.py b/vel/rl/api/rollout.py index 4acf8cbb..4e7695d7 100644 --- a/vel/rl/api/rollout.py +++ b/vel/rl/api/rollout.py @@ -176,7 +176,7 @@ def frames(self): """ Number of frames in rollout """ return self.num_steps * self.num_envs - def to_device(self, device, non_blocking=True): + def to_device(self, device, non_blocking=False): """ Move a rollout to a selected device """ return Trajectories( num_steps=self.num_steps, diff --git a/vel/rl/model/__init__.py b/vel/rl/backbone/__init__.py similarity index 100% rename from vel/rl/model/__init__.py rename to vel/rl/backbone/__init__.py diff --git a/vel/rl/model/backbone/double_nature_cnn.py b/vel/rl/backbone/double_nature_cnn.py similarity index 100% rename from vel/rl/model/backbone/double_nature_cnn.py rename to vel/rl/backbone/double_nature_cnn.py diff --git a/vel/rl/model/backbone/double_noisy_nature_cnn.py b/vel/rl/backbone/double_noisy_nature_cnn.py similarity index 100% rename from vel/rl/model/backbone/double_noisy_nature_cnn.py rename to vel/rl/backbone/double_noisy_nature_cnn.py diff --git a/vel/rl/model/backbone/lstm.py b/vel/rl/backbone/lstm.py similarity index 100% rename from vel/rl/model/backbone/lstm.py rename to vel/rl/backbone/lstm.py diff --git a/vel/rl/model/backbone/mlp.py b/vel/rl/backbone/mlp.py similarity index 100% rename from vel/rl/model/backbone/mlp.py rename to vel/rl/backbone/mlp.py diff --git a/vel/rl/model/backbone/nature_cnn.py b/vel/rl/backbone/nature_cnn.py similarity index 100% rename from vel/rl/model/backbone/nature_cnn.py rename to vel/rl/backbone/nature_cnn.py diff --git a/vel/rl/model/backbone/nature_cnn_rnn.py b/vel/rl/backbone/nature_cnn_rnn.py similarity index 83% rename from vel/rl/model/backbone/nature_cnn_rnn.py rename to vel/rl/backbone/nature_cnn_rnn.py index 9662a444..8888da6e 100644 --- a/vel/rl/model/backbone/nature_cnn_rnn.py +++ b/vel/rl/backbone/nature_cnn_rnn.py @@ -1,5 +1,5 @@ from vel.api import 
LinearBackboneModel, ModelFactory -from vel.rl.model.backbone.nature_cnn import NatureCnn +from vel.rl.backbone.nature_cnn import NatureCnn from vel.module.rnn_cell import RnnCell @@ -31,6 +31,15 @@ def state_dim(self) -> int: """ Initial state of the network """ return self.rnn_cell.state_dim + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return True + + def zero_state(self, batch_size): + """ Potential state for the model """ + return self.rnn_cell.zero_state(batch_size) + def forward(self, input_image, state): cnn_output = self.nature_cnn(input_image) hidden_state, new_state = self.rnn_cell(cnn_output, state) diff --git a/vel/rl/model/backbone/nature_cnn_small.py b/vel/rl/backbone/nature_cnn_small.py similarity index 100% rename from vel/rl/model/backbone/nature_cnn_small.py rename to vel/rl/backbone/nature_cnn_small.py diff --git a/vel/rl/model/backbone/noisy_nature_cnn.py b/vel/rl/backbone/noisy_nature_cnn.py similarity index 100% rename from vel/rl/model/backbone/noisy_nature_cnn.py rename to vel/rl/backbone/noisy_nature_cnn.py diff --git a/vel/rl/env_roller/step_env_roller.py b/vel/rl/env_roller/step_env_roller.py index a25cecfe..c4ec4700 100644 --- a/vel/rl/env_roller/step_env_roller.py +++ b/vel/rl/env_roller/step_env_roller.py @@ -1,8 +1,10 @@ import torch import numpy as np -from vel.api import BatchInfo, Model -from vel.rl.api import Trajectories, Rollout, EnvRollerBase, EnvRollerFactoryBase +from vel.api import BatchInfo +from vel.openai.baselines.common.vec_env import VecEnv +from vel.rl.api import Trajectories, Rollout, EnvRollerBase, EnvRollerFactoryBase, Policy +from vel.rl.util.actor import PolicyActor from vel.util.tensor_accumulator import TensorAccumulator @@ -11,15 +13,14 @@ class StepEnvRoller(EnvRollerBase): Class calculating env rollouts. 
""" - def __init__(self, environment, device): + def __init__(self, environment: VecEnv, policy: Policy, device: torch.device): self._environment = environment self.device = device # Initial observation - kept on CPU self.last_observation = torch.from_numpy(self.environment.reset()).clone() - # Relevant for RNN policies - kept on DEVICE - self.hidden_state = None + self.actor = PolicyActor(self.environment.num_envs, policy, device) @property def environment(self): @@ -27,26 +28,17 @@ def environment(self): return self._environment @torch.no_grad() - def rollout(self, batch_info: BatchInfo, model: Model, number_of_steps: int) -> Rollout: + def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Calculate env rollout """ accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information - if self.hidden_state is None and model.is_stateful: - self.hidden_state = model.zero_state(self.last_observation.size(0)).to(self.device) - - # Remember rollout initial state, we'll use that for training as well - initial_hidden_state = self.hidden_state - for step_idx in range(number_of_steps): - if model.is_stateful: - step = model.step(self.last_observation.to(self.device), state=self.hidden_state) - self.hidden_state = step['state'] - else: - step = model.step(self.last_observation.to(self.device)) + step = self.actor.act(self.last_observation.to(self.device)) # Add step to the tensor accumulator for name, tensor in step.items(): + # Take not that here we convert all the tensors to CPU accumulator.add(name, tensor.cpu()) accumulator.add('observations', self.last_observation) @@ -57,22 +49,16 @@ def rollout(self, batch_info: BatchInfo, model: Model, number_of_steps: int) -> # Done is flagged true when the episode has ended AND the frame we see is already a first frame from the # next episode dones_tensor = torch.from_numpy(new_dones.astype(np.float32)).clone() - accumulator.add('dones', dones_tensor) 
self.last_observation = torch.from_numpy(new_obs).clone() + self.actor.reset_states(dones_tensor) - if model.is_stateful: - # Zero out state in environments that have finished - self.hidden_state = self.hidden_state * (1.0 - dones_tensor.unsqueeze(-1)).to(self.device) - + accumulator.add('dones', dones_tensor) accumulator.add('rewards', torch.from_numpy(new_rewards.astype(np.float32)).clone()) episode_information.append(new_infos) - if model.is_stateful: - final_values = model.value(self.last_observation.to(self.device), state=self.hidden_state).cpu() - else: - final_values = model.value(self.last_observation.to(self.device)).cpu() + final_values = self.actor.value(self.last_observation.to(self.device)).cpu() accumulated_tensors = accumulator.result() @@ -82,7 +68,6 @@ def rollout(self, batch_info: BatchInfo, model: Model, number_of_steps: int) -> environment_information=episode_information, transition_tensors=accumulated_tensors, rollout_tensors={ - 'initial_hidden_state': initial_hidden_state.cpu() if initial_hidden_state is not None else None, 'final_values': final_values } ) @@ -93,9 +78,10 @@ class StepEnvRollerFactory(EnvRollerFactoryBase): def __init__(self): pass - def instantiate(self, environment, device): + def instantiate(self, environment, policy, device): return StepEnvRoller( environment=environment, + policy=policy, device=device, ) diff --git a/vel/rl/module/action_head.py b/vel/rl/module/stochastic_action_head.py similarity index 96% rename from vel/rl/module/action_head.py rename to vel/rl/module/stochastic_action_head.py index 8cd6b6ba..2d54cbab 100644 --- a/vel/rl/module/action_head.py +++ b/vel/rl/module/stochastic_action_head.py @@ -32,12 +32,12 @@ def forward(self, input_data): return torch.stack([means, log_std_tile], dim=-1) - def sample(self, params, argmax_sampling=False): + def sample(self, params, deterministic=False): """ Sample from a probability space of all actions """ means = params[:, :, 0] log_std = params[:, :, 1] - if 
argmax_sampling: + if deterministic: return means else: return torch.randn_like(means) * torch.exp(log_std) + means @@ -105,11 +105,12 @@ def logprob(self, actions, action_logits): neg_log_prob = F.nll_loss(action_logits, actions, reduction='none') return -neg_log_prob - def sample(self, logits, argmax_sampling=False): + def sample(self, logits, deterministic=False): """ Sample from a probability space of all actions """ - if argmax_sampling: + if deterministic: return torch.argmax(logits, dim=-1) else: + # Gumbel-softmax trick u = torch.rand_like(logits) return torch.argmax(logits - torch.log(-torch.log(u)), dim=-1) @@ -133,7 +134,7 @@ def kl_divergence(self, logits_q, logits_p): return (torch.exp(logits_q) * (logits_q - logits_p)).sum(1, keepdim=True) -class ActionHead(nn.Module): +class StochasticActionHead(nn.Module): """ Network head for action determination. Returns probability distribution parametrization """ diff --git a/vel/rl/model/backbone/__init__.py b/vel/rl/policy/__init__.py similarity index 100% rename from vel/rl/model/backbone/__init__.py rename to vel/rl/policy/__init__.py diff --git a/vel/rl/policy/purgatory/__init__.py b/vel/rl/policy/purgatory/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/rl/model/deterministic_policy_model.py b/vel/rl/policy/purgatory/deterministic_policy.py similarity index 100% rename from vel/rl/model/deterministic_policy_model.py rename to vel/rl/policy/purgatory/deterministic_policy.py diff --git a/vel/rl/model/q_distributional_model.py b/vel/rl/policy/purgatory/q_distributional_policy.py similarity index 100% rename from vel/rl/model/q_distributional_model.py rename to vel/rl/policy/purgatory/q_distributional_policy.py diff --git a/vel/rl/model/q_dueling_model.py b/vel/rl/policy/purgatory/q_dueling_policy.py similarity index 100% rename from vel/rl/model/q_dueling_model.py rename to vel/rl/policy/purgatory/q_dueling_policy.py diff --git a/vel/rl/model/q_model.py 
b/vel/rl/policy/purgatory/q_model.py similarity index 100% rename from vel/rl/model/q_model.py rename to vel/rl/policy/purgatory/q_model.py diff --git a/vel/rl/model/q_noisy_model.py b/vel/rl/policy/purgatory/q_noisy_model.py similarity index 100% rename from vel/rl/model/q_noisy_model.py rename to vel/rl/policy/purgatory/q_noisy_model.py diff --git a/vel/rl/model/q_rainbow_model.py b/vel/rl/policy/purgatory/q_rainbow_model.py similarity index 100% rename from vel/rl/model/q_rainbow_model.py rename to vel/rl/policy/purgatory/q_rainbow_model.py diff --git a/vel/rl/model/q_stochastic_policy_model.py b/vel/rl/policy/purgatory/q_stochastic_policy_model.py similarity index 94% rename from vel/rl/model/q_stochastic_policy_model.py rename to vel/rl/policy/purgatory/q_stochastic_policy_model.py index 5cf97893..c489980d 100644 --- a/vel/rl/model/q_stochastic_policy_model.py +++ b/vel/rl/policy/purgatory/q_stochastic_policy_model.py @@ -5,7 +5,7 @@ from vel.api import LinearBackboneModel, Model, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, Evaluator -from vel.rl.module.action_head import ActionHead +from vel.rl.module.action_head import StochasticActionHead from vel.rl.module.q_head import QHead @@ -43,7 +43,7 @@ def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, ac self.input_block = input_block self.backbone = backbone - self.action_head = ActionHead( + self.action_head = StochasticActionHead( input_dim=self.backbone.output_dim, action_space=action_space ) @@ -71,10 +71,10 @@ def forward(self, observations): return policy_params, q - def step(self, observation, argmax_sampling=False): + def step(self, observation, deterministic=False): """ Select actions based on model's output """ policy_params, q = self(observation) - actions = self.action_head.sample(policy_params, argmax_sampling=argmax_sampling) + actions = self.action_head.sample(policy_params, deterministic=deterministic) 
# log probability - we can do that, because we support only discrete action spaces logprobs = self.action_head.logprob(actions, policy_params) diff --git a/vel/rl/model/stochastic_policy_model.py b/vel/rl/policy/purgatory/stochastic_policy.py similarity index 94% rename from vel/rl/model/stochastic_policy_model.py rename to vel/rl/policy/purgatory/stochastic_policy.py index cee084b6..1788ffc6 100644 --- a/vel/rl/model/stochastic_policy_model.py +++ b/vel/rl/policy/purgatory/stochastic_policy.py @@ -4,7 +4,7 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, Evaluator, RlModel -from vel.rl.module.action_head import ActionHead +from vel.rl.module.action_head import StochasticActionHead from vel.rl.module.value_head import ValueHead @@ -43,7 +43,7 @@ def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, ac self.input_block = input_block self.backbone = backbone - self.action_head = ActionHead( + self.action_head = StochasticActionHead( action_space=action_space, input_dim=self.backbone.output_dim ) @@ -67,10 +67,10 @@ def forward(self, observations): return action_output, value_output - def step(self, observation, argmax_sampling=False): + def step(self, observation, deterministic=False): """ Select actions based on model's output """ action_pd_params, value_output = self(observation) - actions = self.action_head.sample(action_pd_params, argmax_sampling=argmax_sampling) + actions = self.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action logprobs = self.action_head.logprob(actions, action_pd_params) diff --git a/vel/rl/model/stochastic_policy_model_separate.py b/vel/rl/policy/purgatory/stochastic_policy_model_separate.py similarity index 94% rename from vel/rl/model/stochastic_policy_model_separate.py rename to vel/rl/policy/purgatory/stochastic_policy_model_separate.py index 
7612fde3..3044459e 100644 --- a/vel/rl/model/stochastic_policy_model_separate.py +++ b/vel/rl/policy/purgatory/stochastic_policy_model_separate.py @@ -5,7 +5,7 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, RlModel, Evaluator -from vel.rl.module.action_head import ActionHead +from vel.rl.module.action_head import StochasticActionHead from vel.rl.module.value_head import ValueHead from vel.rl.model.stochastic_policy_model import StochasticPolicyEvaluator @@ -24,7 +24,7 @@ def __init__(self, input_block: BackboneModel, self.policy_backbone = policy_backbone self.value_backbone = value_backbone - self.action_head = ActionHead( + self.action_head = StochasticActionHead( action_space=action_space, input_dim=self.policy_backbone.output_dim ) @@ -53,10 +53,10 @@ def forward(self, observations): return action_output, value_output - def step(self, observation, argmax_sampling=False): + def step(self, observation, deterministic=False): """ Select actions based on model's output """ policy_params, values = self(observation) - actions = self.action_head.sample(policy_params, argmax_sampling=argmax_sampling) + actions = self.action_head.sample(policy_params, deterministic=deterministic) # log likelihood of selected action logprobs = self.action_head.logprob(actions, policy_params) diff --git a/vel/rl/model/stochastic_policy_rnn_model.py b/vel/rl/policy/purgatory/stochastic_rnn_policy.py similarity index 95% rename from vel/rl/model/stochastic_policy_rnn_model.py rename to vel/rl/policy/purgatory/stochastic_rnn_policy.py index 7147a381..25551144 100644 --- a/vel/rl/model/stochastic_policy_rnn_model.py +++ b/vel/rl/policy/purgatory/stochastic_rnn_policy.py @@ -5,7 +5,7 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, Trajectories, Evaluator, RlRnnModel -from 
vel.rl.module.action_head import ActionHead +from vel.rl.module.action_head import StochasticActionHead from vel.rl.module.value_head import ValueHead @@ -61,7 +61,7 @@ def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, self.input_block = input_block self.backbone = backbone - self.action_head = ActionHead( + self.action_head = StochasticActionHead( action_space=action_space, input_dim=self.backbone.output_dim ) @@ -91,10 +91,10 @@ def forward(self, observations, state): return action_output, value_output, new_state - def step(self, observations, state, argmax_sampling=False): + def step(self, observations, state, deterministic=False): """ Select actions based on model's output """ action_pd_params, value_output, new_state = self(observations, state) - actions = self.action_head.sample(action_pd_params, argmax_sampling=argmax_sampling) + actions = self.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action logprobs = self.action_head.logprob(actions, action_pd_params) diff --git a/vel/rl/policy/stochastic_policy.py b/vel/rl/policy/stochastic_policy.py new file mode 100644 index 00000000..6ecabc12 --- /dev/null +++ b/vel/rl/policy/stochastic_policy.py @@ -0,0 +1,121 @@ +import gym +import torch +import typing + +from vel.api import LinearBackboneModel, ModelFactory, BackboneModel +from vel.module.input.identity import IdentityFactory +from vel.rl.api import Rollout, Evaluator, Policy +from vel.rl.module.stochastic_action_head import StochasticActionHead +from vel.rl.module.value_head import ValueHead + + +class StochasticPolicyEvaluator(Evaluator): + """ Evaluator for a policy gradient model """ + + def __init__(self, model: 'StochasticPolicy', rollout: Rollout): + super().__init__(rollout) + + self.model = model + + pd_params, estimated_values = model(self.rollout.batch_tensor('observations')) + + self.provide('model:pd_params', pd_params) + self.provide('model:values', estimated_values) + + 
@Evaluator.provides('model:action:logprobs') + def model_action_logprobs(self): + actions = self.get('rollout:actions') + pd_params = self.get('model:pd_params') + return self.model.action_head.logprob(actions, pd_params) + + @Evaluator.provides('model:entropy') + def model_entropy(self): + pd_params = self.get('model:pd_params') + return self.model.action_head.entropy(pd_params) + + +class StochasticPolicy(Policy): + """ + Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone + """ + + def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, action_space: gym.Space): + super().__init__() + + self.input_block = input_block + self.backbone = backbone + + assert not self.backbone.is_stateful, "Backbone shouldn't have state" + + self.action_head = StochasticActionHead( + action_space=action_space, + input_dim=self.backbone.output_dim + ) + + self.value_head = ValueHead( + input_dim=self.backbone.output_dim + ) + + def reset_weights(self): + """ Initialize properly model weights """ + self.input_block.reset_weights() + self.backbone.reset_weights() + self.action_head.reset_weights() + self.value_head.reset_weights() + + def forward(self, observation): + """ Calculate model outputs """ + input_data = self.input_block(observation) + + base_output = self.backbone(input_data) + + action_output = self.action_head(base_output) + value_output = self.value_head(base_output) + + return action_output, value_output + + def act(self, observation, state=None, deterministic=False): + """ Select actions based on model's output """ + action_pd_params, value_output = self(observation) + actions = self.action_head.sample(action_pd_params, deterministic=deterministic) + + # log likelihood of selected action + logprobs = self.action_head.logprob(actions, action_pd_params) + + return { + 'actions': actions, + 'values': value_output, + 'action:logprobs': logprobs + } + + def value(self, observation, state=None) -> 
torch.tensor: + """ Calculate value only - small optimization """ + input_data = self.input_block(observation) + base_output = self.backbone(input_data) + return self.value_head(base_output) + + def evaluate(self, rollout: Rollout) -> Evaluator: + """ Evaluate model on a rollout """ + return StochasticPolicyEvaluator(self, rollout) + + +class StochasticPolicyFactory(ModelFactory): + """ Factory class for policy gradient models """ + def __init__(self, input_block: IdentityFactory, backbone: ModelFactory): + self.backbone = backbone + self.input_block = input_block + + def instantiate(self, **extra_args): + """ Instantiate the model """ + input_block = self.input_block.instantiate() + backbone = self.backbone.instantiate(**extra_args) + + return StochasticPolicy(input_block, backbone, extra_args['action_space']) + + +def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): + """ Vel factory function """ + if input_block is None: + input_block = IdentityFactory() + + return StochasticPolicyFactory(input_block=input_block, backbone=backbone) diff --git a/vel/rl/policy/stochastic_rnn_policy.py b/vel/rl/policy/stochastic_rnn_policy.py new file mode 100644 index 00000000..a1a87f2e --- /dev/null +++ b/vel/rl/policy/stochastic_rnn_policy.py @@ -0,0 +1,156 @@ +import gym +import torch +import typing + +from vel.api import LinearBackboneModel, ModelFactory, BackboneModel +from vel.module.input.identity import IdentityFactory +from vel.rl.api import Rollout, Trajectories, Evaluator, Policy +from vel.rl.module.stochastic_action_head import StochasticActionHead +from vel.rl.module.value_head import ValueHead + + +class StochasticPolicyRnnEvaluator(Evaluator): + """ Evaluate recurrent model from initial state """ + + def __init__(self, model: 'StochasticRnnPolicy', rollout: Rollout): + assert isinstance(rollout, Trajectories), "For an RNN model, we must evaluate trajectories" + super().__init__(rollout) + + self.model = model + + 
observation_trajectories = rollout.transition_tensors['observations'] + hidden_state = rollout.transition_tensors['state'][0] # Initial hidden state + + action_accumulator = [] + value_accumulator = [] + + # Evaluate recurrent network step by step + for i in range(observation_trajectories.size(0)): + action_output, value_output, hidden_state = model(observation_trajectories[i], hidden_state) + action_accumulator.append(action_output) + value_accumulator.append(value_output) + + policy_params = torch.cat(action_accumulator, dim=0) + estimated_values = torch.cat(value_accumulator, dim=0) + + self.provide('model:policy_params', policy_params) + self.provide('model:values', estimated_values) + + @Evaluator.provides('model:action:logprobs') + def model_action_logprobs(self): + actions = self.get('rollout:actions') + policy_params = self.get('model:policy_params') + return self.model.action_head.logprob(actions, policy_params) + + @Evaluator.provides('model:entropy') + def model_entropy(self): + policy_params = self.get('model:policy_params') + return self.model.action_head.entropy(policy_params) + + +class StochasticRnnPolicy(Policy): + """ + Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone + RNN version + """ + + def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, + action_space: gym.Space): + super().__init__() + + self.input_block = input_block + self.backbone = backbone + + assert self.backbone.is_stateful, "Must have a stateful backbone" + + self.action_head = StochasticActionHead( + action_space=action_space, + input_dim=self.backbone.output_dim + ) + self.value_head = ValueHead(input_dim=self.backbone.output_dim) + + assert self.backbone.is_stateful, "Backbone must be a recurrent model" + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return True + + def reset_weights(self): + """ Initialize 
properly model weights """ + self.input_block.reset_weights() + self.backbone.reset_weights() + self.action_head.reset_weights() + self.value_head.reset_weights() + + def forward(self, observations, state): + """ Calculate model outputs """ + input_data = self.input_block(observations) + base_output, new_state = self.backbone(input_data, state=state) + + action_output = self.action_head(base_output) + value_output = self.value_head(base_output) + + return action_output, value_output, new_state + + def act(self, observation, state=None, deterministic=False) -> dict: + """ Select actions based on model's output """ + action_pd_params, value_output, new_state = self(observation, state) + actions = self.action_head.sample(action_pd_params, deterministic=deterministic) + + # log likelihood of selected action + logprobs = self.action_head.logprob(actions, action_pd_params) + + return { + 'actions': actions, + 'values': value_output, + 'action:logprobs': logprobs, + 'state': new_state + } + + def evaluate(self, rollout: Rollout) -> Evaluator: + """ Evaluate model on a rollout """ + return StochasticPolicyRnnEvaluator(self, rollout) + + def value(self, observation, state=None): + """ Calculate only value head for given state """ + input_data = self.input_block(observation) + + base_output, new_state = self.backbone(input_data, state) + value_output = self.value_head(base_output) + + return value_output + + def reset_state(self, state, dones): + """ Reset the state after the episode has been terminated """ + if (dones > 0).any().item(): + zero_state = self.backbone.zero_state(dones.shape[0]).to(state.device) + dones_expanded = dones.unsqueeze(-1) + return state * (1 - dones_expanded) + zero_state * dones_expanded + else: + return state + + +class StochasticRnnPolicyFactory(ModelFactory): + """ Factory class for policy gradient models """ + def __init__(self, input_block: ModelFactory, backbone: ModelFactory): + self.input_block = input_block + self.backbone = backbone + + 
def instantiate(self, **extra_args): + """ Instantiate the model """ + input_block = self.input_block.instantiate() + backbone = self.backbone.instantiate(**extra_args) + + return StochasticRnnPolicy(input_block, backbone, extra_args['action_space']) + + +def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): + """ Vel factory function """ + if input_block is None: + input_block = IdentityFactory() + + return StochasticRnnPolicyFactory( + input_block=input_block, + backbone=backbone + ) diff --git a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py index 8f1888a6..c2d97e31 100644 --- a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py @@ -115,7 +115,7 @@ def on_policy_train_batch(self, batch_info: BatchInfo): rollout = self.env_roller.rollout(batch_info, self.model, self.settings.number_of_steps).to_device(self.device) - batch_result = self.algo.optimizer_step( + batch_result = self.algo.optimize( batch_info=batch_info, device=self.device, model=self.model, @@ -132,7 +132,7 @@ def off_policy_train_batch(self, batch_info: BatchInfo): rollout = self.env_roller.sample(batch_info, self.model, self.settings.number_of_steps).to_device(self.device) - batch_result = self.algo.optimizer_step( + batch_result = self.algo.optimize( batch_info=batch_info, device=self.device, model=self.model, diff --git a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py index d3ce3349..c2ec6fe8 100644 --- a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py @@ -146,7 +146,7 @@ def train_on_replay_memory(self, batch_info): for i in range(self.settings.training_rounds): sampled_rollout = self.env_roller.sample(batch_info, self.model, 
self.settings.training_steps) - batch_result = self.algo.optimizer_step( + batch_result = self.algo.optimize( batch_info=batch_info, device=self.device, model=self.model, diff --git a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py index 46c7d450..d889c681 100644 --- a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py @@ -5,7 +5,10 @@ import tqdm from vel.api import Model, ModelFactory, TrainingInfo, EpochInfo, BatchInfo -from vel.rl.api import ReinforcerBase, ReinforcerFactory, VecEnvFactory, EnvRollerFactoryBase, EnvRollerBase, AlgoBase +from vel.rl.api import ( + ReinforcerBase, ReinforcerFactory, VecEnvFactory, EnvRollerFactoryBase, EnvRollerBase, AlgoBase, + Policy +) from vel.rl.metrics import ( FPSMetric, EpisodeLengthMetric, EpisodeRewardMetricQuantile, EpisodeRewardMetric, FramesMetric @@ -32,16 +35,16 @@ class OnPolicyIterationReinforcer(ReinforcerBase): A reinforcer that calculates on-policy environment rollouts and uses them to train policy directly. May split the sample into multiple batches and may replay batches a few times. 
""" - def __init__(self, device: torch.device, settings: OnPolicyIterationReinforcerSettings, model: Model, + def __init__(self, device: torch.device, settings: OnPolicyIterationReinforcerSettings, policy: Policy, algo: AlgoBase, env_roller: EnvRollerBase) -> None: self.device = device self.settings = settings - self._trained_model = model.to(self.device) - self.env_roller = env_roller self.algo = algo + self._trained_model = policy.to(self.device) + def metrics(self) -> list: """ List of metrics to track for this learning process """ my_metrics = [ @@ -53,7 +56,7 @@ def metrics(self) -> list: EpisodeLengthMetric("episode_length"), ] - return my_metrics + self.algo.metrics() + self.env_roller.metrics() + return my_metrics + self.algo.metrics() + self.env_roller.metrics() + self.model.metrics() @property def model(self) -> Model: @@ -102,7 +105,7 @@ def train_batch(self, batch_info: BatchInfo) -> None: # Calculate environment rollout on the evaluation version of the model self.model.train() - rollout = self.env_roller.rollout(batch_info, self.model, self.settings.number_of_steps) + rollout = self.env_roller.rollout(batch_info, self.settings.number_of_steps) # Process rollout by the 'algo' (e.g. 
perform the advantage estimation) rollout = self.algo.process_rollout(batch_info, rollout) @@ -124,7 +127,7 @@ def train_batch(self, batch_info: BatchInfo) -> None: for i in range(experience_replay_count): # We may potentially need to split rollout into multiple batches if self.settings.batch_size >= rollout.frames(): - batch_result = self.algo.optimizer_step( + batch_result = self.algo.optimize( batch_info=batch_info, device=self.device, model=self.model, @@ -135,7 +138,7 @@ def train_batch(self, batch_info: BatchInfo) -> None: else: # Rollout too big, need to split in batches for batch_rollout in rollout.shuffled_batches(self.settings.batch_size): - batch_result = self.algo.optimizer_step( + batch_result = self.algo.optimize( batch_info=batch_info, device=self.device, model=self.model, @@ -166,10 +169,9 @@ def __init__(self, settings, parallel_envs: int, env_factory: VecEnvFactory, mod def instantiate(self, device: torch.device) -> ReinforcerBase: env = self.env_factory.instantiate(parallel_envs=self.parallel_envs, seed=self.seed) - env_roller = self.env_roller_factory.instantiate(environment=env, device=device) - model = self.model_factory.instantiate(action_space=env.action_space) - - return OnPolicyIterationReinforcer(device, self.settings, model, self.algo, env_roller) + policy = self.model_factory.instantiate(action_space=env.action_space) + env_roller = self.env_roller_factory.instantiate(environment=env, policy=policy, device=device) + return OnPolicyIterationReinforcer(device, self.settings, policy, self.algo, env_roller) def create(model_config, model, vec_env, algo, env_roller, parallel_envs, number_of_steps, diff --git a/vel/rl/test/test_integration.py b/vel/rl/test/test_integration.py index 2f51b419..b0433ac4 100644 --- a/vel/rl/test/test_integration.py +++ b/vel/rl/test/test_integration.py @@ -27,8 +27,8 @@ from vel.rl.model.deterministic_policy_model import DeterministicPolicyModelFactory from vel.rl.model.stochastic_policy_model_separate import 
StochasticPolicyModelSeparateFactory -from vel.rl.model.backbone.nature_cnn import NatureCnnFactory -from vel.rl.model.backbone.mlp import MLPFactory +from vel.rl.backbone.nature_cnn import NatureCnnFactory +from vel.rl.backbone.mlp import MLPFactory from vel.rl.reinforcer.on_policy_iteration_reinforcer import ( OnPolicyIterationReinforcer, OnPolicyIterationReinforcerSettings diff --git a/vel/rl/util/actor.py b/vel/rl/util/actor.py new file mode 100644 index 00000000..a395bb53 --- /dev/null +++ b/vel/rl/util/actor.py @@ -0,0 +1,36 @@ +import torch + +from vel.rl.api import Policy +from vel.util.tensor_util import to_device + + +class PolicyActor: + """ Evaluates policy on a fixed set of environments. Additionally tracks the state """ + + def __init__(self, num_envs: int, policy: Policy, device: torch.device): + self.num_envs = num_envs + self.policy = policy + self.device = device + self.state = to_device(self.policy.zero_state(num_envs), self.device) + + def act(self, observation, advance_state=True, deterministic=False): + """ Return result of a policy on a given input """ + result = self.policy.act(observation, state=self.state, deterministic=deterministic) + + if self.policy.is_stateful and advance_state: + self.state = result['state'] + + return result + + def reset_states(self, dones): + """ Reset states given dones """ + if not self.policy.is_stateful: + return + + dones = dones.to(self.device) + + self.state = self.policy.reset_state(self.state, dones) + + def value(self, observation): + """ Return value for provided observations """ + return self.policy.value(observation, state=self.state) diff --git a/vel/storage/streaming/tensorboard.py b/vel/storage/streaming/tensorboard.py new file mode 100644 index 00000000..ef90b267 --- /dev/null +++ b/vel/storage/streaming/tensorboard.py @@ -0,0 +1,40 @@ +import os +import shutil + +from vel.api import ModelConfig, Callback, TrainingInfo +from torch.utils.tensorboard import SummaryWriter + + +class 
TensorboardStreaming(Callback): + """ Stream results to tensorboard """ + + def __init__(self, model_config: ModelConfig): + self.model_config = model_config + self.logdir = self.model_config.output_dir('tensorboard', self.model_config.run_name) + + def on_train_begin(self, training_info: TrainingInfo) -> None: + """ Potentially cleanup previous runs """ + if training_info.start_epoch_idx == 0: + if os.path.exists(self.logdir): + shutil.rmtree(self.logdir) + + def on_epoch_end(self, epoch_info): + """ Push data to tensorboard on epoch end """ + summary_writer = SummaryWriter(log_dir=self.logdir) + + for key, value in epoch_info.result.items(): + if key == 'epoch_idx': + continue + + summary_writer.add_scalar( + tag=key, + scalar_value=value, + global_step=epoch_info.global_epoch_idx, + ) + + summary_writer.close() + + +def create(model_config): + """ Vel factory function """ + return TensorboardStreaming(model_config) diff --git a/vel/util/tensor_util.py b/vel/util/tensor_util.py index 06e4dd7c..b0683099 100644 --- a/vel/util/tensor_util.py +++ b/vel/util/tensor_util.py @@ -16,3 +16,17 @@ def merge_first_two_dims(tensor): batch_size = shape[0] * shape[1] new_shape = tuple([batch_size] + list(shape[2:])) return tensor.view(new_shape) + + +def to_device(tensor, device: torch.device): + """ Convert tensor-like object to given PyTorch device """ + if tensor is None: + return tensor + elif isinstance(tensor, torch.Tensor): + return tensor.to(device) + elif isinstance(tensor, dict): + return {k: to_device(v, device) for k, v in tensor.items()} + elif isinstance(tensor, list): + return [to_device(v, device) for v in tensor] + else: + raise NotImplementedError From 1b8251a8c4b7ee25fe93c595bb342943ac1ad631 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Sat, 15 Jun 2019 21:29:48 -0700 Subject: [PATCH 044/162] Fixing configuration files.
--- README.md | 12 +++++------- examples-configs/rl/atari/atari_a2c_lstm.yaml | 2 +- examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml | 2 +- examples-configs/rl/atari/atari_acer.yaml | 2 +- .../rl/atari/atari_acer_trust_region.yaml | 2 +- examples-configs/rl/atari/atari_ppo_gru.yaml | 2 +- examples-configs/rl/atari/atari_trpo.yaml | 2 +- .../rl/atari/dqn/atari_dqn_distributional.yaml | 2 +- examples-configs/rl/atari/dqn/atari_dqn_raw.yaml | 2 +- .../rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml | 2 +- .../rl/atari/dqn_rainbow_param/atari_rainbow.yaml | 2 +- .../atari_rp_dqn_distributional.yaml | 2 +- .../dqn_rainbow_param/atari_rp_dqn_noisynet.yaml | 2 +- .../atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml | 2 +- .../rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml | 2 +- 15 files changed, 19 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index be618544..fd5d7c42 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ into a structure that is designed to be reused rather than copied over. As a goal, it should be enough to write a config file that wires existing components together and defines their hyperparameters for most common applications. -If that's not the case few bits of custom glue code should do the job. +If that's not the case few bits of custom glue code should do the job. This repository is still in an early stage of that journey but it will grow @@ -55,16 +55,14 @@ pip install -e . ``` from the repository root directory. -This project requires Python at least 3.6 and PyTorch 1.0. +This project requires Python at least 3.6 and PyTorch 1.1. If you want to run YAML config examples, you'll also need a **project configuration file** `.velproject.yaml`. An example is included in this repository. -Default project configuration writes -metrics to MongoDB instance open on localhost port 27017 and Visdom instance -on localhost port 8097.
+Default project configuration writes logs to the tensorboard directory `output/tensorboard` +under the main directory. Outputs to visdom and mongodb are also implemented. -If you don't want to run these services, there is included -another example file `.velproject.dummy.yaml` +If you don't want any logging, there is included another example file `.velproject.dummy.yaml` that writes training progress to the standard output only. To use it, just rename it to `.velproject.yaml`. diff --git a/examples-configs/rl/atari/atari_a2c_lstm.yaml b/examples-configs/rl/atari/atari_a2c_lstm.yaml index 36947571..f83f5c50 100644 --- a/examples-configs/rl/atari/atari_a2c_lstm.yaml +++ b/examples-configs/rl/atari/atari_a2c_lstm.yaml @@ -60,7 +60,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'breakout_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' evaluate: name: vel.rl.commands.evaluate_env_command diff --git a/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml b/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml index 3fa29e5b..2786a6a4 100644 --- a/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml +++ b/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml @@ -57,7 +57,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'breakout_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' evaluate: name: vel.rl.commands.evaluate_env_command diff --git a/examples-configs/rl/atari/atari_acer.yaml b/examples-configs/rl/atari/atari_acer.yaml index c488883e..52e47b84 100644 --- a/examples-configs/rl/atari/atari_acer.yaml +++ b/examples-configs/rl/atari/atari_acer.yaml @@ -74,7 +74,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'beamrider_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' evaluate: name: vel.rl.commands.evaluate_env_command diff --git a/examples-configs/rl/atari/atari_acer_trust_region.yaml b/examples-configs/rl/atari/atari_acer_trust_region.yaml index 
99bae873..810a6a51 100644 --- a/examples-configs/rl/atari/atari_acer_trust_region.yaml +++ b/examples-configs/rl/atari/atari_acer_trust_region.yaml @@ -75,7 +75,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'beamrider_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' evaluate: name: vel.rl.commands.evaluate_env_command diff --git a/examples-configs/rl/atari/atari_ppo_gru.yaml b/examples-configs/rl/atari/atari_ppo_gru.yaml index 0f8c2e6a..43244de9 100644 --- a/examples-configs/rl/atari/atari_ppo_gru.yaml +++ b/examples-configs/rl/atari/atari_ppo_gru.yaml @@ -75,7 +75,7 @@ commands: record: name: vel.rl.command.record_movie_command takes: 10 - videoname: 'breakout_ppo_gru_vid_{:04}.avi' + videoname: 'atari_ppo_gru_vid_{:04}.avi' evaluate: name: vel.rl.command.evaluate_env_command diff --git a/examples-configs/rl/atari/atari_trpo.yaml b/examples-configs/rl/atari/atari_trpo.yaml index e54e6d07..6b363274 100644 --- a/examples-configs/rl/atari/atari_trpo.yaml +++ b/examples-configs/rl/atari/atari_trpo.yaml @@ -78,7 +78,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'breakout_trpo_vid_{:04}.avi' + videoname: 'atari_trpo_vid_{:04}.avi' frame_history: 4 sample_args: argmax_sampling: true diff --git a/examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml b/examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml index b605a75d..dd5b62f3 100644 --- a/examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml +++ b/examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml @@ -83,7 +83,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'breakout_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' evaluate: name: vel.rl.commands.evaluate_env_command diff --git a/examples-configs/rl/atari/dqn/atari_dqn_raw.yaml b/examples-configs/rl/atari/dqn/atari_dqn_raw.yaml index 31e81b00..a32427bd 100644 --- a/examples-configs/rl/atari/dqn/atari_dqn_raw.yaml 
+++ b/examples-configs/rl/atari/dqn/atari_dqn_raw.yaml @@ -79,7 +79,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'breakout_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' evaluate: name: vel.rl.commands.evaluate_env_command diff --git a/examples-configs/rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml b/examples-configs/rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml index 99127352..3084f15a 100644 --- a/examples-configs/rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml +++ b/examples-configs/rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml @@ -89,7 +89,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'breakout_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' evaluate: name: vel.rl.commands.evaluate_env_command diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atari_rainbow.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rainbow.yaml index af99acd8..9e8a92cb 100644 --- a/examples-configs/rl/atari/dqn_rainbow_param/atari_rainbow.yaml +++ b/examples-configs/rl/atari/dqn_rainbow_param/atari_rainbow.yaml @@ -93,7 +93,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'breakout_rainbow_vid_{:04}.avi' + videoname: 'atari_rainbow_vid_{:04}.avi' fps: 15 evaluate: diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml index 57e4fe91..20fabbd6 100644 --- a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml +++ b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml @@ -84,7 +84,7 @@ commands: name: vel.rl.commands.record_movie_command takes: 10 fps: 15 - videoname: 'asterix_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' evaluate: name: vel.rl.commands.evaluate_env_command diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml 
b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml index e8174023..822e3085 100644 --- a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml +++ b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml @@ -77,7 +77,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'asterix_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' fps: 15 evaluate: diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml index 89629ac7..af118e3d 100644 --- a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml +++ b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml @@ -83,7 +83,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'atlantis_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' evaluate: name: vel.rl.commands.evaluate_env_command diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml index 5786002b..8e7272b8 100644 --- a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml +++ b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml @@ -79,7 +79,7 @@ commands: record: name: vel.rl.commands.record_movie_command takes: 10 - videoname: 'asterix_vid_{:04}.avi' + videoname: 'atari_vid_{:04}.avi' fps: 15 evaluate: From 30e654e5c8dc99553bb3b014932f268371b933b6 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 20 Jun 2019 17:30:46 -0700 Subject: [PATCH 045/162] Added a few extra input modules. 
--- vel/module/input/flatten.py | 28 ++++++++++++++++++++++++++++ vel/module/input/sequence.py | 25 +++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 vel/module/input/flatten.py create mode 100644 vel/module/input/sequence.py diff --git a/vel/module/input/flatten.py b/vel/module/input/flatten.py new file mode 100644 index 00000000..d32b06f0 --- /dev/null +++ b/vel/module/input/flatten.py @@ -0,0 +1,28 @@ +from vel.module.layers import Flatten + + +from vel.api import ModelFactory, BackboneModel + + +class FlattenInput(BackboneModel): + """ Sequence input """ + def __init__(self): + super().__init__() + self.model = Flatten() + + def forward(self, input_data): + return self.model(input_data) + + +def create(): + """ Vel factory function """ + def instantiate(**_): + return Flatten() + + return ModelFactory.generic(instantiate) + + +# Scripting interface +FlattenInputFactory = create + + diff --git a/vel/module/input/sequence.py b/vel/module/input/sequence.py new file mode 100644 index 00000000..43d41ad1 --- /dev/null +++ b/vel/module/input/sequence.py @@ -0,0 +1,25 @@ +import torch.nn as nn + +from vel.api import ModelFactory, BackboneModel + + +class SequenceInput(BackboneModel): + """ Sequence input """ + def __init__(self, modules): + super().__init__() + self.model = nn.Sequential(*modules) + + def forward(self, input_data): + return self.model(input_data) + + +def create(modules): + """ Vel factory function """ + def instantiate(**_): + return SequenceInput([f.instantiate() for f in modules]) + + return ModelFactory.generic(instantiate) + + +# Scripting interface +SequenceInputFactory = create From 54352e9727e6934319135f4c1318cc7308b69051 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 20 Jun 2019 17:34:00 -0700 Subject: [PATCH 046/162] Remove blank line. 
--- vel/module/input/flatten.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vel/module/input/flatten.py b/vel/module/input/flatten.py index d32b06f0..0972616d 100644 --- a/vel/module/input/flatten.py +++ b/vel/module/input/flatten.py @@ -24,5 +24,3 @@ def instantiate(**_): # Scripting interface FlattenInputFactory = create - - From 99106a6098ae844700311418e9f85f8685ddda39 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 20 Jun 2019 17:41:05 -0700 Subject: [PATCH 047/162] Fixed replay env rollers a bit. --- .../trajectory_replay_env_roller.py | 24 +++++++++++-------- .../transition_replay_env_roller.py | 24 +++++++++++-------- vel/rl/module/test/test_action_head.py | 2 +- vel/rl/util/actor.py | 5 ++++ 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/vel/rl/env_roller/trajectory_replay_env_roller.py b/vel/rl/env_roller/trajectory_replay_env_roller.py index 38d1f9aa..e0407a05 100644 --- a/vel/rl/env_roller/trajectory_replay_env_roller.py +++ b/vel/rl/env_roller/trajectory_replay_env_roller.py @@ -2,9 +2,11 @@ import numpy as np from vel.api import BatchInfo +from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import ( - Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, ReplayBuffer, ReplayBufferFactory, RlModel + Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, ReplayBuffer, ReplayBufferFactory, Policy ) +from vel.rl.util.actor import PolicyActor from vel.util.tensor_accumulator import TensorAccumulator @@ -15,11 +17,14 @@ class TrajectoryReplayEnvRoller(ReplayEnvRollerBase): Samples trajectories from the replay buffer (consecutive series of frames) """ - def __init__(self, environment, device, replay_buffer: ReplayBuffer): + def __init__(self, environment: VecEnv, policy: Policy, device: torch.device, replay_buffer: ReplayBuffer): self._environment = environment self.device = device self.replay_buffer = replay_buffer + self.actor = PolicyActor(self.environment.num_envs, 
policy, device) + assert not self.actor.is_stateful, "Does not support stateful policies" + # Initial observation self.last_observation_cpu = torch.from_numpy(self.environment.reset()).clone() self.last_observation = self.last_observation_cpu.to(self.device) @@ -30,15 +35,13 @@ def environment(self): return self._environment @torch.no_grad() - def rollout(self, batch_info: BatchInfo, model: RlModel, number_of_steps: int) -> Rollout: + def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Calculate env rollout """ - assert not model.is_stateful, "Replay env roller does not support recurrent models" - accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information for step_idx in range(number_of_steps): - step = model.step(self.last_observation) + step = self.actor.act(self.last_observation) replay_extra_information = {} @@ -84,17 +87,17 @@ def rollout(self, batch_info: BatchInfo, model: RlModel, number_of_steps: int) - environment_information=episode_information, transition_tensors=accumulated_tensors, rollout_tensors={ - 'final_values': model.value(self.last_observation).cpu() + 'final_values': self.actor.value(self.last_observation).cpu() } ) - def sample(self, batch_info: BatchInfo, model: RlModel, number_of_steps: int) -> Rollout: + def sample(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Sample experience from replay buffer and return a batch """ # Sample trajectories rollout = self.replay_buffer.sample_trajectories(rollout_length=number_of_steps, batch_info=batch_info) last_observations = rollout.transition_tensors['observations_next'][-1].to(self.device) - final_values = model.value(last_observations).cpu() + final_values = self.actor.value(last_observations).cpu() # Add 'final_values' to the rollout rollout.rollout_tensors['final_values'] = final_values @@ -116,11 +119,12 @@ class TrajectoryReplayEnvRollerFactory(ReplayEnvRollerFactoryBase): def __init__(self, 
replay_buffer_factory: ReplayBufferFactory): self.replay_buffer_factory = replay_buffer_factory - def instantiate(self, environment, device): + def instantiate(self, environment, policy, device): replay_buffer = self.replay_buffer_factory.instantiate(environment) return TrajectoryReplayEnvRoller( environment=environment, + policy=policy, device=device, replay_buffer=replay_buffer ) diff --git a/vel/rl/env_roller/transition_replay_env_roller.py b/vel/rl/env_roller/transition_replay_env_roller.py index d0ed933b..25c1541a 100644 --- a/vel/rl/env_roller/transition_replay_env_roller.py +++ b/vel/rl/env_roller/transition_replay_env_roller.py @@ -4,10 +4,12 @@ import numpy as np from vel.api import BatchInfo, ModelFactory +from vel.openai.baselines.common.vec_env import VecEnv from vel.openai.baselines.common.running_mean_std import RunningMeanStd from vel.rl.api import ( - Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, RlModel, ReplayBuffer, ReplayBufferFactory + Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, ReplayBuffer, ReplayBufferFactory, Policy ) +from vel.rl.util.actor import PolicyActor from vel.util.tensor_accumulator import TensorAccumulator @@ -18,9 +20,9 @@ class TransitionReplayEnvRoller(ReplayEnvRollerBase): Samples transitions from the replay buffer (individual frame transitions) """ - def __init__(self, environment, device, replay_buffer: ReplayBuffer, discount_factor: typing.Optional[float] = None, - normalize_returns: bool = False, forward_steps: int = 1, - action_noise: typing.Optional[nn.Module] = None): + def __init__(self, environment: VecEnv, policy: Policy, device: torch.device, replay_buffer: ReplayBuffer, + discount_factor: typing.Optional[float] = None, normalize_returns: bool = False, + forward_steps: int = 1, action_noise: typing.Optional[nn.Module] = None): self._environment = environment self.device = device self.replay_buffer = replay_buffer @@ -29,6 +31,9 @@ def __init__(self, environment, 
device, replay_buffer: ReplayBuffer, discount_fa self.discount_factor = discount_factor self.action_noise = action_noise.to(self.device) if action_noise is not None else None + self.actor = PolicyActor(self.environment.num_envs, policy, device) + assert not self.actor.is_stateful, "Does not support stateful policies" + if self.normalize_returns: assert self.discount_factor is not None, \ "TransitionReplayEnvRoller must have a discount factor defined if normalize_returns is turned on" @@ -53,15 +58,13 @@ def environment(self): return self._environment @torch.no_grad() - def rollout(self, batch_info: BatchInfo, model: RlModel, number_of_steps: int) -> Rollout: + def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Calculate env rollout """ - assert not model.is_stateful, "Replay env roller does not support stateful models" - accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information for step_idx in range(number_of_steps): - step = model.step(self.last_observation) + step = self.actor.act(self.last_observation) if self.action_noise is not None: step['actions'] = self.action_noise(step['actions'], batch_info=batch_info) @@ -124,7 +127,7 @@ def rollout(self, batch_info: BatchInfo, model: RlModel, number_of_steps: int) - rollout_tensors={} ).to_transitions() - def sample(self, batch_info: BatchInfo, model: RlModel, number_of_steps: int) -> Rollout: + def sample(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Sample experience from replay buffer and return a batch """ if self.forward_steps > 1: transitions = self.replay_buffer.sample_forward_transitions( @@ -166,7 +169,7 @@ def __init__(self, replay_buffer_factory: ReplayBufferFactory, discount_factor: self.discount_factor = discount_factor self.action_noise_factory = action_noise - def instantiate(self, environment, device): + def instantiate(self, environment, policy, device): replay_buffer = 
self.replay_buffer_factory.instantiate(environment) if self.action_noise_factory is None: @@ -176,6 +179,7 @@ def instantiate(self, environment, device): return TransitionReplayEnvRoller( environment=environment, + policy=policy, device=device, replay_buffer=replay_buffer, discount_factor=self.discount_factor, diff --git a/vel/rl/module/test/test_action_head.py b/vel/rl/module/test/test_action_head.py index 6dc22e06..b0364e5c 100644 --- a/vel/rl/module/test/test_action_head.py +++ b/vel/rl/module/test/test_action_head.py @@ -7,7 +7,7 @@ import torch.nn.functional as F import torch.distributions as d -from vel.rl.module.action_head import DiagGaussianActionHead, CategoricalActionHead +from vel.rl.module.stochastic_action_head import DiagGaussianActionHead, CategoricalActionHead def test_sample_diag_gaussian(): diff --git a/vel/rl/util/actor.py b/vel/rl/util/actor.py index a395bb53..43fc4b80 100644 --- a/vel/rl/util/actor.py +++ b/vel/rl/util/actor.py @@ -34,3 +34,8 @@ def reset_states(self, dones): def value(self, observation): """ Return value for provided observations """ return self.policy.value(observation, state=self.state) + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return self.policy.is_stateful From 772f6bcd9e3d81798c3b6a9e0152eda07a7f6774 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 20 Jun 2019 17:47:04 -0700 Subject: [PATCH 048/162] Fixed integration tests a bit for the time being. 
--- vel/rl/test/test_integration.py | 942 ++++++++++++++++---------------- vel/rl/util/actor.py | 2 +- 2 files changed, 473 insertions(+), 471 deletions(-) diff --git a/vel/rl/test/test_integration.py b/vel/rl/test/test_integration.py index b0433ac4..912debf1 100644 --- a/vel/rl/test/test_integration.py +++ b/vel/rl/test/test_integration.py @@ -21,11 +21,11 @@ from vel.rl.vecenv.subproc import SubprocVecEnvWrapper from vel.rl.vecenv.dummy import DummyVecEnvWrapper -from vel.rl.model.stochastic_policy_model import StochasticPolicyModelFactory -from vel.rl.model.q_stochastic_policy_model import QStochasticPolicyModelFactory -from vel.rl.model.q_model import QModelFactory -from vel.rl.model.deterministic_policy_model import DeterministicPolicyModelFactory -from vel.rl.model.stochastic_policy_model_separate import StochasticPolicyModelSeparateFactory +from vel.rl.policy.stochastic_policy import StochasticPolicyFactory +# from vel.rl.model.q_stochastic_policy_model import QStochasticPolicyModelFactory +# from vel.rl.model.q_model import QModelFactory +# from vel.rl.model.deterministic_policy_model import DeterministicPolicyModelFactory +# from vel.rl.model.stochastic_policy_model_separate import StochasticPolicyModelSeparateFactory from vel.rl.backbone.nature_cnn import NatureCnnFactory from vel.rl.backbone.mlp import MLPFactory @@ -73,7 +73,7 @@ def test_a2c_breakout(): # Again, use a helper to create a model # But because model is owned by the reinforcer, model should not be accessed using this variable # but from reinforcer.model property - model = StochasticPolicyModelFactory( + policy = StochasticPolicyFactory( input_block=ImageToTensorFactory(), backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) ).instantiate(action_space=vec_env.action_space) @@ -85,7 +85,7 @@ def test_a2c_breakout(): batch_size=256, number_of_steps=5 ), - model=model, + policy=policy, algo=A2CPolicyGradient( entropy_coefficient=0.01, value_coefficient=0.5, @@ -94,6 
+94,7 @@ def test_a2c_breakout(): ), env_roller=StepEnvRoller( environment=vec_env, + policy=policy, device=CPU_DEVICE ) ) @@ -150,7 +151,7 @@ def test_ppo_breakout(): # Again, use a helper to create a model # But because model is owned by the reinforcer, model should not be accessed using this variable # but from reinforcer.model property - model = StochasticPolicyModelFactory( + policy = StochasticPolicyFactory( input_block=ImageToTensorFactory(), backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) ).instantiate(action_space=vec_env.action_space) @@ -163,7 +164,7 @@ def test_ppo_breakout(): batch_size=4, experience_replay=2, ), - model=model, + policy=policy, algo=PpoPolicyGradient( entropy_coefficient=0.01, value_coefficient=0.5, @@ -174,6 +175,7 @@ def test_ppo_breakout(): ), env_roller=StepEnvRoller( environment=vec_env, + policy=policy, device=device, ) ) @@ -214,464 +216,464 @@ def test_ppo_breakout(): training_info.on_train_end() -def test_dqn_breakout(): - """ - Simple 1 iteration of DQN breakout - """ - device = torch.device('cpu') - seed = 1001 - - # Set random seed in python std lib, numpy and pytorch - set_seed(seed) - - # Only single environment for DQN - vec_env = DummyVecEnvWrapper( - ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 - ).instantiate(parallel_envs=1, seed=seed) - - # Again, use a helper to create a model - # But because model is owned by the reinforcer, model should not be accessed using this variable - # but from reinforcer.model property - model_factory = QModelFactory( - input_block=ImageToTensorFactory(), - backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) - ) - - # Reinforcer - an object managing the learning process - reinforcer = BufferedOffPolicyIterationReinforcer( - device=device, - settings=BufferedOffPolicyIterationReinforcerSettings( - rollout_steps=4, - training_steps=1, - ), - environment=vec_env, - algo=DeepQLearning( - model_factory=model_factory, - 
double_dqn=False, - target_update_frequency=10_000, - discount_factor=0.99, - max_grad_norm=0.5 - ), - model=model_factory.instantiate(action_space=vec_env.action_space), - env_roller=TransitionReplayEnvRoller( - environment=vec_env, - device=device, - replay_buffer=CircularReplayBuffer( - buffer_capacity=100, - buffer_initial_size=100, - num_envs=vec_env.num_envs, - observation_space=vec_env.observation_space, - action_space=vec_env.action_space, - frame_stack_compensation=True, - frame_history=4 - ), - action_noise=EpsGreedy( - epsilon=LinearAndConstantSchedule( - initial_value=1.0, final_value=0.1, end_of_interpolation=0.1 - ), - environment=vec_env - ) - ) - ) - - # Model optimizer - optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=2.5e-4, alpha=0.95, momentum=0.95, eps=1e-3) - - # Overall information store for training information - training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[ - FrameTracker(100_000) - ] # Print live metrics every epoch to standard output - ) - - # A bit of training initialization bookkeeping... 
- training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 100 batches per epoch to average metrics nicely - num_epochs = 1 - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=1, - optimizer=optimizer - ) - - reinforcer.train_epoch(epoch_info, interactive=False) - - training_info.on_train_end() - - -def test_prioritized_dqn_breakout(): - """ - Simple 1 iteration of DQN prioritized replay breakout - """ - device = torch.device('cpu') - seed = 1001 - - # Set random seed in python std lib, numpy and pytorch - set_seed(seed) - - # Only single environment for DQN - vec_env = DummyVecEnvWrapper( - ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 - ).instantiate(parallel_envs=1, seed=seed) - - # Again, use a helper to create a model - # But because model is owned by the reinforcer, model should not be accessed using this variable - # but from reinforcer.model property - model_factory = QModelFactory( - input_block=ImageToTensorFactory(), - backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) - ) - - # Reinforcer - an object managing the learning process - reinforcer = BufferedOffPolicyIterationReinforcer( - device=device, - settings=BufferedOffPolicyIterationReinforcerSettings( - rollout_steps=4, - training_steps=1, - ), - environment=vec_env, - algo=DeepQLearning( - model_factory=model_factory, - double_dqn=False, - target_update_frequency=10_000, - discount_factor=0.99, - max_grad_norm=0.5 - ), - model=model_factory.instantiate(action_space=vec_env.action_space), - env_roller=TransitionReplayEnvRoller( - environment=vec_env, - device=device, - replay_buffer=PrioritizedCircularReplayBuffer( - buffer_capacity=100, - buffer_initial_size=100, - num_envs=vec_env.num_envs, - observation_space=vec_env.observation_space, - action_space=vec_env.action_space, - 
priority_exponent=0.6, - priority_weight=LinearSchedule( - initial_value=0.4, - final_value=1.0 - ), - priority_epsilon=1.0e-6, - frame_stack_compensation=True, - frame_history=4 - ), - action_noise=EpsGreedy( - epsilon=LinearAndConstantSchedule( - initial_value=1.0, final_value=0.1, end_of_interpolation=0.1 - ), - environment=vec_env - ) - ) - ) - - # Model optimizer - optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=2.5e-4, alpha=0.95, momentum=0.95, eps=1e-3) - - # Overall information store for training information - training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[ - FrameTracker(100_000) - ] # Print live metrics every epoch to standard output - ) - - # A bit of training initialization bookkeeping... - training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 100 batches per epoch to average metrics nicely - num_epochs = 1 - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=1, - optimizer=optimizer - ) - - reinforcer.train_epoch(epoch_info, interactive=False) - - training_info.on_train_end() - - -def test_ddpg_bipedal_walker(): - """ - 1 iteration of DDPG bipedal walker environment - """ - device = torch.device('cpu') - seed = 1001 - - # Set random seed in python std lib, numpy and pytorch - set_seed(seed) - - # Only single environment for DDPG - - vec_env = DummyVecEnvWrapper( - MujocoEnv('BipedalWalker-v2') - ).instantiate(parallel_envs=1, seed=seed) - - # Again, use a helper to create a model - # But because model is owned by the reinforcer, model should not be accessed using this variable - # but from reinforcer.model property - model_factory = DeterministicPolicyModelFactory( - input_block=NormalizeObservationsFactory(input_shape=24), - 
policy_backbone=MLPFactory(input_length=24, hidden_layers=[64, 64], normalization='layer'), - value_backbone=MLPFactory(input_length=28, hidden_layers=[64, 64], normalization='layer') - ) - - # Reinforcer - an object managing the learning process - reinforcer = BufferedOffPolicyIterationReinforcer( - device=device, - settings=BufferedOffPolicyIterationReinforcerSettings( - rollout_steps=4, - training_steps=1, - ), - environment=vec_env, - algo=DeepDeterministicPolicyGradient( - model_factory=model_factory, - tau=0.01, - discount_factor=0.99, - max_grad_norm=0.5 - ), - model=model_factory.instantiate(action_space=vec_env.action_space), - env_roller=TransitionReplayEnvRoller( - environment=vec_env, - device=device, - action_noise=OuNoise(std_dev=0.2, environment=vec_env), - replay_buffer=CircularReplayBuffer( - buffer_capacity=100, - buffer_initial_size=100, - num_envs=vec_env.num_envs, - observation_space=vec_env.observation_space, - action_space=vec_env.action_space - ), - normalize_returns=True, - discount_factor=0.99 - ), - ) - - # Model optimizer - optimizer = optim.Adam(reinforcer.model.parameters(), lr=2.5e-4, eps=1e-4) - - # Overall information store for training information - training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[ - FrameTracker(100_000) - ] # Print live metrics every epoch to standard output - ) - - # A bit of training initialization bookkeeping... 
- training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 100 batches per epoch to average metrics nicely - num_epochs = 1 - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=1, - optimizer=optimizer - ) - - reinforcer.train_epoch(epoch_info, interactive=False) - - training_info.on_train_end() - - -def test_trpo_bipedal_walker(): - """ - 1 iteration of TRPO on bipedal walker - """ - device = torch.device('cpu') - seed = 1001 - - # Set random seed in python std lib, numpy and pytorch - set_seed(seed) - - vec_env = DummyVecEnvWrapper( - MujocoEnv('BipedalWalker-v2', normalize_returns=True), - ).instantiate(parallel_envs=8, seed=seed) - - # Again, use a helper to create a model - # But because model is owned by the reinforcer, model should not be accessed using this variable - # but from reinforcer.model property - model_factory = StochasticPolicyModelSeparateFactory( - input_block=NormalizeObservationsFactory(input_shape=24), - policy_backbone=MLPFactory(input_length=24, hidden_layers=[32, 32]), - value_backbone=MLPFactory(input_length=24, hidden_layers=[32]) - ) - - # Reinforcer - an object managing the learning process - reinforcer = OnPolicyIterationReinforcer( - device=device, - settings=OnPolicyIterationReinforcerSettings( - number_of_steps=12, - ), - model=model_factory.instantiate(action_space=vec_env.action_space), - algo=TrpoPolicyGradient( - max_kl=0.01, - cg_iters=10, - line_search_iters=10, - improvement_acceptance_ratio=0.1, - cg_damping=0.1, - vf_iters=5, - entropy_coef=0.0, - discount_factor=0.99, - max_grad_norm=0.5, - gae_lambda=1.0 - ), - env_roller=StepEnvRoller( - environment=vec_env, - device=device, - ) - ) - - # Model optimizer - optimizer = optim.Adam(reinforcer.model.parameters(), lr=1.0e-3, eps=1e-4) - - # Overall information store for training information 
- training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[ - FrameTracker(100_000) - ] # Print live metrics every epoch to standard output - ) - - # A bit of training initialization bookkeeping... - training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 100 batches per epoch to average metrics nicely - num_epochs = 1 - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=1, - optimizer=optimizer - ) - - reinforcer.train_epoch(epoch_info, interactive=False) - - training_info.on_train_end() - - -def test_acer_breakout(): - """ - 1 iteration of ACER on breakout environment - """ - device = torch.device('cpu') - seed = 1001 - - # Set random seed in python std lib, numpy and pytorch - set_seed(seed) - - # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers - # These are just helper functions for that - vec_env = SubprocVecEnvWrapper( - ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 - ).instantiate(parallel_envs=16, seed=seed) - - # Again, use a helper to create a model - # But because model is owned by the reinforcer, model should not be accessed using this variable - # but from reinforcer.model property - model_factory = QStochasticPolicyModelFactory( - input_block=ImageToTensorFactory(), - backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) - ) - - # Reinforcer - an object managing the learning process - reinforcer = BufferedMixedPolicyIterationReinforcer( - device=device, - settings=BufferedMixedPolicyIterationReinforcerSettings( - experience_replay=2, - number_of_steps=12, - stochastic_experience_replay=False - ), - model=model_factory.instantiate(action_space=vec_env.action_space), - env=vec_env, - 
algo=AcerPolicyGradient( - model_factory=model_factory, - entropy_coefficient=0.01, - q_coefficient=0.5, - rho_cap=10.0, - retrace_rho_cap=1.0, - trust_region=True, - trust_region_delta=1.0, - discount_factor=0.99, - max_grad_norm=10.0, - ), - env_roller=TrajectoryReplayEnvRoller( - environment=vec_env, - device=device, - replay_buffer=CircularReplayBuffer( - buffer_capacity=100, - buffer_initial_size=100, - num_envs=vec_env.num_envs, - action_space=vec_env.action_space, - observation_space=vec_env.observation_space, - frame_stack_compensation=True, - frame_history=4, - ) - ), - ) - - # Model optimizer - optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=7.0e-4, eps=1e-3, alpha=0.99) - - # Overall information store for training information - training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[] # Print live metrics every epoch to standard output - ) - - # A bit of training initialization bookkeeping... 
- training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 100 batches per epoch to average metrics nicely - num_epochs = 1 - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=1, - optimizer=optimizer - ) - - reinforcer.train_epoch(epoch_info, interactive=False) - - training_info.on_train_end() +# def test_dqn_breakout(): +# """ +# Simple 1 iteration of DQN breakout +# """ +# device = torch.device('cpu') +# seed = 1001 +# +# # Set random seed in python std lib, numpy and pytorch +# set_seed(seed) +# +# # Only single environment for DQN +# vec_env = DummyVecEnvWrapper( +# ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 +# ).instantiate(parallel_envs=1, seed=seed) +# +# # Again, use a helper to create a model +# # But because model is owned by the reinforcer, model should not be accessed using this variable +# # but from reinforcer.model property +# model_factory = QModelFactory( +# input_block=ImageToTensorFactory(), +# backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) +# ) +# +# # Reinforcer - an object managing the learning process +# reinforcer = BufferedOffPolicyIterationReinforcer( +# device=device, +# settings=BufferedOffPolicyIterationReinforcerSettings( +# rollout_steps=4, +# training_steps=1, +# ), +# environment=vec_env, +# algo=DeepQLearning( +# model_factory=model_factory, +# double_dqn=False, +# target_update_frequency=10_000, +# discount_factor=0.99, +# max_grad_norm=0.5 +# ), +# model=model_factory.instantiate(action_space=vec_env.action_space), +# env_roller=TransitionReplayEnvRoller( +# environment=vec_env, +# device=device, +# replay_buffer=CircularReplayBuffer( +# buffer_capacity=100, +# buffer_initial_size=100, +# num_envs=vec_env.num_envs, +# observation_space=vec_env.observation_space, +# action_space=vec_env.action_space, 
+# frame_stack_compensation=True, +# frame_history=4 +# ), +# action_noise=EpsGreedy( +# epsilon=LinearAndConstantSchedule( +# initial_value=1.0, final_value=0.1, end_of_interpolation=0.1 +# ), +# environment=vec_env +# ) +# ) +# ) +# +# # Model optimizer +# optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=2.5e-4, alpha=0.95, momentum=0.95, eps=1e-3) +# +# # Overall information store for training information +# training_info = TrainingInfo( +# metrics=[ +# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode +# ], +# callbacks=[ +# FrameTracker(100_000) +# ] # Print live metrics every epoch to standard output +# ) +# +# # A bit of training initialization bookkeeping... +# training_info.initialize() +# reinforcer.initialize_training(training_info) +# training_info.on_train_begin() +# +# # Let's make 100 batches per epoch to average metrics nicely +# num_epochs = 1 +# +# # Normal handrolled training loop +# for i in range(1, num_epochs+1): +# epoch_info = EpochInfo( +# training_info=training_info, +# global_epoch_idx=i, +# batches_per_epoch=1, +# optimizer=optimizer +# ) +# +# reinforcer.train_epoch(epoch_info, interactive=False) +# +# training_info.on_train_end() +# +# +# def test_prioritized_dqn_breakout(): +# """ +# Simple 1 iteration of DQN prioritized replay breakout +# """ +# device = torch.device('cpu') +# seed = 1001 +# +# # Set random seed in python std lib, numpy and pytorch +# set_seed(seed) +# +# # Only single environment for DQN +# vec_env = DummyVecEnvWrapper( +# ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 +# ).instantiate(parallel_envs=1, seed=seed) +# +# # Again, use a helper to create a model +# # But because model is owned by the reinforcer, model should not be accessed using this variable +# # but from reinforcer.model property +# model_factory = QModelFactory( +# input_block=ImageToTensorFactory(), +# backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) +# ) +# +# # 
Reinforcer - an object managing the learning process +# reinforcer = BufferedOffPolicyIterationReinforcer( +# device=device, +# settings=BufferedOffPolicyIterationReinforcerSettings( +# rollout_steps=4, +# training_steps=1, +# ), +# environment=vec_env, +# algo=DeepQLearning( +# model_factory=model_factory, +# double_dqn=False, +# target_update_frequency=10_000, +# discount_factor=0.99, +# max_grad_norm=0.5 +# ), +# model=model_factory.instantiate(action_space=vec_env.action_space), +# env_roller=TransitionReplayEnvRoller( +# environment=vec_env, +# device=device, +# replay_buffer=PrioritizedCircularReplayBuffer( +# buffer_capacity=100, +# buffer_initial_size=100, +# num_envs=vec_env.num_envs, +# observation_space=vec_env.observation_space, +# action_space=vec_env.action_space, +# priority_exponent=0.6, +# priority_weight=LinearSchedule( +# initial_value=0.4, +# final_value=1.0 +# ), +# priority_epsilon=1.0e-6, +# frame_stack_compensation=True, +# frame_history=4 +# ), +# action_noise=EpsGreedy( +# epsilon=LinearAndConstantSchedule( +# initial_value=1.0, final_value=0.1, end_of_interpolation=0.1 +# ), +# environment=vec_env +# ) +# ) +# ) +# +# # Model optimizer +# optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=2.5e-4, alpha=0.95, momentum=0.95, eps=1e-3) +# +# # Overall information store for training information +# training_info = TrainingInfo( +# metrics=[ +# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode +# ], +# callbacks=[ +# FrameTracker(100_000) +# ] # Print live metrics every epoch to standard output +# ) +# +# # A bit of training initialization bookkeeping... 
+# training_info.initialize() +# reinforcer.initialize_training(training_info) +# training_info.on_train_begin() +# +# # Let's make 100 batches per epoch to average metrics nicely +# num_epochs = 1 +# +# # Normal handrolled training loop +# for i in range(1, num_epochs+1): +# epoch_info = EpochInfo( +# training_info=training_info, +# global_epoch_idx=i, +# batches_per_epoch=1, +# optimizer=optimizer +# ) +# +# reinforcer.train_epoch(epoch_info, interactive=False) +# +# training_info.on_train_end() +# +# +# def test_ddpg_bipedal_walker(): +# """ +# 1 iteration of DDPG bipedal walker environment +# """ +# device = torch.device('cpu') +# seed = 1001 +# +# # Set random seed in python std lib, numpy and pytorch +# set_seed(seed) +# +# # Only single environment for DDPG +# +# vec_env = DummyVecEnvWrapper( +# MujocoEnv('BipedalWalker-v2') +# ).instantiate(parallel_envs=1, seed=seed) +# +# # Again, use a helper to create a model +# # But because model is owned by the reinforcer, model should not be accessed using this variable +# # but from reinforcer.model property +# model_factory = DeterministicPolicyModelFactory( +# input_block=NormalizeObservationsFactory(input_shape=24), +# policy_backbone=MLPFactory(input_length=24, hidden_layers=[64, 64], normalization='layer'), +# value_backbone=MLPFactory(input_length=28, hidden_layers=[64, 64], normalization='layer') +# ) +# +# # Reinforcer - an object managing the learning process +# reinforcer = BufferedOffPolicyIterationReinforcer( +# device=device, +# settings=BufferedOffPolicyIterationReinforcerSettings( +# rollout_steps=4, +# training_steps=1, +# ), +# environment=vec_env, +# algo=DeepDeterministicPolicyGradient( +# model_factory=model_factory, +# tau=0.01, +# discount_factor=0.99, +# max_grad_norm=0.5 +# ), +# model=model_factory.instantiate(action_space=vec_env.action_space), +# env_roller=TransitionReplayEnvRoller( +# environment=vec_env, +# device=device, +# action_noise=OuNoise(std_dev=0.2, environment=vec_env), +# 
replay_buffer=CircularReplayBuffer( +# buffer_capacity=100, +# buffer_initial_size=100, +# num_envs=vec_env.num_envs, +# observation_space=vec_env.observation_space, +# action_space=vec_env.action_space +# ), +# normalize_returns=True, +# discount_factor=0.99 +# ), +# ) +# +# # Model optimizer +# optimizer = optim.Adam(reinforcer.model.parameters(), lr=2.5e-4, eps=1e-4) +# +# # Overall information store for training information +# training_info = TrainingInfo( +# metrics=[ +# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode +# ], +# callbacks=[ +# FrameTracker(100_000) +# ] # Print live metrics every epoch to standard output +# ) +# +# # A bit of training initialization bookkeeping... +# training_info.initialize() +# reinforcer.initialize_training(training_info) +# training_info.on_train_begin() +# +# # Let's make 100 batches per epoch to average metrics nicely +# num_epochs = 1 +# +# # Normal handrolled training loop +# for i in range(1, num_epochs+1): +# epoch_info = EpochInfo( +# training_info=training_info, +# global_epoch_idx=i, +# batches_per_epoch=1, +# optimizer=optimizer +# ) +# +# reinforcer.train_epoch(epoch_info, interactive=False) +# +# training_info.on_train_end() +# +# +# def test_trpo_bipedal_walker(): +# """ +# 1 iteration of TRPO on bipedal walker +# """ +# device = torch.device('cpu') +# seed = 1001 +# +# # Set random seed in python std lib, numpy and pytorch +# set_seed(seed) +# +# vec_env = DummyVecEnvWrapper( +# MujocoEnv('BipedalWalker-v2', normalize_returns=True), +# ).instantiate(parallel_envs=8, seed=seed) +# +# # Again, use a helper to create a model +# # But because model is owned by the reinforcer, model should not be accessed using this variable +# # but from reinforcer.model property +# model_factory = StochasticPolicyModelSeparateFactory( +# input_block=NormalizeObservationsFactory(input_shape=24), +# policy_backbone=MLPFactory(input_length=24, hidden_layers=[32, 32]), +# 
value_backbone=MLPFactory(input_length=24, hidden_layers=[32]) +# ) +# +# # Reinforcer - an object managing the learning process +# reinforcer = OnPolicyIterationReinforcer( +# device=device, +# settings=OnPolicyIterationReinforcerSettings( +# number_of_steps=12, +# ), +# model=model_factory.instantiate(action_space=vec_env.action_space), +# algo=TrpoPolicyGradient( +# max_kl=0.01, +# cg_iters=10, +# line_search_iters=10, +# improvement_acceptance_ratio=0.1, +# cg_damping=0.1, +# vf_iters=5, +# entropy_coef=0.0, +# discount_factor=0.99, +# max_grad_norm=0.5, +# gae_lambda=1.0 +# ), +# env_roller=StepEnvRoller( +# environment=vec_env, +# device=device, +# ) +# ) +# +# # Model optimizer +# optimizer = optim.Adam(reinforcer.model.parameters(), lr=1.0e-3, eps=1e-4) +# +# # Overall information store for training information +# training_info = TrainingInfo( +# metrics=[ +# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode +# ], +# callbacks=[ +# FrameTracker(100_000) +# ] # Print live metrics every epoch to standard output +# ) +# +# # A bit of training initialization bookkeeping... 
+# training_info.initialize() +# reinforcer.initialize_training(training_info) +# training_info.on_train_begin() +# +# # Let's make 100 batches per epoch to average metrics nicely +# num_epochs = 1 +# +# # Normal handrolled training loop +# for i in range(1, num_epochs+1): +# epoch_info = EpochInfo( +# training_info=training_info, +# global_epoch_idx=i, +# batches_per_epoch=1, +# optimizer=optimizer +# ) +# +# reinforcer.train_epoch(epoch_info, interactive=False) +# +# training_info.on_train_end() +# +# +# def test_acer_breakout(): +# """ +# 1 iteration of ACER on breakout environment +# """ +# device = torch.device('cpu') +# seed = 1001 +# +# # Set random seed in python std lib, numpy and pytorch +# set_seed(seed) +# +# # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers +# # These are just helper functions for that +# vec_env = SubprocVecEnvWrapper( +# ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 +# ).instantiate(parallel_envs=16, seed=seed) +# +# # Again, use a helper to create a model +# # But because model is owned by the reinforcer, model should not be accessed using this variable +# # but from reinforcer.model property +# model_factory = QStochasticPolicyModelFactory( +# input_block=ImageToTensorFactory(), +# backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) +# ) +# +# # Reinforcer - an object managing the learning process +# reinforcer = BufferedMixedPolicyIterationReinforcer( +# device=device, +# settings=BufferedMixedPolicyIterationReinforcerSettings( +# experience_replay=2, +# number_of_steps=12, +# stochastic_experience_replay=False +# ), +# model=model_factory.instantiate(action_space=vec_env.action_space), +# env=vec_env, +# algo=AcerPolicyGradient( +# model_factory=model_factory, +# entropy_coefficient=0.01, +# q_coefficient=0.5, +# rho_cap=10.0, +# retrace_rho_cap=1.0, +# trust_region=True, +# trust_region_delta=1.0, +# discount_factor=0.99, +# max_grad_norm=10.0, 
+# ), +# env_roller=TrajectoryReplayEnvRoller( +# environment=vec_env, +# device=device, +# replay_buffer=CircularReplayBuffer( +# buffer_capacity=100, +# buffer_initial_size=100, +# num_envs=vec_env.num_envs, +# action_space=vec_env.action_space, +# observation_space=vec_env.observation_space, +# frame_stack_compensation=True, +# frame_history=4, +# ) +# ), +# ) +# +# # Model optimizer +# optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=7.0e-4, eps=1e-3, alpha=0.99) +# +# # Overall information store for training information +# training_info = TrainingInfo( +# metrics=[ +# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode +# ], +# callbacks=[] # Print live metrics every epoch to standard output +# ) +# +# # A bit of training initialization bookkeeping... +# training_info.initialize() +# reinforcer.initialize_training(training_info) +# training_info.on_train_begin() +# +# # Let's make 100 batches per epoch to average metrics nicely +# num_epochs = 1 +# +# # Normal handrolled training loop +# for i in range(1, num_epochs+1): +# epoch_info = EpochInfo( +# training_info=training_info, +# global_epoch_idx=i, +# batches_per_epoch=1, +# optimizer=optimizer +# ) +# +# reinforcer.train_epoch(epoch_info, interactive=False) +# +# training_info.on_train_end() diff --git a/vel/rl/util/actor.py b/vel/rl/util/actor.py index 43fc4b80..a858c4a7 100644 --- a/vel/rl/util/actor.py +++ b/vel/rl/util/actor.py @@ -9,7 +9,7 @@ class PolicyActor: def __init__(self, num_envs: int, policy: Policy, device: torch.device): self.num_envs = num_envs - self.policy = policy + self.policy = policy.to(device) self.device = device self.state = to_device(self.policy.zero_state(num_envs), self.device) From fe2443e9e9466a80c67d3bb80686ee9009627945 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 20 Jun 2019 19:31:05 -0700 Subject: [PATCH 049/162] Implemented some useful new backbones. 
--- vel/rl/backbone/lstm.py | 20 ------------ vel/rl/backbone/mlp_rnn.py | 62 ++++++++++++++++++++++++++++++++++++++ vel/rl/backbone/rnn.py | 50 ++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+), 20 deletions(-) delete mode 100644 vel/rl/backbone/lstm.py create mode 100644 vel/rl/backbone/mlp_rnn.py create mode 100644 vel/rl/backbone/rnn.py diff --git a/vel/rl/backbone/lstm.py b/vel/rl/backbone/lstm.py deleted file mode 100644 index 50356d07..00000000 --- a/vel/rl/backbone/lstm.py +++ /dev/null @@ -1,20 +0,0 @@ -from vel.api import LinearBackboneModel - - -class LstmBackbone(LinearBackboneModel): - """ - Simple 'LSTM' model backbone - """ - - def __init__(self, input_size, hidden_units): - super().__init__() - - self.input_size = input_size - self.hidden_units = hidden_units - - def forward(self, input_data, masks, state): - raise NotImplementedError - - def initial_state(self): - """ Initial state of the network """ - raise NotImplementedError diff --git a/vel/rl/backbone/mlp_rnn.py b/vel/rl/backbone/mlp_rnn.py new file mode 100644 index 00000000..d5229d7f --- /dev/null +++ b/vel/rl/backbone/mlp_rnn.py @@ -0,0 +1,62 @@ +import typing + +from vel.api import LinearBackboneModel, ModelFactory +from vel.rl.backbone.mlp import MLP +from vel.rl.backbone.rnn import RNN + + +class MlpRnn(LinearBackboneModel): + """ MLP followed by an RNN - another simple policy backbone """ + + def __init__(self, input_length: int, mlp_layers: typing.List[int], rnn_units: int, rnn_type: str = 'lstm', + mlp_activation: str = 'tanh', mlp_normalization: typing.Optional[str] = None): + super().__init__() + + self.mlp = MLP( + input_length=input_length, hidden_layers=mlp_layers, activation=mlp_activation, + normalization=mlp_normalization + ) + + self.rnn = RNN(input_length=self.mlp.output_dim, hidden_units=rnn_units, rnn_type=rnn_type) + + @property + def output_dim(self) -> int: + return self.rnn.output_dim + + @property + def state_dim(self) -> int: + """ Initial state of 
the network """ + return self.rnn.state_dim + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return True + + def zero_state(self, batch_size): + """ Potential state for the model """ + return self.rnn.zero_state(batch_size) + + def forward(self, input_data, state): + mlp_output = self.mlp(input_data) + hidden_state, new_state = self.rnn(mlp_output, state) + return hidden_state, new_state + + +def create(input_length: int, mlp_layers: typing.List[int], rnn_units: int, rnn_type: str = 'lstm', + mlp_activation: str = 'tanh', mlp_normalization: typing.Optional[str] = None): + """ Vel factory function """ + def instantiate(**_): + return MlpRnn( + input_length=input_length, + mlp_layers=mlp_layers, + rnn_units=rnn_units, + rnn_type=rnn_type, + mlp_activation=mlp_activation, + mlp_normalization=mlp_normalization + ) + + return ModelFactory.generic(instantiate) + + +MlpRnnFactory = create diff --git a/vel/rl/backbone/rnn.py b/vel/rl/backbone/rnn.py new file mode 100644 index 00000000..973345d4 --- /dev/null +++ b/vel/rl/backbone/rnn.py @@ -0,0 +1,50 @@ +from vel.api import LinearBackboneModel, ModelFactory +from vel.module.rnn_cell import RnnCell + + +class RNN(LinearBackboneModel): + """ Simple recurrent model backbone """ + + def __init__(self, input_length: int, hidden_units: int, rnn_type: str = 'lstm'): + super().__init__() + + self.input_length = input_length + self.hidden_units = hidden_units + + self.rnn_cell = RnnCell(input_size=input_length, hidden_size=self.hidden_units, rnn_type=rnn_type) + + @property + def output_dim(self) -> int: + return self.rnn_cell.output_dim + + @property + def state_dim(self) -> int: + """ Initial state of the network """ + return self.rnn_cell.state_dim + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return True + + def zero_state(self, batch_size): + """ 
Potential state for the model """ + return self.rnn_cell.zero_state(batch_size) + + def forward(self, input_data, state): + hidden_state, new_state = self.rnn_cell(input_data, state) + return hidden_state, new_state + + +def create(input_length: int, hidden_units: int, rnn_type: str = 'lstm'): + """ Vel factory function """ + def instantiate(**_): + return RNN( + input_length=input_length, + hidden_units=hidden_units, + rnn_type=rnn_type + ) + return ModelFactory.generic(instantiate) + + +RNNFactory = create From ecd5c8535b35beef1278e6c11410fd51c515eed9 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 20 Jun 2019 19:42:40 -0700 Subject: [PATCH 050/162] Added potential output directory override. --- vel/api/model_config.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vel/api/model_config.py b/vel/api/model_config.py index 23488916..4426dd81 100644 --- a/vel/api/model_config.py +++ b/vel/api/model_config.py @@ -112,6 +112,12 @@ def __init__(self, filename: str, configuration: dict, run_number: int, project_ del self.contents['commands'] self.provider = Provider(self._prepare_environment(), {'model_config': self}, parameters=parameters) + + if self.provider.has_name('output_directory'): + self.output_directory_name = self.provider.get("output_directory") + else: + self.output_directory_name = 'output' + self._model_name = self.provider.get("name") def _prepare_environment(self) -> dict: @@ -153,7 +159,7 @@ def project_data_dir(self, *args) -> str: def output_dir(self, *args) -> str: """ Directory where to store output """ - return os.path.join(self.project_dir, 'output', *args) + return os.path.join(self.project_dir, self.output_directory_name, *args) def project_top_dir(self, *args) -> str: """ Project top-level directory """ From 6de2f6c387828cc0e8ac998c695097f74ef351cc Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 22 Jun 2019 11:27:30 -0700 Subject: [PATCH 051/162] Updated requirements. 
--- setup.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8a8fa5b9..0fd152f6 100644 --- a/setup.py +++ b/setup.py @@ -41,12 +41,16 @@ ], extras_require={ 'visdom': ['visdom'], + 'tensorboard': ['tb-nightly'], 'mongo': ['pymongo', 'dnspython'], 'gym': ['gym[atari,box2d,classic_control]'], 'mujoco': ['gym[mujoco,robotics]'], 'dev': ['pytest', 'ipython', 'jupyter', 'pip-tools', 'flake8', 'pytest-xdist'], 'text': ['spacy'], - 'all': ['visdom', 'pymongo', 'dnspython', 'gym[all]', 'pytest', 'spacy', 'ipython', 'jupyter'] + 'all': [ + 'visdom', 'pymongo', 'dnspython', 'gym[all]', 'pytest', 'spacy', 'ipython', 'jupyter', + 'pip-tools', 'flake8', 'pytest-xdist', 'tb-nightly' + ] }, tests_require=[ 'pytest' From 6d6679f7b350a30f13eca772956eda2607ecb02d Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 22 Jun 2019 11:27:40 -0700 Subject: [PATCH 052/162] New version of some dependencies. --- requirements.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index e09fcbe8..a0cce232 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,17 +4,17 @@ # # pip-compile # -atari-py==0.1.15 # via gym +atari-py==0.2.0 # via gym atomicwrites==1.3.0 # via pytest attrs==19.1.0 box2d-py==2.3.8 # via gym -certifi==2019.3.9 # via requests +certifi==2019.6.16 # via requests chardet==3.0.4 # via requests cloudpickle==1.2.1 cycler==0.10.0 # via matplotlib dnspython==1.16.0 future==0.17.1 # via pyglet -gym[atari,box2d,classic_control]==0.12.5 +gym[atari,box2d,classic_control]==0.13.0 idna==2.8 # via requests importlib-metadata==0.18 # via pluggy, pytest joblib==0.13.2 # via scikit-learn @@ -45,9 +45,12 @@ torchfile==0.1.0 # via visdom torchtext==0.3.1 torchvision==0.3.0 tornado==6.0.2 # via visdom -tqdm==4.32.1 +tqdm==4.32.2 urllib3==1.25.3 # via requests visdom==0.1.8.8 wcwidth==0.1.7 # via pytest websocket-client==0.56.0 # via visdom zipp==0.5.1 # via 
importlib-metadata + +# The following packages are considered to be unsafe in a requirements file: +# setuptools==41.0.1 # via kiwisolver From 5c048541b0dfcc5e5e54b738da7317894b29550d Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 23 Jun 2019 17:52:35 -0700 Subject: [PATCH 053/162] Large refactoring - work in progress. --- ...cnn_autoencoder.yaml => mnist_cnn_ae.yaml} | 0 .../classification/mnist/mnist_cnn_01.yaml | 23 +- vel/api/__init__.py | 8 +- vel/api/augmentation.py | 19 -- vel/api/dataflow.py | 42 --- vel/api/model.py | 18 +- vel/api/source.py | 280 +++++++++--------- vel/api/train_phase.py | 5 +- vel/api/transformation.py | 43 +++ vel/augmentation/to_tensor.py | 26 -- vel/{augmentation => calc}/__init__.py | 0 vel/{math => calc}/function.py | 0 vel/{math => calc}/process.py | 0 vel/command/augvis_command.py | 20 +- vel/command/train_command.py | 16 +- vel/data/__init__.py | 3 +- .../tta => data/augmentation}/__init__.py | 0 vel/{ => data}/augmentation/center_crop.py | 0 vel/{ => data}/augmentation/normalize.py | 2 +- vel/{ => data}/augmentation/random_crop.py | 0 .../augmentation/random_horizontal_flip.py | 2 +- .../augmentation/random_lighting.py | 2 +- vel/{ => data}/augmentation/random_rotate.py | 0 vel/{ => data}/augmentation/random_scale.py | 0 vel/{ => data}/augmentation/scale_min_size.py | 0 .../augmentation/tta}/__init__.py | 0 vel/{ => data}/augmentation/tta/train_tta.py | 0 vel/{ => data}/augmentation/unsupervised.py | 0 vel/data/dataflow.py | 55 ++++ vel/data/loader.py | 71 +++++ vel/{phase => data/operation}/__init__.py | 0 vel/data/{ => operation}/image_op.py | 0 vel/{schedule => data/source}/__init__.py | 0 vel/{ => data}/source/img_dir_source.py | 0 vel/{source => data/source/nlp}/__init__.py | 0 vel/{ => data}/source/nlp/imdb.py | 0 vel/{ => data}/source/nlp/multi30k.py | 0 vel/{ => data}/source/nlp/text_url.py | 0 vel/{ => data}/source/nlp/wmt14.py | 0 .../nlp => data/source/vision}/__init__.py | 0 vel/{ => 
data}/source/vision/cifar10.py | 0 vel/data/source/vision/mnist.py | 51 ++++ .../transformation}/__init__.py | 0 vel/data/transformation/image_to_tensor.py | 31 ++ .../transformation}/to_array.py | 10 +- vel/function/__init__.py | 0 vel/{schedule => function}/constant.py | 0 vel/{schedule => function}/linear.py | 0 .../linear_and_constant.py | 0 vel/model/autoencoder/mnist_cnn_vae.py | 4 +- vel/source/vision/mnist.py | 39 --- vel/train/__init__.py | 1 + vel/train/phase/__init__.py | 0 vel/{ => train}/phase/cycle.py | 0 vel/{ => train}/phase/freeze.py | 0 vel/{ => train}/phase/generic.py | 0 vel/{ => train}/phase/unfreeze.py | 0 vel/{api/learner.py => train/trainer.py} | 49 +-- 58 files changed, 483 insertions(+), 337 deletions(-) rename examples-configs/autoencoder/mnist/{mnist_cnn_autoencoder.yaml => mnist_cnn_ae.yaml} (100%) delete mode 100644 vel/api/augmentation.py delete mode 100644 vel/api/dataflow.py create mode 100644 vel/api/transformation.py delete mode 100644 vel/augmentation/to_tensor.py rename vel/{augmentation => calc}/__init__.py (100%) rename vel/{math => calc}/function.py (100%) rename vel/{math => calc}/process.py (100%) rename vel/{augmentation/tta => data/augmentation}/__init__.py (100%) rename vel/{ => data}/augmentation/center_crop.py (100%) rename vel/{ => data}/augmentation/normalize.py (94%) rename vel/{ => data}/augmentation/random_crop.py (100%) rename vel/{ => data}/augmentation/random_horizontal_flip.py (93%) rename vel/{ => data}/augmentation/random_lighting.py (94%) rename vel/{ => data}/augmentation/random_rotate.py (100%) rename vel/{ => data}/augmentation/random_scale.py (100%) rename vel/{ => data}/augmentation/scale_min_size.py (100%) rename vel/{math => data/augmentation/tta}/__init__.py (100%) rename vel/{ => data}/augmentation/tta/train_tta.py (100%) rename vel/{ => data}/augmentation/unsupervised.py (100%) create mode 100644 vel/data/dataflow.py create mode 100644 vel/data/loader.py rename vel/{phase => 
data/operation}/__init__.py (100%) rename vel/data/{ => operation}/image_op.py (100%) rename vel/{schedule => data/source}/__init__.py (100%) rename vel/{ => data}/source/img_dir_source.py (100%) rename vel/{source => data/source/nlp}/__init__.py (100%) rename vel/{ => data}/source/nlp/imdb.py (100%) rename vel/{ => data}/source/nlp/multi30k.py (100%) rename vel/{ => data}/source/nlp/text_url.py (100%) rename vel/{ => data}/source/nlp/wmt14.py (100%) rename vel/{source/nlp => data/source/vision}/__init__.py (100%) rename vel/{ => data}/source/vision/cifar10.py (100%) create mode 100644 vel/data/source/vision/mnist.py rename vel/{source/vision => data/transformation}/__init__.py (100%) create mode 100644 vel/data/transformation/image_to_tensor.py rename vel/{augmentation => data/transformation}/to_array.py (56%) create mode 100644 vel/function/__init__.py rename vel/{schedule => function}/constant.py (100%) rename vel/{schedule => function}/linear.py (100%) rename vel/{schedule => function}/linear_and_constant.py (100%) delete mode 100644 vel/source/vision/mnist.py create mode 100644 vel/train/__init__.py create mode 100644 vel/train/phase/__init__.py rename vel/{ => train}/phase/cycle.py (100%) rename vel/{ => train}/phase/freeze.py (100%) rename vel/{ => train}/phase/generic.py (100%) rename vel/{ => train}/phase/unfreeze.py (100%) rename vel/{api/learner.py => train/trainer.py} (65%) diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_autoencoder.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml similarity index 100% rename from examples-configs/autoencoder/mnist/mnist_cnn_autoencoder.yaml rename to examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml diff --git a/examples-configs/classification/mnist/mnist_cnn_01.yaml b/examples-configs/classification/mnist/mnist_cnn_01.yaml index 8d3ed19b..b0f75729 100644 --- a/examples-configs/classification/mnist/mnist_cnn_01.yaml +++ b/examples-configs/classification/mnist/mnist_cnn_01.yaml @@ -2,7 +2,7 @@ 
name: 'mnist_cnn_01' model: - name: vel.models.vision.mnist_cnn_01 + name: vel.model.vision.mnist_cnn_01 img_rows: 28 img_cols: 28 img_channels: 1 @@ -10,26 +10,31 @@ model: source: - name: vel.sources.vision.mnist + name: vel.data.source.vision.mnist + + +loader: + name: vel.data.loader batch_size: 128 - normalize: False num_workers: 4 - + transformations: + - name: vel.data.transformation.image_to_tensor commands: train: - name: vel.commands.train_command + name: vel.command.train_command epochs: 12 log_frequency: 100 optimizer: - name: vel.optimizers.adadelta + name: vel.optimizer.adadelta checkpoint: metric: 'val:loss' - - visdom: - name: vel.commands.vis_store_command + augvis: + name: vel.command.augvis_command + samples: 3 + cases: 3 diff --git a/vel/api/__init__.py b/vel/api/__init__.py index 06b8490c..1e5c9b65 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -1,16 +1,14 @@ -from .augmentation import Augmentation -from .dataflow import DataFlow +from .transformation import Transformation from .callback import Callback from .info import BatchInfo, EpochInfo, TrainingInfo -from .learner import Learner from .model import ( - Model, SupervisedModel, LossFunctionModel, BackboneModel, LinearBackboneModel + Model, GradientModel, LossFunctionModel, BackboneModel, LinearBackboneModel ) from .model_factory import ModelFactory from .optimizer import OptimizerFactory from .schedule import Schedule from .scheduler import SchedulerFactory -from .source import Source, SupervisedTrainingData, SupervisedTextData +from .source import Source from .storage import Storage from .train_phase import TrainPhase, EmptyTrainPhase from .model_config import ModelConfig diff --git a/vel/api/augmentation.py b/vel/api/augmentation.py deleted file mode 100644 index b4b103de..00000000 --- a/vel/api/augmentation.py +++ /dev/null @@ -1,19 +0,0 @@ - - -class Augmentation: - """ Base class for all data augmentations """ - def __init__(self, mode='x', tags=None): - self.mode = mode 
- self.tags = tags or ['train', 'val', 'test'] - - def __call__(self, *args): - """ Do the transformation """ - print(self) - raise NotImplementedError - - def denormalize(self, *args): - """ Operation reverse to normalization """ - if len(args) == 1: - return args[0] - else: - return args diff --git a/vel/api/dataflow.py b/vel/api/dataflow.py deleted file mode 100644 index 6b880b4f..00000000 --- a/vel/api/dataflow.py +++ /dev/null @@ -1,42 +0,0 @@ -import torch.utils.data as data - - -class DataFlow(data.Dataset): - """ A dataset wrapping underlying data source with transformations """ - def __init__(self, dataset, transformations, tag): - self.dataset = dataset - - if transformations is None: - self.transformations = [] - else: - self.transformations = [t for t in transformations if tag in t.tags] - - self.tag = tag - - def get_raw(self, index): - return self.dataset[index] - - def __getitem__(self, index): - raw_x, raw_y = self.dataset[index] - - for t in self.transformations: - if t.mode == 'x': - raw_x = t(raw_x) - elif t.mode == 'y': - raw_y = t(raw_y) - elif t.mode == 'both': - raw_x, raw_y = t(raw_x, raw_y) - else: - raise RuntimeError(f"Mode {t.mode} not recognized") - - return raw_x, raw_y - - def denormalize(self, datum, mode='x'): - for t in self.transformations[::-1]: - if t.mode == mode: - datum = t.denormalize(datum) - - return datum - - def __len__(self): - return len(self.dataset) diff --git a/vel/api/model.py b/vel/api/model.py index be09907b..53406a3e 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -63,10 +63,10 @@ def zero_state(self, batch_size): return None -class SupervisedModel(Model): +class GradientModel(Model): """ Model for a supervised learning problem """ - def calculate_gradient(self, x_data, y_true) -> dict: + def calculate_gradient(self, data: dict) -> dict: """ Calculate gradient for given batch of supervised learning. 
Returns a dictionary of metrics @@ -74,25 +74,25 @@ def calculate_gradient(self, x_data, y_true) -> dict: raise NotImplementedError -class LossFunctionModel(SupervisedModel): +class LossFunctionModel(GradientModel): """ Model for a supervised learning with a simple loss function """ def metrics(self) -> list: """ Set of metrics for this model """ return [Loss()] - def calculate_gradient(self, x_data, y_true) -> dict: - y_pred = self(x_data) - loss_value = self.loss_value(x_data, y_true, y_pred) + def calculate_gradient(self, data: dict) -> dict: + y_hat = self(data['x']) + loss_value = self.loss_value(data['x'], data['y'], y_hat) if self.training: loss_value.backward() return { 'loss': loss_value.item(), - 'data': x_data, - 'target': y_true, - 'output': y_pred + 'data': data['x'], + 'target': data['y'], + 'output': y_hat } def loss_value(self, x_data, y_true, y_pred) -> torch.tensor: diff --git a/vel/api/source.py b/vel/api/source.py index be1d864e..c6209c3a 100644 --- a/vel/api/source.py +++ b/vel/api/source.py @@ -1,139 +1,149 @@ -import torch.utils.data as data +import typing -from .dataflow import DataFlow +import torch.utils.data as data class Source: - """ Source of data for supervised learning algorithms """ - def __init__(self): - pass - - @property - def train_loader(self): - """ PyTorch loader of training data """ - raise NotImplementedError - - @property - def val_loader(self): - """ PyTorch loader of validation data """ - raise NotImplementedError - - @property - def train_dataset(self): - """ Return the training dataset """ - raise NotImplementedError - - @property - def val_dataset(self): - """ Return the validation dataset """ - raise NotImplementedError - - @property - def train_iterations_per_epoch(self): - """ Return number of iterations per epoch """ - raise NotImplementedError - - @property - def val_iterations_per_epoch(self): - """ Return number of iterations per epoch - validation """ - raise NotImplementedError - - -class 
SupervisedTextData(Source): - """ An NLP torchtext data source """ - def __init__(self, train_source, val_source, train_iterator, val_iterator, data_field, target_field): - super().__init__() - - self.train_source = train_source - self.val_source = val_source - self.train_iterator = train_iterator - self.val_iterator = val_iterator - self.data_field = data_field - self.target_field = target_field - - @property - def train_loader(self): - """ PyTorch loader of training data """ - return self.train_iterator - - @property - def val_loader(self): - """ PyTorch loader of validation data """ - return self.val_iterator - - @property - def train_dataset(self): - """ Return the training dataset """ - return self.train_source - - @property - def val_dataset(self): - """ Return the validation dataset """ - return self.val_source - - @property - def train_iterations_per_epoch(self): - """ Return number of iterations per epoch """ - return len(self.train_iterator) - - @property - def val_iterations_per_epoch(self): - """ Return number of iterations per epoch - validation """ - return len(self.val_iterator) - - -class SupervisedTrainingData(Source): - """ Most common source of data combining a basic datasource and sampler """ - def __init__(self, train_source, val_source, num_workers, batch_size, augmentations=None): - - super().__init__() - - self.train_source = train_source - self.val_source = val_source - - self.num_workers = num_workers - self.batch_size = batch_size - - self.augmentations = augmentations - - # Derived values - self.train_ds = DataFlow(self.train_source, augmentations, tag='train') - self.val_ds = DataFlow(self.val_source, augmentations, tag='val') - - self._train_loader = data.DataLoader( - self.train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers - ) - - self._val_loader = data.DataLoader( - self.val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers - ) - - @property - def train_loader(self): - """ PyTorch loader of 
training data """ - return self._train_loader - - @property - def val_loader(self): - """ PyTorch loader of validation data """ - return self._val_loader - - @property - def train_dataset(self): - """ Return the training dataset """ - return self.train_ds - - @property - def val_dataset(self): - """ Return the validation dataset """ - return self.val_ds - - @property - def train_iterations_per_epoch(self): - """ Return number of iterations per epoch """ - return len(self._train_loader) - - @property - def val_iterations_per_epoch(self): - """ Return number of iterations per epoch - validation """ - return len(self._val_loader) + """ + Single simple container for train/validation/test datasets. + + PyTorch datasets by default support only __len__ and __getitem__ operations + """ + + def __init__(self, train: data.Dataset, validation: data.Dataset, + test: typing.Optional[data.Dataset] = None, metadata: typing.Optional[dict] = None): + self.train = train + self.validation = validation + self.test = test + + self.metadata = {} if metadata is None else metadata + +# @property +# def train_loader(self): +# """ PyTorch loader of training data """ +# raise NotImplementedError +# +# @property +# def val_loader(self): +# """ PyTorch loader of validation data """ +# raise NotImplementedError +# +# @property +# def train_dataset(self): +# """ Return the training dataset """ +# raise NotImplementedError +# +# @property +# def val_dataset(self): +# """ Return the validation dataset """ +# raise NotImplementedError +# +# @property +# def train_iterations_per_epoch(self): +# """ Return number of iterations per epoch """ +# raise NotImplementedError +# +# @property +# def val_iterations_per_epoch(self): +# """ Return number of iterations per epoch - validation """ +# raise NotImplementedError +# +# +# class SupervisedTextData(Source): +# """ An NLP torchtext data source """ +# def __init__(self, train_source, val_source, train_iterator, val_iterator, data_field, target_field): +# 
super().__init__() +# +# self.train_source = train_source +# self.val_source = val_source +# self.train_iterator = train_iterator +# self.val_iterator = val_iterator +# self.data_field = data_field +# self.target_field = target_field +# +# @property +# def train_loader(self): +# """ PyTorch loader of training data """ +# return self.train_iterator +# +# @property +# def val_loader(self): +# """ PyTorch loader of validation data """ +# return self.val_iterator +# +# @property +# def train_dataset(self): +# """ Return the training dataset """ +# return self.train_source +# +# @property +# def val_dataset(self): +# """ Return the validation dataset """ +# return self.val_source +# +# @property +# def train_iterations_per_epoch(self): +# """ Return number of iterations per epoch """ +# return len(self.train_iterator) +# +# @property +# def val_iterations_per_epoch(self): +# """ Return number of iterations per epoch - validation """ +# return len(self.val_iterator) +# +# +# class SupervisedTrainingData(Source): +# """ Most common source of data combining a basic datasource and sampler """ +# def __init__(self, train_source, val_source, num_workers, batch_size, augmentations=None): +# +# super().__init__() +# +# self.train_source = train_source +# self.val_source = val_source +# +# self.num_workers = num_workers +# self.batch_size = batch_size +# +# self.augmentations = augmentations +# +# # Derived values +# self.train_ds = DataFlow(self.train_source, augmentations, tag='train') +# self.val_ds = DataFlow(self.val_source, augmentations, tag='val') +# +# self._train_loader = data.DataLoader( +# self.train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers +# ) +# +# self._val_loader = data.DataLoader( +# self.val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers +# ) +# +# @property +# def train_loader(self): +# """ PyTorch loader of training data """ +# return self._train_loader +# +# @property +# def val_loader(self): +# """ PyTorch loader 
of validation data """ +# return self._val_loader +# +# @property +# def train_dataset(self): +# """ Return the training dataset """ +# return self.train_ds +# +# @property +# def val_dataset(self): +# """ Return the validation dataset """ +# return self.val_ds +# +# @property +# def train_iterations_per_epoch(self): +# """ Return number of iterations per epoch """ +# return len(self._train_loader) +# +# @property +# def val_iterations_per_epoch(self): +# """ Return number of iterations per epoch - validation """ +# return len(self._val_loader) diff --git a/vel/api/train_phase.py b/vel/api/train_phase.py index d73e7e31..cd0f9b58 100644 --- a/vel/api/train_phase.py +++ b/vel/api/train_phase.py @@ -1,6 +1,7 @@ from torch.optim import Optimizer -from vel.api import TrainingInfo, EpochInfo, Learner, Model, Source +from vel.api import TrainingInfo, EpochInfo, Model, Source +from vel.train import Trainer class TrainPhase: @@ -25,7 +26,7 @@ def epoch_info(self, training_info: TrainingInfo, global_idx: int, local_idx: in """ Create Epoch info """ raise NotImplementedError - def execute_epoch(self, epoch_info: EpochInfo, learner: Learner): + def execute_epoch(self, epoch_info: EpochInfo, trainer: Trainer): """ Execute epoch training. 
""" diff --git a/vel/api/transformation.py b/vel/api/transformation.py new file mode 100644 index 00000000..80ee433e --- /dev/null +++ b/vel/api/transformation.py @@ -0,0 +1,43 @@ +class Transformation: + """ Base class for all data augmentations """ + def __init__(self, tags=None): + self.tags = ['train', 'val', 'test'] if tags is None else tags + + def __call__(self, datapoint): + """ Do the transformation """ + raise NotImplementedError + + def denormalize(self, datapoint): + """ Operation reverse to normalization """ + return datapoint + + +class ScopedTransformation(Transformation): + """ Transformation applied only to certain keys of the datapoint """ + + def __init__(self, scope=None, tags=None): + super().__init__(tags) + + self.scope = ['x'] if scope is None else scope + + def transform(self, value): + """ Actual transformation code """ + raise NotImplementedError + + def denormalization_transform(self, value): + """ Operation reverse to normalization """ + return value + + def __call__(self, datapoint): + """ Do the transformation """ + for name in self.scope: + datapoint[name] = self.transform(datapoint[name]) + + return datapoint + + def denormalize(self, datapoint): + """ Operation reverse to normalization """ + for name in self.scope: + datapoint[name] = self.denormalization_transform(datapoint[name]) + + return datapoint diff --git a/vel/augmentation/to_tensor.py b/vel/augmentation/to_tensor.py deleted file mode 100644 index 33285e77..00000000 --- a/vel/augmentation/to_tensor.py +++ /dev/null @@ -1,26 +0,0 @@ -import numpy as np - -import torchvision.transforms.functional as F - -import vel.data as data - - -class ToTensor(data.Augmentation): - """ Convert image array to a tensor """ - def __init__(self, mode='x', tags=None): - super().__init__(mode, tags) - - def __call__(self, datum): - if len(datum.shape) == 2: - # If the image has only one channel, it still needs to be specified - datum = datum.reshape(datum.shape[0], datum.shape[1], 1) - - 
return F.to_tensor(datum) - - def denormalize(self, datum): - return np.transpose(datum.numpy(), (1, 2, 0)) - - -def create(mode='x', tags=None): - """ Vel factory function """ - return ToTensor(mode, tags) diff --git a/vel/augmentation/__init__.py b/vel/calc/__init__.py similarity index 100% rename from vel/augmentation/__init__.py rename to vel/calc/__init__.py diff --git a/vel/math/function.py b/vel/calc/function.py similarity index 100% rename from vel/math/function.py rename to vel/calc/function.py diff --git a/vel/math/process.py b/vel/calc/process.py similarity index 100% rename from vel/math/process.py rename to vel/calc/process.py diff --git a/vel/command/augvis_command.py b/vel/command/augvis_command.py index df4f5352..edfa4906 100644 --- a/vel/command/augvis_command.py +++ b/vel/command/augvis_command.py @@ -1,19 +1,19 @@ import matplotlib.pyplot as plt import numpy as np -from vel.api import Source +from vel.data import Loader class AugmentationVisualizationCommand: """ Visualize augmentations """ - def __init__(self, source: Source, samples, cases): - self.source = source + def __init__(self, loader: Loader, samples, cases): + self.loader = loader self.samples = samples self.cases = cases def run(self): """ Run the visualization """ - dataset = self.source.train_dataset + dataset = self.loader.transformed_source.train num_samples = len(dataset) fig, ax = plt.subplots(self.cases, self.samples+1) @@ -21,19 +21,19 @@ def run(self): selected_sample = np.sort(np.random.choice(num_samples, self.cases, replace=False)) for i in range(self.cases): - raw_image, _ = dataset.get_raw(selected_sample[i]) + raw_image = dataset.get_raw(selected_sample[i])['x'] ax[i, 0].imshow(raw_image) ax[i, 0].set_title("Original image") for j in range(self.samples): - augmented_image, _ = dataset[selected_sample[i]] - augmented_image = dataset.denormalize(augmented_image) - ax[i, j+1].imshow(augmented_image) + augmented_datapoint = dataset[selected_sample[i]] + 
denormalized_datapoint = dataset.denormalize(augmented_datapoint) + ax[i, j+1].imshow(denormalized_datapoint['x']) plt.show() -def create(source, samples, cases): +def create(loader: Loader, samples: int, cases: int): """ Vel factory function """ - return AugmentationVisualizationCommand(source, samples, cases) + return AugmentationVisualizationCommand(loader, samples, cases) diff --git a/vel/command/train_command.py b/vel/command/train_command.py index 2e708706..ea7e06d5 100644 --- a/vel/command/train_command.py +++ b/vel/command/train_command.py @@ -1,6 +1,8 @@ import typing import vel.api as api +import vel.data as data +import vel.train as train from vel.callback.time_tracker import TimeTracker @@ -10,7 +12,7 @@ class SimpleTrainCommand: def __init__(self, epochs: int, model_config: api.ModelConfig, model_factory: api.ModelFactory, optimizer_factory: api.OptimizerFactory, scheduler_factory: typing.Optional[api.SchedulerFactory], - source: api.Source, storage: api.Storage, callbacks: typing.Optional[typing.List[api.Callback]], + loader: data.Loader, storage: api.Storage, callbacks: typing.Optional[typing.List[api.Callback]], max_grad_norm: typing.Optional[float]): self.epochs = epochs self.model_config = model_config @@ -19,7 +21,7 @@ def __init__(self, epochs: int, model_config: api.ModelConfig, model_factory: ap self.optimizer_factory = optimizer_factory self.scheduler_factory = scheduler_factory - self.source = source + self.loader = loader self.storage = storage self.callbacks = callbacks if callbacks is not None else [] self.max_grad_norm = max_grad_norm @@ -28,7 +30,7 @@ def run(self): """ Run the command with supplied configuration """ device = self.model_config.torch_device() - learner = api.Learner(device, self.model_factory.instantiate(), self.max_grad_norm) + learner = train.Trainer(device, self.model_factory.instantiate(), self.max_grad_norm) optimizer = self.optimizer_factory.instantiate(learner.model) # All callbacks used for learning @@ -49,12 
+51,12 @@ def run(self): epoch_info = api.EpochInfo( training_info=training_info, global_epoch_idx=global_epoch_idx, - batches_per_epoch=self.source.train_iterations_per_epoch, + batches_per_epoch=self.loader.size['train'], optimizer=optimizer ) # Execute learning - learner.run_epoch(epoch_info, self.source) + learner.run_epoch(epoch_info, self.loader) self.storage.checkpoint(epoch_info, learner.model) @@ -99,7 +101,7 @@ def resume_training(self, learner, callbacks, metrics) -> api.TrainingInfo: return training_info -def create(model_config, epochs, optimizer, model, source, storage, scheduler=None, callbacks=None, max_grad_norm=None): +def create(model_config, epochs, optimizer, model, loader, storage, scheduler=None, callbacks=None, max_grad_norm=None): """ Vel factory function """ return SimpleTrainCommand( epochs=epochs, @@ -107,7 +109,7 @@ def create(model_config, epochs, optimizer, model, source, storage, scheduler=No model_factory=model, optimizer_factory=optimizer, scheduler_factory=scheduler, - source=source, + loader=loader, storage=storage, callbacks=callbacks, max_grad_norm=max_grad_norm diff --git a/vel/data/__init__.py b/vel/data/__init__.py index dd02c4a5..41e0ebd8 100644 --- a/vel/data/__init__.py +++ b/vel/data/__init__.py @@ -1 +1,2 @@ -from .image_op import * # noqa +from .dataflow import DataFlow +from .loader import Loader diff --git a/vel/augmentation/tta/__init__.py b/vel/data/augmentation/__init__.py similarity index 100% rename from vel/augmentation/tta/__init__.py rename to vel/data/augmentation/__init__.py diff --git a/vel/augmentation/center_crop.py b/vel/data/augmentation/center_crop.py similarity index 100% rename from vel/augmentation/center_crop.py rename to vel/data/augmentation/center_crop.py diff --git a/vel/augmentation/normalize.py b/vel/data/augmentation/normalize.py similarity index 94% rename from vel/augmentation/normalize.py rename to vel/data/augmentation/normalize.py index b0b787a6..d67a2c6a 100644 --- 
a/vel/augmentation/normalize.py +++ b/vel/data/augmentation/normalize.py @@ -3,7 +3,7 @@ import vel.api as api -class Normalize(api.Augmentation): +class Normalize(api.Transformation): """ Normalize input mean and standard deviation """ def __init__(self, mean, std, mode='x', tags=None): diff --git a/vel/augmentation/random_crop.py b/vel/data/augmentation/random_crop.py similarity index 100% rename from vel/augmentation/random_crop.py rename to vel/data/augmentation/random_crop.py diff --git a/vel/augmentation/random_horizontal_flip.py b/vel/data/augmentation/random_horizontal_flip.py similarity index 93% rename from vel/augmentation/random_horizontal_flip.py rename to vel/data/augmentation/random_horizontal_flip.py index e4d6c142..ba397519 100644 --- a/vel/augmentation/random_horizontal_flip.py +++ b/vel/data/augmentation/random_horizontal_flip.py @@ -4,7 +4,7 @@ import vel.api as api -class RandomHorizontalFlip(api.Augmentation): +class RandomHorizontalFlip(api.Transformation): """ Apply a horizontal flip randomly to input images """ def __init__(self, p=0.5, mode='x', tags=None): diff --git a/vel/augmentation/random_lighting.py b/vel/data/augmentation/random_lighting.py similarity index 94% rename from vel/augmentation/random_lighting.py rename to vel/data/augmentation/random_lighting.py index d85c450c..9c51d9ae 100644 --- a/vel/augmentation/random_lighting.py +++ b/vel/data/augmentation/random_lighting.py @@ -4,7 +4,7 @@ import vel.data as data -class RandomLighting(api.Augmentation): +class RandomLighting(api.Transformation): """ Apply a horizontal flip randomly to input images """ def __init__(self, b, c, mode='x', tags=None): diff --git a/vel/augmentation/random_rotate.py b/vel/data/augmentation/random_rotate.py similarity index 100% rename from vel/augmentation/random_rotate.py rename to vel/data/augmentation/random_rotate.py diff --git a/vel/augmentation/random_scale.py b/vel/data/augmentation/random_scale.py similarity index 100% rename from 
vel/augmentation/random_scale.py rename to vel/data/augmentation/random_scale.py diff --git a/vel/augmentation/scale_min_size.py b/vel/data/augmentation/scale_min_size.py similarity index 100% rename from vel/augmentation/scale_min_size.py rename to vel/data/augmentation/scale_min_size.py diff --git a/vel/math/__init__.py b/vel/data/augmentation/tta/__init__.py similarity index 100% rename from vel/math/__init__.py rename to vel/data/augmentation/tta/__init__.py diff --git a/vel/augmentation/tta/train_tta.py b/vel/data/augmentation/tta/train_tta.py similarity index 100% rename from vel/augmentation/tta/train_tta.py rename to vel/data/augmentation/tta/train_tta.py diff --git a/vel/augmentation/unsupervised.py b/vel/data/augmentation/unsupervised.py similarity index 100% rename from vel/augmentation/unsupervised.py rename to vel/data/augmentation/unsupervised.py diff --git a/vel/data/dataflow.py b/vel/data/dataflow.py new file mode 100644 index 00000000..ae4b28ad --- /dev/null +++ b/vel/data/dataflow.py @@ -0,0 +1,55 @@ +import typing +import torch.utils.data as data + +from vel.api import Source, Transformation + + +def pre_map(datapoint): + """ Map datapoint from a list into the dictionary """ + if isinstance(datapoint, (list, tuple)): + return dict(zip("xyzw", datapoint)) + return datapoint + + +class DataFlow(data.Dataset): + """ A dataset wrapping underlying data source with transformations """ + + @staticmethod + def transform(source: Source, transformations: typing.List[Transformation]) -> Source: + """ Transform supplied source with a list of given transformations """ + return Source( + train=DataFlow(source.train, transformations, 'train'), + validation=DataFlow(source.validation, transformations, 'val'), + test=None if source.test is None else DataFlow(source.test, transformations, 'test') + ) + + def __init__(self, dataset, transformations, tag): + self.dataset = dataset + + if transformations is None: + self.transformations = [] + else: + 
self.transformations = [t for t in transformations if tag in t.tags] + + self.tag = tag + + def get_raw(self, index): + return pre_map(self.dataset[index]) + + def __getitem__(self, index): + datapoint = self.get_raw(index) + + for t in self.transformations: + datapoint = t(datapoint) + + return datapoint + + def denormalize(self, datapoint): + """ Perform a reverse normalization (for viewing) """ + for t in self.transformations[::-1]: + datapoint = t.denormalize(datapoint) + + return datapoint + + def __len__(self): + return len(self.dataset) diff --git a/vel/data/loader.py b/vel/data/loader.py new file mode 100644 index 00000000..4e98742f --- /dev/null +++ b/vel/data/loader.py @@ -0,0 +1,71 @@ +import typing +import torch.utils.data as data + +from vel.api import Source + +from .dataflow import DataFlow + + +class Loader: + """ Loads data from a data source to serve it to the model """ + + def __init__(self, source: Source, batch_size: int, num_workers: int, + transformations: typing.Optional[list] = None): + self.source = source + self.batch_size = batch_size + self.num_workers = num_workers + self.transformations = transformations + + if transformations is not None: + self.transformed_source = DataFlow.transform(self.source, transformations) + else: + self.transformed_source = source + + self.train_loader = data.DataLoader( + self.transformed_source.train, batch_size=batch_size, shuffle=True, num_workers=num_workers, + drop_last=True + ) + + self.val_loader = data.DataLoader( + self.transformed_source.validation, batch_size=batch_size, shuffle=False, num_workers=num_workers, + ) + + if self.transformed_source.test is not None: + self.test_loader = data.DataLoader( + self.transformed_source.test, batch_size=batch_size, shuffle=False, num_workers=num_workers + ) + else: + self.test_loader = None + + self._loaders = { + 'train': self.train_loader, + 'val': self.val_loader, + 'test': self.test_loader + } + + self._loader_sizes = { + 'train': len(self.train_loader), 
+ 'val': len(self.val_loader), + 'test': 0 if self.test_loader is None else len(self.test_loader) + } + + def __getitem__(self, item): + return self._loaders[item] + + @property + def loader(self): + return self._loaders + + @property + def size(self): + return self._loader_sizes + + +def create(source: Source, batch_size: int, num_workers: int=0, transformations: typing.Optional[list] = None): + """ Vel factory function """ + return Loader( + source=source, + batch_size=batch_size, + num_workers=num_workers, + transformations=transformations + ) diff --git a/vel/phase/__init__.py b/vel/data/operation/__init__.py similarity index 100% rename from vel/phase/__init__.py rename to vel/data/operation/__init__.py diff --git a/vel/data/image_op.py b/vel/data/operation/image_op.py similarity index 100% rename from vel/data/image_op.py rename to vel/data/operation/image_op.py diff --git a/vel/schedule/__init__.py b/vel/data/source/__init__.py similarity index 100% rename from vel/schedule/__init__.py rename to vel/data/source/__init__.py diff --git a/vel/source/img_dir_source.py b/vel/data/source/img_dir_source.py similarity index 100% rename from vel/source/img_dir_source.py rename to vel/data/source/img_dir_source.py diff --git a/vel/source/__init__.py b/vel/data/source/nlp/__init__.py similarity index 100% rename from vel/source/__init__.py rename to vel/data/source/nlp/__init__.py diff --git a/vel/source/nlp/imdb.py b/vel/data/source/nlp/imdb.py similarity index 100% rename from vel/source/nlp/imdb.py rename to vel/data/source/nlp/imdb.py diff --git a/vel/source/nlp/multi30k.py b/vel/data/source/nlp/multi30k.py similarity index 100% rename from vel/source/nlp/multi30k.py rename to vel/data/source/nlp/multi30k.py diff --git a/vel/source/nlp/text_url.py b/vel/data/source/nlp/text_url.py similarity index 100% rename from vel/source/nlp/text_url.py rename to vel/data/source/nlp/text_url.py diff --git a/vel/source/nlp/wmt14.py b/vel/data/source/nlp/wmt14.py similarity index 
100% rename from vel/source/nlp/wmt14.py rename to vel/data/source/nlp/wmt14.py diff --git a/vel/source/nlp/__init__.py b/vel/data/source/vision/__init__.py similarity index 100% rename from vel/source/nlp/__init__.py rename to vel/data/source/vision/__init__.py diff --git a/vel/source/vision/cifar10.py b/vel/data/source/vision/cifar10.py similarity index 100% rename from vel/source/vision/cifar10.py rename to vel/data/source/vision/cifar10.py diff --git a/vel/data/source/vision/mnist.py b/vel/data/source/vision/mnist.py new file mode 100644 index 00000000..8ab6b49e --- /dev/null +++ b/vel/data/source/vision/mnist.py @@ -0,0 +1,51 @@ +from torchvision import datasets + +from vel.api import Source + + + +def create(model_config): + """ Create a MNIST dataset, normalized """ + path = model_config.data_dir('mnist') + + train_dataset = datasets.MNIST(path, train=True, download=True) + test_dataset = datasets.MNIST(path, train=False, download=True) + + train_data = train_dataset.data + mean_value = (train_data.double() / 255).mean().item() + std_value = (train_data.double() / 255).std().item() + + return Source( + train=train_dataset, + validation=test_dataset, + metadata={ + 'train_mean': mean_value, + 'train_std': std_value + } + ) + +# from vel.api import SupervisedTrainingData +# +# from vel.augmentations.normalize import Normalize +# from vel.augmentations.to_tensor import ToTensor +# from vel.augmentations.to_array import ToArray +# from vel.augmentations.unsupervised import Unsupervised + + # augmentations = [ToArray()] + (augmentations if augmentations is not None else []) + # + # if normalize: + # + # augmentations.append(Normalize(mean=mean_value, std=std_value, tags=['train', 'val'])) + # + # augmentations.append(ToTensor()) + # + # if unsupervised: + # augmentations.append(Unsupervised()) + # + # return SupervisedTrainingData( + # train_dataset, + # test_dataset, + # num_workers=num_workers, + # batch_size=batch_size, + # augmentations=augmentations + # ) 
diff --git a/vel/source/vision/__init__.py b/vel/data/transformation/__init__.py similarity index 100% rename from vel/source/vision/__init__.py rename to vel/data/transformation/__init__.py diff --git a/vel/data/transformation/image_to_tensor.py b/vel/data/transformation/image_to_tensor.py new file mode 100644 index 00000000..75eebc32 --- /dev/null +++ b/vel/data/transformation/image_to_tensor.py @@ -0,0 +1,31 @@ +import numpy as np + +import torchvision.transforms.functional as F + +from vel.api.transformation import ScopedTransformation + + +class ImageToTensor(ScopedTransformation): + """ Convert image array to a tensor """ + def transform(self, value): + # First let's make sure it's actually a numpy array + value = np.asarray(value) + + if len(value.shape) == 2: + # If the image has only one channel, it still needs to be specified + value = value.reshape(value.shape[0], value.shape[1], 1) + + return F.to_tensor(value) + + def denormalization_transform(self, value): + image_array = np.transpose(value.numpy(), (1, 2, 0)) + + if len(image_array.shape) == 3 and image_array.shape[-1] == 1: + return image_array[:, :, 0] + + return image_array + + +def create(mode='x', tags=None): + """ Vel factory function """ + return ImageToTensor(mode, tags) diff --git a/vel/augmentation/to_array.py b/vel/data/transformation/to_array.py similarity index 56% rename from vel/augmentation/to_array.py rename to vel/data/transformation/to_array.py index e1f3a5f0..8c1e838c 100644 --- a/vel/augmentation/to_array.py +++ b/vel/data/transformation/to_array.py @@ -1,15 +1,13 @@ import numpy as np -import vel.data as data +from vel.api.transformation import ScopedTransformation -class ToArray(data.Augmentation): +class ToArray(ScopedTransformation): """ Convert image to an array of floats """ - def __init__(self, mode='x', tags=None): - super().__init__(mode, tags) - def __call__(self, x_data): - array = np.array(x_data) + def transform(self, value): + array = np.array(value) if array.dtype 
== np.uint8: return array.astype(np.float32) / 255.0 diff --git a/vel/function/__init__.py b/vel/function/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/schedule/constant.py b/vel/function/constant.py similarity index 100% rename from vel/schedule/constant.py rename to vel/function/constant.py diff --git a/vel/schedule/linear.py b/vel/function/linear.py similarity index 100% rename from vel/schedule/linear.py rename to vel/function/linear.py diff --git a/vel/schedule/linear_and_constant.py b/vel/function/linear_and_constant.py similarity index 100% rename from vel/schedule/linear_and_constant.py rename to vel/function/linear_and_constant.py diff --git a/vel/model/autoencoder/mnist_cnn_vae.py b/vel/model/autoencoder/mnist_cnn_vae.py index b678a9de..1a1dd3e9 100644 --- a/vel/model/autoencoder/mnist_cnn_vae.py +++ b/vel/model/autoencoder/mnist_cnn_vae.py @@ -7,13 +7,13 @@ import vel.util.network as net_util -from vel.api import SupervisedModel, ModelFactory +from vel.api import GradientModel, ModelFactory from vel.metric.averaging_metric import AveragingNamedMetric from vel.metric.loss_metric import Loss from vel.module.layers import Flatten, Reshape -class MnistCnnVAE(SupervisedModel): +class MnistCnnVAE(GradientModel): """ A simple MNIST variational autoencoder, containing 3 convolutional layers. 
""" diff --git a/vel/source/vision/mnist.py b/vel/source/vision/mnist.py deleted file mode 100644 index 0ac79aae..00000000 --- a/vel/source/vision/mnist.py +++ /dev/null @@ -1,39 +0,0 @@ -from torchvision import datasets - - -from vel.api import SupervisedTrainingData - -from vel.augmentations.normalize import Normalize -from vel.augmentations.to_tensor import ToTensor -from vel.augmentations.to_array import ToArray -from vel.augmentations.unsupervised import Unsupervised - - -def create(model_config, batch_size, normalize=True, num_workers=0, augmentations=None, unsupervised=False): - """ Create a MNIST dataset, normalized """ - path = model_config.data_dir('mnist') - - train_dataset = datasets.MNIST(path, train=True, download=True) - test_dataset = datasets.MNIST(path, train=False, download=True) - - augmentations = [ToArray()] + (augmentations if augmentations is not None else []) - - if normalize: - train_data = train_dataset.data - mean_value = (train_data.double() / 255).mean().item() - std_value = (train_data.double() / 255).std().item() - - augmentations.append(Normalize(mean=mean_value, std=std_value, tags=['train', 'val'])) - - augmentations.append(ToTensor()) - - if unsupervised: - augmentations.append(Unsupervised()) - - return SupervisedTrainingData( - train_dataset, - test_dataset, - num_workers=num_workers, - batch_size=batch_size, - augmentations=augmentations - ) diff --git a/vel/train/__init__.py b/vel/train/__init__.py new file mode 100644 index 00000000..260e4c8d --- /dev/null +++ b/vel/train/__init__.py @@ -0,0 +1 @@ +from .trainer import Trainer diff --git a/vel/train/phase/__init__.py b/vel/train/phase/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/phase/cycle.py b/vel/train/phase/cycle.py similarity index 100% rename from vel/phase/cycle.py rename to vel/train/phase/cycle.py diff --git a/vel/phase/freeze.py b/vel/train/phase/freeze.py similarity index 100% rename from vel/phase/freeze.py rename to 
vel/train/phase/freeze.py diff --git a/vel/phase/generic.py b/vel/train/phase/generic.py similarity index 100% rename from vel/phase/generic.py rename to vel/train/phase/generic.py diff --git a/vel/phase/unfreeze.py b/vel/train/phase/unfreeze.py similarity index 100% rename from vel/phase/unfreeze.py rename to vel/train/phase/unfreeze.py diff --git a/vel/api/learner.py b/vel/train/trainer.py similarity index 65% rename from vel/api/learner.py rename to vel/train/trainer.py index 5dfd4393..44288497 100644 --- a/vel/api/learner.py +++ b/vel/train/trainer.py @@ -4,14 +4,16 @@ import tqdm import typing -from .model import SupervisedModel -from .info import BatchInfo, EpochInfo, TrainingInfo -from .source import Source +from vel.api import GradientModel, TrainingInfo, EpochInfo, BatchInfo +from vel.data import Loader +from vel.util.tensor_util import to_device -class Learner: + +class Trainer: """ Manages training process of a single model """ - def __init__(self, device: torch.device, model: SupervisedModel, max_grad_norm: typing.Optional[float] = None): + + def __init__(self, device: torch.device, model: GradientModel, max_grad_norm: typing.Optional[float] = None): self.device = device self.model = model.to(device) self.max_grad_norm = max_grad_norm @@ -43,64 +45,67 @@ def initialize_training(self, training_info: TrainingInfo, model_state=None, hid else: self.model.load_state_dict(model_state) - def run_epoch(self, epoch_info: EpochInfo, source: 'Source'): + def run_epoch(self, epoch_info: EpochInfo, loader: Loader): """ Run full epoch of learning """ epoch_info.on_epoch_begin() lr = epoch_info.optimizer.param_groups[-1]['lr'] print("|-------- Epoch {:06} Lr={:.6f} ----------|".format(epoch_info.global_epoch_idx, lr)) - self.train_epoch(epoch_info, source) + self.train_epoch(epoch_info, loader) epoch_info.result_accumulator.freeze_results('train') - self.validation_epoch(epoch_info, source) + self.validation_epoch(epoch_info, loader) 
epoch_info.result_accumulator.freeze_results('val') epoch_info.on_epoch_end() - def train_epoch(self, epoch_info, source: 'Source', interactive=True): + def train_epoch(self, epoch_info, loader: Loader, interactive=True): """ Run a single training epoch """ self.train() if interactive: - iterator = tqdm.tqdm(source.train_loader, desc="Training", unit="iter", file=sys.stdout) + iterator = tqdm.tqdm(loader['train'], desc="Training", unit="iter", file=sys.stdout) else: - iterator = source.train_loader + iterator = loader['train'] - for batch_idx, (data, target) in enumerate(iterator): + for batch_idx, data in enumerate(iterator): batch_info = BatchInfo(epoch_info, batch_idx) batch_info.on_batch_begin() - self.train_batch(batch_info, data, target) + self.train_batch(batch_info, data) batch_info.on_batch_end() iterator.set_postfix(loss=epoch_info.result_accumulator.intermediate_value('loss')) - def validation_epoch(self, epoch_info, source: 'Source'): + def validation_epoch(self, epoch_info, loader: Loader, interactive=True): """ Run a single evaluation epoch """ self.eval() - iterator = tqdm.tqdm(source.val_loader, desc="Validation", unit="iter", file=sys.stdout) + if interactive: + iterator = tqdm.tqdm(loader['val'], desc="Training", unit="iter", file=sys.stdout) + else: + iterator = loader['val'] with torch.no_grad(): - for batch_idx, (data, target) in enumerate(iterator): + for batch_idx, data in enumerate(iterator): batch_info = BatchInfo(epoch_info, batch_idx) batch_info.on_validation_batch_begin() - self.feed_batch(batch_info, data, target) + self.feed_batch(batch_info, data) batch_info.on_validation_batch_end() - def feed_batch(self, batch_info, data, target): + def feed_batch(self, batch_info, data): """ Run single batch of data """ - data, target = data.to(self.device), target.to(self.device) - metrics = self.model.calculate_gradient(data, target) + data = to_device(data, self.device) # Move a data batch into the right device + metrics = 
self.model.calculate_gradient(data) batch_info.update(metrics) - def train_batch(self, batch_info, data, target): + def train_batch(self, batch_info, data): """ Train single batch of data """ batch_info.optimizer.zero_grad() - self.feed_batch(batch_info, data, target) + self.feed_batch(batch_info, data) if self.max_grad_norm is not None: batch_info['grad_norm'] = torch.nn.utils.clip_grad_norm_( From 5120966c505796c39a10a9660c00e07395670755 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 23 Jun 2019 18:00:34 -0700 Subject: [PATCH 054/162] Fixed again AE and VAE models. --- .../autoencoder/mnist/mnist_cnn_ae.yaml | 25 ++++++++++--------- .../autoencoder/mnist/mnist_cnn_vae.yaml | 22 ++++++++-------- .../classification/mnist/mnist_cnn_01.yaml | 7 +++--- vel/data/augmentation/unsupervised.py | 14 ----------- vel/data/transformation/unsupervised.py | 12 +++++++++ vel/model/autoencoder/mnist_cnn_vae.py | 6 ++--- 6 files changed, 43 insertions(+), 43 deletions(-) delete mode 100644 vel/data/augmentation/unsupervised.py create mode 100644 vel/data/transformation/unsupervised.py diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml index ea1782b4..690a7f8c 100644 --- a/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml @@ -1,8 +1,8 @@ -name: 'mnist_cnn_autoenoder' +name: 'mnist_cnn_ae' model: - name: vel.models.autoencoder.mnist_cnn_autoencoder + name: vel.model.autoencoder.mnist_cnn_autoencoder img_rows: 28 img_cols: 28 img_channels: 1 @@ -11,28 +11,29 @@ model: source: - name: vel.sources.vision.mnist + name: vel.data.source.vision.mnist + + +loader: + name: vel.data.loader batch_size: 128 num_workers: 4 - normalize: False - unsupervised: true + + transformations: + - name: vel.data.transformation.image_to_tensor + - name: vel.data.transformation.unsupervised optimizer: - name: vel.optimizers.adam + name: vel.optimizer.adam lr: 1.0e-3 
commands: train: - name: vel.commands.train_command + name: vel.command.train_command epochs: 12 log_frequency: 100 - checkpoint: metric: 'val:loss' - - - visdom: - name: vel.commands.vis_store_command diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml index 74c499c9..c5a5f9c9 100644 --- a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml @@ -2,7 +2,7 @@ name: 'mnist_cnn_autoenoder' model: - name: vel.models.autoencoder.mnist_cnn_vae + name: vel.model.autoencoder.mnist_cnn_vae img_rows: 28 img_cols: 28 img_channels: 1 @@ -11,28 +11,28 @@ model: source: - name: vel.sources.vision.mnist + name: vel.data.source.vision.mnist + +loader: + name: vel.data.loader batch_size: 128 - normalize: False num_workers: 4 - unsupervised: true + + transformations: + - name: vel.data.transformation.image_to_tensor + - name: vel.data.transformation.unsupervised optimizer: - name: vel.optimizers.adam + name: vel.optimizer.adam lr: 1.0e-3 commands: train: - name: vel.commands.train_command + name: vel.command.train_command epochs: 12 log_frequency: 100 - checkpoint: metric: 'val:loss' - - - visdom: - name: vel.commands.vis_store_command diff --git a/examples-configs/classification/mnist/mnist_cnn_01.yaml b/examples-configs/classification/mnist/mnist_cnn_01.yaml index b0f75729..6e06faea 100644 --- a/examples-configs/classification/mnist/mnist_cnn_01.yaml +++ b/examples-configs/classification/mnist/mnist_cnn_01.yaml @@ -22,15 +22,16 @@ loader: - name: vel.data.transformation.image_to_tensor +optimizer: + name: vel.optimizer.adadelta + + commands: train: name: vel.command.train_command epochs: 12 log_frequency: 100 - optimizer: - name: vel.optimizer.adadelta - checkpoint: metric: 'val:loss' diff --git a/vel/data/augmentation/unsupervised.py b/vel/data/augmentation/unsupervised.py deleted file mode 100644 index 678ab7d3..00000000 --- 
a/vel/data/augmentation/unsupervised.py +++ /dev/null @@ -1,14 +0,0 @@ -import vel.data as data - - -class Unsupervised(data.Augmentation): - """ Simply transform supervised to an unsupervised dataset, cloning data to a target """ - def __init__(self): - super().__init__('both', None) - - def __call__(self, x_data, y_data): - return x_data, x_data - - -def create(): - return Unsupervised() diff --git a/vel/data/transformation/unsupervised.py b/vel/data/transformation/unsupervised.py new file mode 100644 index 00000000..7af7e560 --- /dev/null +++ b/vel/data/transformation/unsupervised.py @@ -0,0 +1,12 @@ +from vel.api import Transformation + + +class Unsupervised(Transformation): + """ Simply transform supervised to an unsupervised dataset, cloning data to a target """ + def __call__(self, datapoint): + datapoint['y'] = datapoint['x'] + return datapoint + + +def create(): + return Unsupervised() diff --git a/vel/model/autoencoder/mnist_cnn_vae.py b/vel/model/autoencoder/mnist_cnn_vae.py index 1a1dd3e9..3eb1dadd 100644 --- a/vel/model/autoencoder/mnist_cnn_vae.py +++ b/vel/model/autoencoder/mnist_cnn_vae.py @@ -108,9 +108,9 @@ def forward(self, sample): 'std': std } - def calculate_gradient(self, x_data, y_true): + def calculate_gradient(self, data): """ Calculate a gradient of loss function """ - output = self(x_data) + output = self(data['x']) y_pred = output['decoded'] @@ -124,7 +124,7 @@ def calculate_gradient(self, x_data, y_true): # reconstruction = 0.5 * F.mse_loss(y_pred, y_true) # We must sum over all image axis and average only on minibatch axis - reconstruction = F.binary_cross_entropy(y_pred, y_true, reduce=False).sum(1).sum(1).sum(1).mean() + reconstruction = F.binary_cross_entropy(y_pred, data['y'], reduce=False).sum(1).sum(1).sum(1).mean() loss = reconstruction + kl_divergence if self.training: From 5e07fd95e8bbf0ea8c64e0cb3b10f00c501cfb0a Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 23 Jun 2019 18:04:35 -0700 Subject: [PATCH 055/162] 
Small updates to README. --- README.md | 114 +----------------------------------------------------- 1 file changed, 2 insertions(+), 112 deletions(-) diff --git a/README.md b/README.md index fd5d7c42..5b39c7a7 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,6 @@ [![Build Status](https://travis-ci.org/MillionIntegrals/vel.svg?branch=master)](https://travis-ci.org/MillionIntegrals/vel) [![PyPI version](https://badge.fury.io/py/vel.svg)](https://badge.fury.io/py/vel) [![GitHub](https://img.shields.io/github/license/mashape/apistatus.svg)](https://github.com/MillionIntegrals/vel/blob/master/LICENSE) -[![Gitter chat](https://badges.gitter.im/MillionIngegrals/vel.png)](https://gitter.im/deep-learning-vel) Bring **velocity** to deep-learning research. @@ -130,14 +129,14 @@ Most of the examples for this framework are defined using config files in the For example, to run the A2C algorithm on a Breakout atari environment, simply invoke: ``` -python -m vel.launcher examples-configs/rl/atari/a2c/breakout_a2c.yaml train +python -m vel.launcher examples-configs/rl/atari/atari_a2c.yaml train ``` If you install the library locally, you'll have a special wrapper created that will invoke the launcher for you. Then, above becomes: ``` -vel examples-configs/rl/atari/a2c/breakout_a2c.yaml train +vel examples-configs/rl/atari/atari_a2c.yaml train ``` General command line interface of the launcher is: @@ -154,112 +153,6 @@ If you prefer to use the library from inside your scripts, take a look at the well. Scripts generally don't require any MongoDB or Visdom setup, so they can be run straight away in any setup, but their output will be less rich and less informative. 
-Here is an example script running the same setup as a config file from above: - -```python -import torch -import torch.optim as optim - -from vel.rl.metrics import EpisodeRewardMetric -from vel.storage.streaming.stdout import StdoutStreaming -from vel.util.random import set_seed - -from vel.rl.env.classic_atari import ClassicAtariEnv -from vel.rl.vecenv.subproc import SubprocVecEnvWrapper - -from vel.modules.input.image_to_tensor import ImageToTensorFactory -from vel.rl.models.stochastic_policy_model import StochasticPolicyModelFactory -from vel.rl.models.backbone.nature_cnn import NatureCnnFactory - - -from vel.rl.reinforcers.on_policy_iteration_reinforcer import ( - OnPolicyIterationReinforcer, OnPolicyIterationReinforcerSettings -) - -from vel.rl.algo.policy_gradient.a2c import A2CPolicyGradient -from vel.rl.env_roller.step_env_roller import StepEnvRoller - -from vel.api.info import TrainingInfo, EpochInfo - - -def breakout_a2c(): - device = torch.device('cuda:0') - seed = 1001 - - # Set random seed in python std lib, numpy and pytorch - set_seed(seed) - - # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers - # These are just helper functions for that - vec_env = SubprocVecEnvWrapper( - ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 - ).instantiate(parallel_envs=16, seed=seed) - - # Again, use a helper to create a model - # But because model is owned by the reinforcer, model should not be accessed using this variable - # but from reinforcer.model property - model = StochasticPolicyModelFactory( - input_block=ImageToTensorFactory(), - backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) - ).instantiate(action_space=vec_env.action_space) - - # Reinforcer - an object managing the learning process - reinforcer = OnPolicyIterationReinforcer( - device=device, - settings=OnPolicyIterationReinforcerSettings( - batch_size=256, - number_of_steps=5, - ), - model=model, - 
algo=A2CPolicyGradient( - entropy_coefficient=0.01, - value_coefficient=0.5, - max_grad_norm=0.5, - discount_factor=0.99, - ), - env_roller=StepEnvRoller( - environment=vec_env, - device=device, - ) - ) - - # Model optimizer - optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=7.0e-4, eps=1e-3) - - # Overall information store for training information - training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[StdoutStreaming()] # Print live metrics every epoch to standard output - ) - - # A bit of training initialization bookkeeping... - training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 100 batches per epoch to average metrics nicely - num_epochs = int(1.1e7 / (5 * 16) / 100) - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=100, - optimizer=optimizer - ) - - reinforcer.train_epoch(epoch_info) - - training_info.on_train_end() - - -if __name__ == '__main__': - breakout_a2c() -``` - # Docker Dockerized version of this library is available in from the Docker Hub as @@ -316,10 +209,7 @@ Possible to be included: Code quality: -- Rename models to policies -- Force dictionary inputs and outputs for policies - Factor action noise back into the policy -- Use linter as a part of the build process # Citing From acdce019e85b6a28323b314b517b9146d154d2e8 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 23 Jun 2019 21:04:58 -0700 Subject: [PATCH 056/162] Updating some metrics. 
--- vel/api/info.py | 22 ++++++++------------ vel/callback/sample_tracker.py | 0 vel/metric/__init__.py | 2 +- vel/metric/accuracy.py | 4 ++-- vel/metric/averaging_metric.py | 12 +++++------ vel/metric/base_metric.py | 8 +++++++- vel/metric/loss_metric.py | 4 ++-- vel/metric/summing_metric.py | 4 ++-- vel/metric/value_metric.py | 4 ++-- vel/storage/streaming/stdout.py | 25 +++++++++++------------ vel/storage/streaming/tensorboard.py | 30 +++++++++++++++++----------- 11 files changed, 60 insertions(+), 55 deletions(-) create mode 100644 vel/callback/sample_tracker.py diff --git a/vel/api/info.py b/vel/api/info.py index b5f978c3..a84d1873 100644 --- a/vel/api/info.py +++ b/vel/api/info.py @@ -116,34 +116,28 @@ def _reset_metrics(self): for m in self.metrics: m.reset() - def value(self): - """ Return current value of the metrics """ - return {m.name: m.value() for m in self.metrics} + def value(self, dataset=None): + """ Return current dictionary value of the metrics """ + from vel.metric import MetricKey + return {MetricKey(dataset, m.name, m.scope): m.value() for m in self.metrics} def intermediate_value(self, metric): """ Return an intermediate (inter-epoch) value of a metric """ if ':' in metric: + # TODO(jerry) There's got to be a better way to do it metric_name = metric.split(':')[-1] else: metric_name = metric return self.metrics_by_name[metric_name].value() - def freeze_results(self, name=None): - new_results = self.value() - - if name is None: - for key, value in new_results.items(): - self.frozen_results[key] = value - else: - for key, value in new_results.items(): - self.frozen_results[f'{name}:{key}'] = value - + def freeze_results(self, dataset=None): + self.frozen_results.update(self.value(dataset)) self._reset_metrics() def result(self): """ Return the epoch result """ - final_result = {'epoch_idx': self.global_epoch_idx} + final_result = {} for key, value in self.frozen_results.items(): final_result[key] = value diff --git 
a/vel/callback/sample_tracker.py b/vel/callback/sample_tracker.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/metric/__init__.py b/vel/metric/__init__.py index 7bb2fe79..37708fa3 100644 --- a/vel/metric/__init__.py +++ b/vel/metric/__init__.py @@ -1,3 +1,3 @@ -from .base_metric import BaseMetric # noqa +from .base_metric import BaseMetric, MetricKey # noqa from .averaging_metric import AveragingMetric, AveragingNamedMetric, AveragingSupervisedMetric # noqa from .value_metric import ValueMetric # noqa diff --git a/vel/metric/accuracy.py b/vel/metric/accuracy.py index 442f8470..44dcdb2f 100644 --- a/vel/metric/accuracy.py +++ b/vel/metric/accuracy.py @@ -3,8 +3,8 @@ class Accuracy(AveragingSupervisedMetric): """ Classification accuracy """ - def __init__(self): - super().__init__("accuracy") + def __init__(self, scope="train"): + super().__init__("accuracy", scope=scope) def _value_function(self, x_input, y_true, y_pred): """ Return classification accuracy of input """ diff --git a/vel/metric/averaging_metric.py b/vel/metric/averaging_metric.py index 37b667f3..2355b7fd 100644 --- a/vel/metric/averaging_metric.py +++ b/vel/metric/averaging_metric.py @@ -5,8 +5,8 @@ class AveragingMetric(BaseMetric): """ Base class for metrics that simply calculate the average over the epoch """ - def __init__(self, name): - super().__init__(name) + def __init__(self, name, scope="general"): + super().__init__(name, scope=scope) self.storage = [] @@ -29,8 +29,8 @@ def value(self): class AveragingNamedMetric(AveragingMetric): """ Super simple averaging metric that just takes a value from dictionary and averages it over samples """ - def __init__(self, name): - super().__init__(name) + def __init__(self, name, scope="general"): + super().__init__(name, scope=scope) def _value_function(self, batch_info): return batch_info[self.name] @@ -38,8 +38,8 @@ def _value_function(self, batch_info): class AveragingSupervisedMetric(BaseMetric): """ Base class for metrics that 
simply calculate the average over the epoch """ - def __init__(self, name): - super().__init__(name) + def __init__(self, name, scope="general"): + super().__init__(name, scope=scope) self.storage = [] diff --git a/vel/metric/base_metric.py b/vel/metric/base_metric.py index d8d12523..6a64d1b2 100644 --- a/vel/metric/base_metric.py +++ b/vel/metric/base_metric.py @@ -1,11 +1,17 @@ +import collections + from vel.api import TrainingInfo +MetricKey = collections.namedtuple('MetricKey', ['dataset', 'name', 'scope']) + + class BaseMetric: """ Base class for all the metrics """ - def __init__(self, name): + def __init__(self, name, scope="general"): self.name = name + self.scope = scope def calculate(self, batch_info): """ Calculate value of a metric based on supplied data """ diff --git a/vel/metric/loss_metric.py b/vel/metric/loss_metric.py index 1e02ce4d..f4fa9df5 100644 --- a/vel/metric/loss_metric.py +++ b/vel/metric/loss_metric.py @@ -3,8 +3,8 @@ class Loss(AveragingMetric): """ Just a loss function """ - def __init__(self): - super().__init__("loss") + def __init__(self, scope="train"): + super().__init__("loss", scope=scope) def _value_function(self, batch_info): """ Just forward a value of the loss""" diff --git a/vel/metric/summing_metric.py b/vel/metric/summing_metric.py index 3d1a389e..f08ceab7 100644 --- a/vel/metric/summing_metric.py +++ b/vel/metric/summing_metric.py @@ -3,8 +3,8 @@ class SummingMetric(BaseMetric): """ Base class for metrics that simply calculate the sum over the epoch """ - def __init__(self, name, reset_value=True): - super().__init__(name) + def __init__(self, name, scope="general", reset_value=True): + super().__init__(name, scope=scope) self.reset_value = reset_value self.buffer = 0 diff --git a/vel/metric/value_metric.py b/vel/metric/value_metric.py index 309e9e57..934ecc0c 100644 --- a/vel/metric/value_metric.py +++ b/vel/metric/value_metric.py @@ -4,8 +4,8 @@ class ValueMetric(BaseMetric): """ Base class for metrics that don't have 
state and just calculate a simple value """ - def __init__(self, name): - super().__init__(name) + def __init__(self, name, scope="general"): + super().__init__(name, scope=scope) self._metric_value = None diff --git a/vel/storage/streaming/stdout.py b/vel/storage/streaming/stdout.py index e4bc5fc3..d83e8f9d 100644 --- a/vel/storage/streaming/stdout.py +++ b/vel/storage/streaming/stdout.py @@ -9,33 +9,32 @@ def on_epoch_end(self, epoch_info: EpochInfo): else: print(f"=>>>>>>>>>> EPOCH {epoch_info.global_epoch_idx}") - if any(':' not in x for x in epoch_info.result.keys()): - self._print_metrics_line(epoch_info.result, head=None) + if any(x.dataset is None for x in epoch_info.result.keys()): + self._print_metrics_line(epoch_info.result, dataset=None) - head_set = sorted({x.split(':')[0] + ':' for x in epoch_info.result.keys() if ':' in x}) + head_set = sorted({x.dataset for x in epoch_info.result.keys() if x.dataset is not None}) for head in head_set: - if any(x.startswith(head) for x in epoch_info.result.keys()): - self._print_metrics_line(epoch_info.result, head) + self._print_metrics_line(epoch_info.result, head) print(f"=>>>>>>>>>> DONE") @staticmethod - def _print_metrics_line(metrics, head=None): - if head is None: - head = 'Metrics:' + def _print_metrics_line(metrics, dataset=None): + if dataset is None: + dataset = 'Metrics:' metrics_list = [ - "{} {:.06f}".format(k, metrics[k]) - for k in sorted([k for k in metrics.keys() if ':' not in k]) + "{}/{} {:.06f}".format(k.scope, k.name, metrics[k]) + for k in sorted([k for k in metrics.keys() if k.dataset is None]) ] else: metrics_list = [ - "{} {:.06f}".format(k.split(':')[1], metrics[k]) - for k in sorted([k for k in metrics.keys() if k.startswith(head)]) + "{}/{} {:.06f}".format(k.scope, k.name, metrics[k]) + for k in sorted([k for k in metrics.keys() if k.dataset == dataset]) ] - print('{0: <10}'.format(head.capitalize()), " ".join(metrics_list)) + print('{0: <10}'.format(dataset.capitalize()), " 
".join(metrics_list)) def create(): diff --git a/vel/storage/streaming/tensorboard.py b/vel/storage/streaming/tensorboard.py index ef90b267..f75ca570 100644 --- a/vel/storage/streaming/tensorboard.py +++ b/vel/storage/streaming/tensorboard.py @@ -1,7 +1,7 @@ import os import shutil -from vel.api import ModelConfig, Callback, TrainingInfo +from vel.api import ModelConfig, Callback, TrainingInfo, EpochInfo from torch.utils.tensorboard import SummaryWriter @@ -18,21 +18,27 @@ def on_train_begin(self, training_info: TrainingInfo) -> None: if os.path.exists(self.logdir): shutil.rmtree(self.logdir) - def on_epoch_end(self, epoch_info): + def on_epoch_end(self, epoch_info: EpochInfo): """ Push data to tensorboard on push """ - summary_writer = SummaryWriter(log_dir=self.logdir) + head_set = sorted({x.dataset for x in epoch_info.result.keys()}) - for key, value in epoch_info.result.items(): - if key == 'epoch_idx': - continue + for head in head_set: + if head is None: + summary_writer = SummaryWriter(log_dir=os.path.join(self.logdir, "generic")) + else: + summary_writer = SummaryWriter(log_dir=os.path.join(self.logdir, head)) - summary_writer.add_scalar( - tag=key, - scalar_value=value, - global_step=epoch_info.global_epoch_idx, - ) + for key, value in epoch_info.result.items(): + if key.dataset == head: + tag = '{}/{}'.format(key.scope, key.name) - summary_writer.close() + summary_writer.add_scalar( + tag=tag, + scalar_value=value, + global_step=epoch_info.global_epoch_idx, + ) + + summary_writer.close() def create(model_config): From 489a7b69623cc51fab28a3276dbcc6474a117c27 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 23 Jun 2019 21:07:19 -0700 Subject: [PATCH 057/162] Added scope for some training metrics. 
--- vel/model/autoencoder/mnist_cnn_vae.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vel/model/autoencoder/mnist_cnn_vae.py b/vel/model/autoencoder/mnist_cnn_vae.py index 3eb1dadd..258294ff 100644 --- a/vel/model/autoencoder/mnist_cnn_vae.py +++ b/vel/model/autoencoder/mnist_cnn_vae.py @@ -140,8 +140,8 @@ def metrics(self): """ Set of metrics for this model """ return [ Loss(), - AveragingNamedMetric('reconstruction'), - AveragingNamedMetric('kl_divergence') + AveragingNamedMetric('reconstruction', scope="train"), + AveragingNamedMetric('kl_divergence', scope="train") ] From 9f59f0908c81b423e861c18e72eae86c5b669a84 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 23 Jun 2019 21:08:51 -0700 Subject: [PATCH 058/162] Added some comment docstring. --- vel/api/model_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vel/api/model_config.py b/vel/api/model_config.py index 4426dd81..bd100eeb 100644 --- a/vel/api/model_config.py +++ b/vel/api/model_config.py @@ -34,6 +34,7 @@ def find_project_directory(start_path) -> str: @staticmethod def from_project_directory(path) -> str: + """ Locate given path relative to project directory """ return os.path.join(ModelConfig.find_project_directory('.'), path) @classmethod From c146b323facf81e925dd82627e535accaff61c51 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 23 Jun 2019 21:59:28 -0700 Subject: [PATCH 059/162] Updated CIFAR10 configs. 
--- .../autoencoder/mnist/mnist_cnn_vae.yaml | 3 - .../cifar10/cifar10_cnn_01.yaml | 50 ++++++----- .../cifar10/cifar10_resnetv1_110.yaml | 81 ++++++++--------- .../cifar10/cifar10_resnetv1_32.yaml | 85 ++++++++---------- .../cifar10/cifar10_resnetv2_110.yaml | 84 ++++++++--------- .../cifar10_resnetv2_164_bottleneck.yaml | 89 ++++++++----------- .../cifar10/cifar10_resnetv2_32.yaml | 84 ++++++++--------- .../cifar10/cifar10_resnext_29_c1.yaml | 88 ++++++++---------- .../cifar10/cifar10_resnext_29_c8.yaml | 89 ++++++++----------- .../classification/mnist/mnist_cnn_01.yaml | 3 - vel/api/__init__.py | 5 +- vel/api/transformation.py | 8 ++ vel/data/augmentation/normalize.py | 24 ----- vel/data/augmentation/random_crop.py | 25 +++--- .../augmentation/random_horizontal_flip.py | 12 +-- vel/data/dataflow.py | 10 ++- vel/data/loader.py | 4 +- vel/data/source/vision/cifar10.py | 52 ++++++----- vel/data/source/vision/mnist.py | 27 ------ vel/data/transformation/image_to_tensor.py | 5 ++ vel/data/transformation/normalize.py | 28 ++++++ vel/{api => train}/train_phase.py | 0 22 files changed, 389 insertions(+), 467 deletions(-) delete mode 100644 vel/data/augmentation/normalize.py create mode 100644 vel/data/transformation/normalize.py rename vel/{api => train}/train_phase.py (100%) diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml index c5a5f9c9..74619b28 100644 --- a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml @@ -32,7 +32,4 @@ commands: train: name: vel.command.train_command epochs: 12 - log_frequency: 100 - checkpoint: - metric: 'val:loss' diff --git a/examples-configs/classification/cifar10/cifar10_cnn_01.yaml b/examples-configs/classification/cifar10/cifar10_cnn_01.yaml index cce9c77b..8ce60349 100644 --- a/examples-configs/classification/cifar10/cifar10_cnn_01.yaml +++ b/examples-configs/classification/cifar10/cifar10_cnn_01.yaml 
@@ -2,36 +2,46 @@ name: 'cifar10_cnn_01' model: - name: vel.models.vision.cifar10_cnn_01 + name: vel.model.vision.cifar10_cnn_01 img_rows: 32 img_cols: 32 img_channels: 3 num_classes: 10 + source: - name: vel.sources.vision.cifar10 -# batch_size: 128 - batch_size: 32 + name: vel.data.source.vision.cifar10 + + +loader: + name: vel.data.loader + batch_size: 128 num_workers: 4 - normalize: True - augmentations: - - name: vel.augmentations.random_crop - mode: x - tags: ["train"] - width: 32 - height: 32 - padding: 4 - padding_mode: 'reflect' + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.normalize + - name: vel.data.augmentation.random_crop + tags: ["train"] + width: 32 + height: 32 + padding: 4 + padding_mode: 'reflect' + - name: vel.data.augmentation.random_horizontal_flip + tags: ["train"] + - name: vel.data.transformation.image_to_tensor + + +optimizer: + name: vel.optimizer.adadelta - - name: vel.augmentations.random_horizontal_flip - mode: x - tags: ["train"] commands: train: - name: vel.commands.train_command + name: vel.command.train_command epochs: 12 - log_frequency: 20 - optimizer: - name: vel.optimizers.adadelta + + augvis: + name: vel.command.augvis_command + samples: 3 + cases: 3 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml b/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml index a34c34a5..fc01f757 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml @@ -2,32 +2,51 @@ name: 'cifar10_resnetv1_110' model: - name: vel.models.vision.cifar_resnet_v1 + name: vel.model.vision.cifar_resnet_v1 img_channels: 3 num_classes: 10 mode: basic # Basic or bottleneck blocks: [18, 18, 18] # ResNet110 + source: - name: vel.sources.vision.cifar10 + name: vel.data.source.vision.cifar10 + + +loader: + name: vel.data.loader batch_size: 128 -# batch_size: 32 num_workers: 4 - normalize: 
True - augmentations: - - name: vel.augmentations.random_crop - mode: x - tags: ["train"] - width: 32 - height: 32 - padding: 4 - padding_mode: 'reflect' + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.normalize + - name: vel.data.augmentation.random_crop + tags: ["train"] + width: 32 + height: 32 + padding: 4 + padding_mode: 'reflect' + - name: vel.data.augmentation.random_horizontal_flip + tags: ["train"] + - name: vel.data.transformation.image_to_tensor + + +optimizer: + name: vel.optimizers.sgd + lr: 0.1 + weight_decay: 0.0001 + momentum: 0.9 + - - name: vel.augmentations.random_horizontal_flip - mode: x - tags: ["train"] +scheduler: + name: vel.scheduler.ladder + ladder: + - [5, 0.1] # Special iteration to kickstart convergence + - [75, 1.0] + - [40, 0.1] + - [80, 0.01] commands: @@ -36,38 +55,8 @@ commands: epochs: 200 log_frequency: 100 - optimizer: - name: vel.optimizers.sgd - lr: 0.1 - weight_decay: 0.0001 - momentum: 0.9 - -# optimizer: -# name: vel.optimizers.adam -# lr: 0.001 -# weight_decay: 0.001 - -# scheduler: -# name: vel.scheduler.multi_step -# milestones: [20, 50, 100, 150] -# gamma: 0.33 - - scheduler: - name: vel.scheduler.ladder - ladder: - - [5, 0.1] # Special iteration to kickstart convergence - - [75, 1.0] - - [40, 0.1] - - [80, 0.01] - -# scheduler: -# name: vel.scheduler.reduce_lr_on_plateau -# metric_name: val:accuracy -# factor: 0.33 -# patience: 15 -# cooldown: 20 -# min_lr: 0.5e-6 augvis: name: vel.commands.augvis_command samples: 5 + cases: 3 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml b/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml index d85ce5bf..f04e40fc 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml @@ -2,74 +2,59 @@ name: 'cifar10_resnetv1_32' model: - name: vel.models.vision.cifar_resnet_v1 + name: 
vel.model.vision.cifar_resnet_v1 img_channels: 3 num_classes: 10 mode: basic # Basic or bottleneck blocks: [5, 5, 5] # ResNet32 + source: - name: vel.sources.vision.cifar10 + name: vel.data.source.vision.cifar10 + + +loader: + name: vel.data.loader batch_size: 128 -# batch_size: 32 num_workers: 4 - normalize: True - augmentations: - - name: vel.augmentations.random_crop - mode: x - tags: ["train"] - width: 32 - height: 32 - padding: 4 - padding_mode: 'reflect' + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.normalize + - name: vel.data.augmentation.random_crop + tags: ["train"] + width: 32 + height: 32 + padding: 4 + padding_mode: 'reflect' + - name: vel.data.augmentation.random_horizontal_flip + tags: ["train"] + - name: vel.data.transformation.image_to_tensor - - name: vel.augmentations.random_horizontal_flip - mode: x - tags: ["train"] +optimizer: + name: vel.optimizer.sgd + lr: 0.1 + weight_decay: 0.0001 + momentum: 0.9 + + +scheduler: + name: vel.scheduler.ladder + ladder: + - [80, 1.0] + - [40, 0.1] + - [80, 0.01] commands: train: - name: vel.commands.train_command + name: vel.command.train_command epochs: 200 - log_frequency: 100 - - - optimizer: - name: vel.optimizers.sgd - lr: 0.1 - weight_decay: 0.0001 - momentum: 0.9 - -# optimizer: -# name: vel.optimizers.adam -# lr: 0.001 -# weight_decay: 0.001 - -# scheduler: -# name: vel.scheduler.multi_step -# milestones: [20, 50, 100, 150] -# gamma: 0.33 - - scheduler: - name: vel.scheduler.ladder - ladder: - - [80, 1.0] - - [40, 0.1] - - [80, 0.01] - -# scheduler: -# name: vel.scheduler.reduce_lr_on_plateau -# metric_name: val:accuracy -# factor: 0.33 -# patience: 15 -# cooldown: 20 -# min_lr: 0.5e-6 + augvis: - name: vel.commands.augvis_command + name: vel.command.augvis_command samples: 5 cases: 3 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml b/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml index 
b9998cdf..9291ee8d 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml @@ -2,7 +2,7 @@ name: 'cifar10_resnetv2_110' model: - name: vel.models.vision.cifar_resnet_v2 + name: vel.model.vision.cifar_resnet_v2 img_channels: 3 num_classes: 10 @@ -10,63 +10,51 @@ model: blocks: [18, 18, 18] # ResNet110 source: - name: vel.sources.vision.cifar10 + name: vel.data.source.vision.cifar10 + + +loader: + name: vel.data.loader batch_size: 128 -# batch_size: 32 num_workers: 4 - normalize: True - augmentations: - - name: vel.augmentations.random_horizontal_flip - mode: x - tags: ["train"] - - name: vel.augmentations.random_crop - mode: x - tags: ["train"] - width: 32 - height: 32 - padding: 4 - padding_mode: 'reflect' + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.normalize + - name: vel.data.augmentation.random_crop + tags: ["train"] + width: 32 + height: 32 + padding: 4 + padding_mode: 'reflect' + - name: vel.data.augmentation.random_horizontal_flip + tags: ["train"] + - name: vel.data.transformation.image_to_tensor -commands: - train: - name: vel.commands.train_command - epochs: 200 - log_frequency: 100 +optimizer: + name: vel.optimizer.sgd + lr: 0.1 + weight_decay: 0.0001 + momentum: 0.9 - optimizer: - name: vel.optimizers.sgd - lr: 0.1 - weight_decay: 0.0001 - momentum: 0.9 -# optimizer: -# name: vel.optimizers.adam -# lr: 0.001 -# weight_decay: 0.001 +scheduler: + name: vel.scheduler.ladder + ladder: + - [5, 0.1] # Special iteration to kickstart convergence + - [75, 1.0] + - [40, 0.1] + - [80, 0.01] -# scheduler: -# name: vel.scheduler.multi_step -# milestones: [20, 50, 100, 150] -# gamma: 0.33 - scheduler: - name: vel.scheduler.ladder - ladder: - - [5, 0.1] # Special iteration to kickstart convergence - - [75, 1.0] - - [40, 0.1] - - [80, 0.01] +commands: + train: + name: vel.command.train_command + epochs: 200 -# scheduler: -# 
name: vel.scheduler.reduce_lr_on_plateau -# metric_name: val:accuracy -# factor: 0.33 -# patience: 15 -# cooldown: 20 -# min_lr: 0.5e-6 augvis: - name: vel.commands.augvis_command + name: vel.command.augvis_command samples: 5 + cases: 3 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml b/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml index fb01e547..86d5dbaa 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml @@ -2,7 +2,7 @@ name: 'cifar10_resnetv2_110' model: - name: vel.models.vision.cifar_resnet_v2 + name: vel.model.vision.cifar_resnet_v2 img_channels: 3 num_classes: 10 @@ -10,64 +10,53 @@ model: mode: bottleneck # Basic or bottleneck blocks: [18, 18, 18] # ResNet110 + source: - name: vel.sources.vision.cifar10 + name: vel.data.source.vision.cifar10 + + +loader: + name: vel.data.loader batch_size: 128 -# batch_size: 32 num_workers: 4 - normalize: True - augmentations: - - name: vel.augmentations.random_horizontal_flip - mode: x - tags: ["train"] - - name: vel.augmentations.random_crop - mode: x - tags: ["train"] - width: 32 - height: 32 - padding: 4 - padding_mode: 'reflect' + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.normalize + - name: vel.data.augmentation.random_crop + tags: ["train"] + width: 32 + height: 32 + padding: 4 + padding_mode: 'reflect' + - name: vel.data.augmentation.random_horizontal_flip + tags: ["train"] + - name: vel.data.transformation.image_to_tensor + + +optimizer: + name: vel.optimizer.sgd + lr: 0.1 + weight_decay: 0.0001 + momentum: 0.9 + + +scheduler: + name: vel.scheduler.ladder + ladder: + - [5, 0.1] # Special iteration to kickstart convergence + - [75, 1.0] + - [40, 0.1] + - [80, 0.01] commands: train: - name: vel.commands.train_command + name: vel.command.train_command epochs: 200 - 
log_frequency: 100 - - optimizer: - name: vel.optimizers.sgd - lr: 0.1 - weight_decay: 0.0001 - momentum: 0.9 - -# optimizer: -# name: vel.optimizers.adam -# lr: 0.001 -# weight_decay: 0.001 - -# scheduler: -# name: vel.scheduler.multi_step -# milestones: [20, 50, 100, 150] -# gamma: 0.33 - - scheduler: - name: vel.scheduler.ladder - ladder: - - [5, 0.1] # Special iteration to kickstart convergence - - [75, 1.0] - - [40, 0.1] - - [80, 0.01] - -# scheduler: -# name: vel.scheduler.reduce_lr_on_plateau -# metric_name: val:accuracy -# factor: 0.33 -# patience: 15 -# cooldown: 20 -# min_lr: 0.5e-6 + augvis: - name: vel.commands.augvis_command + name: vel.command.augvis_command samples: 5 + cases: 3 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml b/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml index 4e0684a4..7f38b3fb 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml @@ -2,7 +2,7 @@ name: 'cifar10_resnetv2_110' model: - name: vel.models.vision.cifar_resnet_v2 + name: vel.model.vision.cifar_resnet_v2 img_channels: 3 num_classes: 10 @@ -10,63 +10,51 @@ model: blocks: [5, 5, 5] # ResNet32 source: - name: vel.sources.vision.cifar10 + name: vel.data.source.vision.cifar10 + + +loader: + name: vel.data.loader batch_size: 128 -# batch_size: 32 num_workers: 4 - normalize: True - augmentations: - - name: vel.augmentations.random_horizontal_flip - mode: x - tags: ["train"] - - name: vel.augmentations.random_crop - mode: x - tags: ["train"] - width: 32 - height: 32 - padding: 4 - padding_mode: 'reflect' + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.normalize + - name: vel.data.augmentation.random_crop + tags: ["train"] + width: 32 + height: 32 + padding: 4 + padding_mode: 'reflect' + - name: vel.data.augmentation.random_horizontal_flip + tags: ["train"] + - name: 
vel.data.transformation.image_to_tensor -commands: - train: - name: vel.commands.train_command - epochs: 200 - log_frequency: 100 +optimizer: + name: vel.optimizer.sgd + lr: 0.1 + weight_decay: 0.0001 + momentum: 0.9 - optimizer: - name: vel.optimizers.sgd - lr: 0.1 - weight_decay: 0.0001 - momentum: 0.9 -# optimizer: -# name: vel.optimizers.adam -# lr: 0.001 -# weight_decay: 0.001 +scheduler: + name: vel.scheduler.ladder + ladder: + - [5, 0.1] # Special iteration to kickstart convergence + - [75, 1.0] + - [40, 0.1] + - [80, 0.01] -# scheduler: -# name: vel.scheduler.multi_step -# milestones: [20, 50, 100, 150] -# gamma: 0.33 - scheduler: - name: vel.scheduler.ladder - ladder: - - [5, 0.1] # Special iteration to kickstart convergence - - [75, 1.0] - - [40, 0.1] - - [80, 0.01] +commands: + train: + name: vel.command.train_command + epochs: 200 -# scheduler: -# name: vel.scheduler.reduce_lr_on_plateau -# metric_name: val:accuracy -# factor: 0.33 -# patience: 15 -# cooldown: 20 -# min_lr: 0.5e-6 augvis: - name: vel.commands.augvis_command + name: vel.command.augvis_command samples: 5 + cases: 3 diff --git a/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml b/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml index 88099c4e..eabd968a 100644 --- a/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml @@ -2,7 +2,7 @@ name: 'cifar10_resnetv1_32' model: - name: vel.models.vision.cifar_resnext + name: vel.model.vision.cifar_resnext img_channels: 3 num_classes: 10 @@ -13,64 +13,52 @@ model: divisor: 4 cardinality: 1 + source: - name: vel.sources.vision.cifar10 + name: vel.data.source.vision.cifar10 + + +loader: + name: vel.data.loader batch_size: 128 -# batch_size: 32 num_workers: 4 - normalize: True - augmentations: - - name: vel.augmentations.random_horizontal_flip - mode: x - tags: ["train"] - - name: vel.augmentations.random_crop - mode: x - tags: 
["train"] - width: 32 - height: 32 - padding: 4 - padding_mode: 'reflect' + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.normalize + - name: vel.data.augmentation.random_crop + tags: ["train"] + width: 32 + height: 32 + padding: 4 + padding_mode: 'reflect' + - name: vel.data.augmentation.random_horizontal_flip + tags: ["train"] + - name: vel.data.transformation.image_to_tensor + + +optimizer: + name: vel.optimizer.sgd + lr: 0.1 + weight_decay: 0.0001 + momentum: 0.9 + + +scheduler: + name: vel.scheduler.ladder + ladder: + - [150, 1.0] + - [75, 0.1] + - [75, 0.01] commands: train: - name: vel.commands.train_command + name: vel.command.train_command epochs: 300 - log_frequency: 100 - - - optimizer: - name: vel.optimizers.sgd - lr: 0.1 - weight_decay: 0.0001 - momentum: 0.9 - -# optimizer: -# name: vel.optimizers.adam -# lr: 0.001 -# weight_decay: 0.001 - -# scheduler: -# name: vel.scheduler.multi_step -# milestones: [20, 50, 100, 150] -# gamma: 0.33 - - scheduler: - name: vel.scheduler.ladder - ladder: - - [150, 1.0] - - [75, 0.1] - - [75, 0.01] - -# scheduler: -# name: vel.scheduler.reduce_lr_on_plateau -# metric_name: val:accuracy -# factor: 0.33 -# patience: 15 -# cooldown: 20 -# min_lr: 0.5e-6 + augvis: - name: vel.commands.augvis_command + name: vel.command.augvis_command samples: 5 + cases: 3 diff --git a/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml b/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml index 23dcb949..9e41c5aa 100644 --- a/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml @@ -2,7 +2,7 @@ name: 'cifar10_resnetv1_32' model: - name: vel.models.vision.cifar_resnext + name: vel.model.vision.cifar_resnext img_channels: 3 num_classes: 10 @@ -13,65 +13,52 @@ model: divisor: 4 cardinality: 8 + source: - name: vel.sources.vision.cifar10 - batch_size: 32 -# batch_size: 32 + name: 
vel.data.source.vision.cifar10 + + +loader: + name: vel.data.loader + batch_size: 128 num_workers: 4 - normalize: True - augmentations: - - name: vel.augmentations.random_horizontal_flip - mode: x - tags: ["train"] - - name: vel.augmentations.random_crop - mode: x - tags: ["train"] - width: 32 - height: 32 - padding: 4 - padding_mode: 'reflect' + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.normalize + - name: vel.data.augmentation.random_crop + tags: ["train"] + width: 32 + height: 32 + padding: 4 + padding_mode: 'reflect' + - name: vel.data.augmentation.random_horizontal_flip + tags: ["train"] + - name: vel.data.transformation.image_to_tensor + +optimizer: + name: vel.optimizer.sgd + lr: 0.1 + weight_decay: 0.0001 + momentum: 0.9 + + +scheduler: + name: vel.scheduler.ladder + ladder: + - [150, 1.0] + - [75, 0.1] + - [75, 0.01] commands: train: - name: vel.commands.train_command + name: vel.command.train_command epochs: 300 - log_frequency: 100 - - - optimizer: - name: vel.optimizers.sgd - lr: 0.1 - weight_decay: 0.0001 - momentum: 0.9 - -# optimizer: -# name: vel.optimizers.adam -# lr: 0.001 -# weight_decay: 0.001 - -# scheduler: -# name: vel.scheduler.multi_step -# milestones: [20, 50, 100, 150] -# gamma: 0.33 - - scheduler: - name: vel.scheduler.ladder - ladder: - - [150, 1.0] - - [75, 0.1] - - [75, 0.01] - -# scheduler: -# name: vel.scheduler.reduce_lr_on_plateau -# metric_name: val:accuracy -# factor: 0.33 -# patience: 15 -# cooldown: 20 -# min_lr: 0.5e-6 + augvis: - name: vel.commands.augvis_command + name: vel.command.augvis_command samples: 5 + cases: 3 diff --git a/examples-configs/classification/mnist/mnist_cnn_01.yaml b/examples-configs/classification/mnist/mnist_cnn_01.yaml index 6e06faea..e58d5dd9 100644 --- a/examples-configs/classification/mnist/mnist_cnn_01.yaml +++ b/examples-configs/classification/mnist/mnist_cnn_01.yaml @@ -30,10 +30,7 @@ commands: train: name: vel.command.train_command epochs: 12 
- log_frequency: 100 - checkpoint: - metric: 'val:loss' augvis: name: vel.command.augvis_command diff --git a/vel/api/__init__.py b/vel/api/__init__.py index 1e5c9b65..89d2026f 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -1,14 +1,13 @@ -from .transformation import Transformation from .callback import Callback from .info import BatchInfo, EpochInfo, TrainingInfo from .model import ( Model, GradientModel, LossFunctionModel, BackboneModel, LinearBackboneModel ) +from .model_config import ModelConfig from .model_factory import ModelFactory from .optimizer import OptimizerFactory from .schedule import Schedule from .scheduler import SchedulerFactory from .source import Source from .storage import Storage -from .train_phase import TrainPhase, EmptyTrainPhase -from .model_config import ModelConfig +from .transformation import Transformation, ScopedTransformation diff --git a/vel/api/transformation.py b/vel/api/transformation.py index 80ee433e..9f84e9de 100644 --- a/vel/api/transformation.py +++ b/vel/api/transformation.py @@ -3,6 +3,10 @@ class Transformation: def __init__(self, tags=None): self.tags = ['train', 'val', 'test'] if tags is None else tags + def initialize(self, source): + """ Initialize transformation from source """ + pass + def __call__(self, datapoint): """ Do the transformation """ raise NotImplementedError @@ -20,6 +24,10 @@ def __init__(self, scope=None, tags=None): self.scope = ['x'] if scope is None else scope + # If there is only one, we wrap it as a list + if isinstance(self.scope, str): + self.scope = [self.scope] + def transform(self, value): """ Actual transformation code """ raise NotImplementedError diff --git a/vel/data/augmentation/normalize.py b/vel/data/augmentation/normalize.py deleted file mode 100644 index d67a2c6a..00000000 --- a/vel/data/augmentation/normalize.py +++ /dev/null @@ -1,24 +0,0 @@ -import numpy as np - -import vel.api as api - - -class Normalize(api.Transformation): - """ Normalize input mean and 
standard deviation """ - - def __init__(self, mean, std, mode='x', tags=None): - super().__init__(mode, tags) - self.mean = np.array(mean, dtype=np.float32) - self.std = np.array(std, dtype=np.float32) - - def __call__(self, x_data): - return (x_data - self.mean) / self.std - - def denormalize(self, x_data): - """ Operation reverse to normalization """ - return x_data * self.std + self.mean - - -def create(mean, std, mode='x', tags=None): - """ Vel factory function """ - return Normalize(mean=mean, std=std, mode=mode, tags=tags) diff --git a/vel/data/augmentation/random_crop.py b/vel/data/augmentation/random_crop.py index bbc56dec..ffdf89c1 100644 --- a/vel/data/augmentation/random_crop.py +++ b/vel/data/augmentation/random_crop.py @@ -6,10 +6,11 @@ import numbers import random -import vel.data as data +import vel.api as api +import vel.data.operation.image_op as image_op -class RandomCrop(data.Augmentation): +class RandomCrop(api.ScopedTransformation): """Crop the given PIL Image at a random location. Args: @@ -24,8 +25,8 @@ class RandomCrop(data.Augmentation): desired size to avoid raising an exception. 
""" - def __init__(self, size, padding=0, padding_mode='constant', pad_if_needed=False, mode='x', tags=None): - super().__init__(mode, tags) + def __init__(self, size, padding=0, padding_mode='constant', pad_if_needed=False, scope='x', tags=None): + super().__init__(scope, tags) if isinstance(size, numbers.Number): self.size = (int(size), int(size)) @@ -34,7 +35,7 @@ def __init__(self, size, padding=0, padding_mode='constant', pad_if_needed=False self.padding = padding self.padding_mode = padding_mode - self.padding_mode_cv = data.mode_to_cv2(self.padding_mode) + self.padding_mode_cv = image_op.mode_to_cv2(self.padding_mode) self.pad_if_needed = pad_if_needed @staticmethod @@ -58,7 +59,7 @@ def get_params(img, output_size): j = random.randint(0, w - tw) return i, j, th, tw - def __call__(self, img): + def transform(self, img): """ Args: img (PIL Image): Image to be cropped. @@ -67,24 +68,24 @@ def __call__(self, img): PIL Image: Cropped image. """ if self.padding > 0: - img = data.pad(img, self.padding, mode=self.padding_mode_cv) + img = image_op.pad(img, self.padding, mode=self.padding_mode_cv) # pad the width if needed if self.pad_if_needed and img.size[0] < self.size[1]: - img = data.pad(img, (int((1 + self.size[1] - img.size[0]) / 2), 0), mode=self.padding_mode_cv) + img = image_op.pad(img, (int((1 + self.size[1] - img.size[0]) / 2), 0), mode=self.padding_mode_cv) # pad the height if needed if self.pad_if_needed and img.size[1] < self.size[0]: - img = data.pad(img, (0, int((1 + self.size[0] - img.size[1]) / 2)), mode=self.padding_mode_cv) + img = image_op.pad(img, (0, int((1 + self.size[0] - img.size[1]) / 2)), mode=self.padding_mode_cv) i, j, h, w = self.get_params(img, self.size) - return data.crop(img, j, i, w, h) + return image_op.crop(img, j, i, w, h) def __repr__(self): return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding) -def create(width, height, padding=0, padding_mode='constant', mode='x', tags=None): +def 
create(width, height, padding=0, padding_mode='constant', scope='x', tags=None): """ Vel factory function """ - return RandomCrop(size=(width, height), padding=padding, padding_mode=padding_mode, mode=mode, tags=tags) + return RandomCrop(size=(width, height), padding=padding, padding_mode=padding_mode, scope=scope, tags=tags) diff --git a/vel/data/augmentation/random_horizontal_flip.py b/vel/data/augmentation/random_horizontal_flip.py index ba397519..b50caa8f 100644 --- a/vel/data/augmentation/random_horizontal_flip.py +++ b/vel/data/augmentation/random_horizontal_flip.py @@ -4,14 +4,14 @@ import vel.api as api -class RandomHorizontalFlip(api.Transformation): +class RandomHorizontalFlip(api.ScopedTransformation): """ Apply a horizontal flip randomly to input images """ - def __init__(self, p=0.5, mode='x', tags=None): - super().__init__(mode, tags) + def __init__(self, p=0.5, scope='x', tags=None): + super().__init__(scope, tags) self.p = p - def __call__(self, img): + def transform(self, img): """ Args: img (PIL Image): Image to be flipped. 
@@ -27,5 +27,5 @@ def __repr__(self): return self.__class__.__name__ + '(p={})'.format(self.p) -def create(p=0.5): - return RandomHorizontalFlip(p) +def create(p=0.5, scope='x', tags=None): + return RandomHorizontalFlip(p, scope=scope, tags=tags) diff --git a/vel/data/dataflow.py b/vel/data/dataflow.py index ae4b28ad..4c28a96d 100644 --- a/vel/data/dataflow.py +++ b/vel/data/dataflow.py @@ -17,6 +17,10 @@ class DataFlow(data.Dataset): @staticmethod def transform(source: Source, transformations: typing.List[Transformation]) -> Source: """ Transform supplied source with a list of given transformations """ + # Initialize transformations from source + for t in transformations: + t.initialize(source) + return Source( train=DataFlow(source.train, transformations, 'train'), validation=DataFlow(source.validation, transformations, 'val'), @@ -25,18 +29,19 @@ def transform(source: Source, transformations: typing.List[Transformation]) -> S def __init__(self, dataset, transformations, tag): self.dataset = dataset + self.tag = tag if transformations is None: self.transformations = [] else: self.transformations = [t for t in transformations if tag in t.tags] - self.tag = tag - def get_raw(self, index): + """ Get raw data point """ return pre_map(self.dataset[index]) def __getitem__(self, index): + """ Get data point from the dataset """ datapoint = self.get_raw(index) for t in self.transformations: @@ -52,4 +57,5 @@ def denormalize(self, datapoint): return datapoint def __len__(self): + """ Length of the dataset """ return len(self.dataset) diff --git a/vel/data/loader.py b/vel/data/loader.py index 4e98742f..a75a3453 100644 --- a/vel/data/loader.py +++ b/vel/data/loader.py @@ -54,14 +54,16 @@ def __getitem__(self, item): @property def loader(self): + """ Get a dict of loaders """ return self._loaders @property def size(self): + """ Get a dict of sizes of each loader """ return self._loader_sizes -def create(source: Source, batch_size: int, num_workers: int=0, transformations: 
typing.Optional[list] = None): +def create(source: Source, batch_size: int, num_workers: int = 0, transformations: typing.Optional[list] = None): """ Vel factory function """ return Loader( source=source, diff --git a/vel/data/source/vision/cifar10.py b/vel/data/source/vision/cifar10.py index 53ec1080..17641b59 100644 --- a/vel/data/source/vision/cifar10.py +++ b/vel/data/source/vision/cifar10.py @@ -1,13 +1,9 @@ from torchvision import datasets -from vel.api import SupervisedTrainingData +from vel.api import Source -from vel.augmentation.normalize import Normalize -from vel.augmentation.to_tensor import ToTensor -from vel.augmentation.to_array import ToArray - -def create(model_config, batch_size, normalize=True, num_workers=0, augmentations=None): +def create(model_config): """ Create a CIFAR10 dataset, normalized. Augmentations are the same as in the literature benchmarking CIFAR performance. @@ -17,21 +13,31 @@ def create(model_config, batch_size, normalize=True, num_workers=0, augmentation train_dataset = datasets.CIFAR10(path, train=True, download=True) test_dataset = datasets.CIFAR10(path, train=False, download=True) - augmentations = [ToArray()] + (augmentations if augmentations is not None else []) - - if normalize: - train_data = train_dataset.data - mean_value = (train_data / 255).mean(axis=(0, 1, 2)) - std_value = (train_data / 255).std(axis=(0, 1, 2)) - - augmentations.append(Normalize(mean=mean_value, std=std_value, tags=['train', 'val'])) - - augmentations.append(ToTensor()) - - return SupervisedTrainingData( - train_dataset, - test_dataset, - batch_size=batch_size, - num_workers=num_workers, - augmentations=augmentations + train_data = train_dataset.data + mean_value = (train_data / 255).mean(axis=(0, 1, 2)) + std_value = (train_data / 255).std(axis=(0, 1, 2)) + + return Source( + train=train_dataset, + validation=test_dataset, + metadata={ + 'train_mean': mean_value, + 'train_std': std_value + } ) + + # augmentations = [ToArray()] + (augmentations 
if augmentations is not None else []) + + # if normalize: + # + # augmentations.append(Normalize(mean=mean_value, std=std_value, tags=['train', 'val'])) + # + # augmentations.append(ToTensor()) + # + # return SupervisedTrainingData( + # train_dataset, + # test_dataset, + # batch_size=batch_size, + # num_workers=num_workers, + # augmentations=augmentations + # ) diff --git a/vel/data/source/vision/mnist.py b/vel/data/source/vision/mnist.py index 8ab6b49e..16640cac 100644 --- a/vel/data/source/vision/mnist.py +++ b/vel/data/source/vision/mnist.py @@ -3,7 +3,6 @@ from vel.api import Source - def create(model_config): """ Create a MNIST dataset, normalized """ path = model_config.data_dir('mnist') @@ -23,29 +22,3 @@ def create(model_config): 'train_std': std_value } ) - -# from vel.api import SupervisedTrainingData -# -# from vel.augmentations.normalize import Normalize -# from vel.augmentations.to_tensor import ToTensor -# from vel.augmentations.to_array import ToArray -# from vel.augmentations.unsupervised import Unsupervised - - # augmentations = [ToArray()] + (augmentations if augmentations is not None else []) - # - # if normalize: - # - # augmentations.append(Normalize(mean=mean_value, std=std_value, tags=['train', 'val'])) - # - # augmentations.append(ToTensor()) - # - # if unsupervised: - # augmentations.append(Unsupervised()) - # - # return SupervisedTrainingData( - # train_dataset, - # test_dataset, - # num_workers=num_workers, - # batch_size=batch_size, - # augmentations=augmentations - # ) diff --git a/vel/data/transformation/image_to_tensor.py b/vel/data/transformation/image_to_tensor.py index 75eebc32..aae7b486 100644 --- a/vel/data/transformation/image_to_tensor.py +++ b/vel/data/transformation/image_to_tensor.py @@ -11,6 +11,11 @@ def transform(self, value): # First let's make sure it's actually a numpy array value = np.asarray(value) + if value.dtype == np.uint8: + value = value.astype(np.float32) / 255.0 + + value = value.astype(np.float32) + if 
len(value.shape) == 2: # If the image has only one channel, it still needs to be specified value = value.reshape(value.shape[0], value.shape[1], 1) diff --git a/vel/data/transformation/normalize.py b/vel/data/transformation/normalize.py new file mode 100644 index 00000000..ccfa5064 --- /dev/null +++ b/vel/data/transformation/normalize.py @@ -0,0 +1,28 @@ +import vel.api as api + + +class Normalize(api.ScopedTransformation): + """ Normalize input mean and standard deviation """ + + def __init__(self, scope='x', tags=None): + super().__init__(scope, tags) + + self.mean = None + self.std = None + + def initialize(self, source): + """ Initialize transformation from source """ + self.mean = source.metadata['train_mean'] + self.std = source.metadata['train_std'] + + def transform(self, value): + return (value - self.mean) / self.std + + def denormalization_transform(self, value): + """ Operation reverse to normalization """ + return value * self.std + self.mean + + +def create(mode='x', tags=None): + """ Vel factory function """ + return Normalize(scope=mode, tags=tags) diff --git a/vel/api/train_phase.py b/vel/train/train_phase.py similarity index 100% rename from vel/api/train_phase.py rename to vel/train/train_phase.py From 0d4445253b748423639623ee9b7e8ce565ff0be9 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 23 Jun 2019 22:28:51 -0700 Subject: [PATCH 060/162] A bit more work on unifying metrics. Adding samples/sec metric. 
--- vel/api/callback.py | 18 +++---------- vel/api/info.py | 21 +++------------ vel/callback/sample_tracker.py | 26 +++++++++++++++++++ vel/callback/time_tracker.py | 3 ++- vel/command/train_command.py | 6 +++-- vel/data/dataflow.py | 4 +++ vel/metric/__init__.py | 6 ++--- vel/metric/accuracy.py | 2 +- vel/metric/base/__init__.py | 0 vel/metric/{ => base}/averaging_metric.py | 0 vel/metric/{ => base}/base_metric.py | 0 vel/metric/{ => base}/summing_metric.py | 0 vel/metric/{ => base}/value_metric.py | 0 vel/metric/loss_metric.py | 2 +- vel/metric/samples_per_sec.py | 11 ++++++++ ...fered_mixed_policy_iteration_reinforcer.py | 4 +-- ...uffered_off_policy_iteration_reinforcer.py | 4 +-- .../on_policy_iteration_reinforcer.py | 4 +-- vel/storage/streaming/visdom.py | 2 +- vel/train/trainer.py | 19 ++++++++------ 20 files changed, 78 insertions(+), 54 deletions(-) create mode 100644 vel/metric/base/__init__.py rename vel/metric/{ => base}/averaging_metric.py (100%) rename vel/metric/{ => base}/base_metric.py (100%) rename vel/metric/{ => base}/summing_metric.py (100%) rename vel/metric/{ => base}/value_metric.py (100%) create mode 100644 vel/metric/samples_per_sec.py diff --git a/vel/api/callback.py b/vel/api/callback.py index 6c28ad10..6a4e7819 100644 --- a/vel/api/callback.py +++ b/vel/api/callback.py @@ -1,3 +1,5 @@ +import typing + from .info import EpochInfo, BatchInfo, TrainingInfo @@ -38,30 +40,18 @@ def on_epoch_end(self, epoch_info: EpochInfo) -> None: """ pass - def on_batch_begin(self, batch_info: BatchInfo) -> None: + def on_batch_begin(self, batch_info: BatchInfo, dataset: typing.Optional[str] = None) -> None: """ Runs for each batch before batch is evaluated """ pass - def on_batch_end(self, batch_info: BatchInfo) -> None: + def on_batch_end(self, batch_info: BatchInfo, dataset: typing.Optional[str] = None) -> None: """ Runs for each batch after batch is evaluated """ pass - def on_validation_batch_begin(self, batch_info: BatchInfo) -> None: - """ - 
Supervised learning only - runs before validation batch - """ - pass - - def on_validation_batch_end(self, batch_info: BatchInfo) -> None: - """ - Supervised learning only - runs after validation batch - """ - pass - def write_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict) -> None: """ Persist callback state to the state dictionary diff --git a/vel/api/info.py b/vel/api/info.py index a84d1873..4e5957d4 100644 --- a/vel/api/info.py +++ b/vel/api/info.py @@ -254,28 +254,15 @@ def __init__(self, epoch_info: EpochInfo, batch_number: int): self.batch_number = batch_number self.data_dict = {} - def on_batch_begin(self): + def on_batch_begin(self, dataset=None): """ Initialize batch processing """ for callback in self.callbacks: - callback.on_batch_begin(self) + callback.on_batch_begin(self, dataset) - def on_batch_end(self): + def on_batch_end(self, dataset=None): """ Finalize batch processing """ for callback in self.callbacks: - callback.on_batch_end(self) - - # Even with all the experience replay, we count the single rollout as a single batch - self.epoch_info.result_accumulator.calculate(self) - - def on_validation_batch_begin(self): - """ Initialize batch processing """ - for callback in self.callbacks: - callback.on_validation_batch_begin(self) - - def on_validation_batch_end(self): - """ Finalize batch processing """ - for callback in self.callbacks: - callback.on_validation_batch_end(self) + callback.on_batch_end(self, dataset) # Even with all the experience replay, we count the single rollout as a single batch self.epoch_info.result_accumulator.calculate(self) diff --git a/vel/callback/sample_tracker.py b/vel/callback/sample_tracker.py index e69de29b..a1c9d789 100644 --- a/vel/callback/sample_tracker.py +++ b/vel/callback/sample_tracker.py @@ -0,0 +1,26 @@ +import typing +import collections + +from vel.api import BatchInfo, TrainingInfo, Callback + + +class SampleTracker(Callback): + """ Callback that calculates number of samples 
processed during the training process """ + + def on_initialization(self, training_info: TrainingInfo): + training_info['samples'] = collections.defaultdict(int) + + def on_batch_end(self, batch_info: BatchInfo, dataset: typing.Optional[str] = None) -> None: + samples = batch_info['datapoint']['x'].shape[0] + + batch_info['samples'] = samples + + if dataset is not None: + batch_info.training_info['samples'][dataset] += samples + + def write_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict): + hidden_state_dict['sample_tracker/samples'] = training_info['samples'] + + def load_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict): + training_info['samples'] = hidden_state_dict['sample_tracker/samples'] + diff --git a/vel/callback/time_tracker.py b/vel/callback/time_tracker.py index 8a59f9d1..df280213 100644 --- a/vel/callback/time_tracker.py +++ b/vel/callback/time_tracker.py @@ -5,6 +5,7 @@ class TimeTracker(Callback): """ Track training time - in seconds """ + def __init__(self): self.start_time = None @@ -14,7 +15,7 @@ def on_initialization(self, training_info: TrainingInfo): def on_train_begin(self, training_info: TrainingInfo): self.start_time = time.time() - def on_batch_end(self, batch_info: BatchInfo): + def on_batch_end(self, batch_info: BatchInfo, dataset=None): current_time = time.time() batch_time = current_time - self.start_time self.start_time = current_time diff --git a/vel/command/train_command.py b/vel/command/train_command.py index ea7e06d5..6a65d993 100644 --- a/vel/command/train_command.py +++ b/vel/command/train_command.py @@ -4,7 +4,9 @@ import vel.data as data import vel.train as train +from vel.metric.samples_per_sec import SamplesPerSec from vel.callback.time_tracker import TimeTracker +from vel.callback.sample_tracker import SampleTracker class SimpleTrainCommand: @@ -37,7 +39,7 @@ def run(self): callbacks = self.gather_callbacks(optimizer) # Metrics to track through this training - metrics = 
learner.metrics() + metrics = learner.metrics() + [SamplesPerSec()] # Check if training was already started and potentially continue where we left off training_info = self.resume_training(learner, callbacks, metrics) @@ -66,7 +68,7 @@ def run(self): def gather_callbacks(self, optimizer) -> list: """ Gather all the callbacks to be used in this training run """ - callbacks = [TimeTracker()] + callbacks = [TimeTracker(), SampleTracker()] if self.scheduler_factory is not None: callbacks.append(self.scheduler_factory.instantiate(optimizer)) diff --git a/vel/data/dataflow.py b/vel/data/dataflow.py index 4c28a96d..b0731729 100644 --- a/vel/data/dataflow.py +++ b/vel/data/dataflow.py @@ -8,6 +8,10 @@ def pre_map(datapoint): """ Map datapoint from a list into the dictionary """ if isinstance(datapoint, (list, tuple)): return dict(zip("xyzw", datapoint)) + + if 'x' in datapoint: + datapoint['size'] = datapoint['x'].shape[0] + return datapoint diff --git a/vel/metric/__init__.py b/vel/metric/__init__.py index 37708fa3..be14e6c4 100644 --- a/vel/metric/__init__.py +++ b/vel/metric/__init__.py @@ -1,3 +1,3 @@ -from .base_metric import BaseMetric, MetricKey # noqa -from .averaging_metric import AveragingMetric, AveragingNamedMetric, AveragingSupervisedMetric # noqa -from .value_metric import ValueMetric # noqa +from .base.base_metric import BaseMetric, MetricKey # noqa +from .base.averaging_metric import AveragingMetric, AveragingNamedMetric, AveragingSupervisedMetric # noqa +from .base.value_metric import ValueMetric # noqa diff --git a/vel/metric/accuracy.py b/vel/metric/accuracy.py index 44dcdb2f..1416e61a 100644 --- a/vel/metric/accuracy.py +++ b/vel/metric/accuracy.py @@ -1,4 +1,4 @@ -from vel.metric.averaging_metric import AveragingSupervisedMetric +from vel.metric.base.averaging_metric import AveragingSupervisedMetric class Accuracy(AveragingSupervisedMetric): diff --git a/vel/metric/base/__init__.py b/vel/metric/base/__init__.py new file mode 100644 index 
00000000..e69de29b diff --git a/vel/metric/averaging_metric.py b/vel/metric/base/averaging_metric.py similarity index 100% rename from vel/metric/averaging_metric.py rename to vel/metric/base/averaging_metric.py diff --git a/vel/metric/base_metric.py b/vel/metric/base/base_metric.py similarity index 100% rename from vel/metric/base_metric.py rename to vel/metric/base/base_metric.py diff --git a/vel/metric/summing_metric.py b/vel/metric/base/summing_metric.py similarity index 100% rename from vel/metric/summing_metric.py rename to vel/metric/base/summing_metric.py diff --git a/vel/metric/value_metric.py b/vel/metric/base/value_metric.py similarity index 100% rename from vel/metric/value_metric.py rename to vel/metric/base/value_metric.py diff --git a/vel/metric/loss_metric.py b/vel/metric/loss_metric.py index f4fa9df5..b2f45ce6 100644 --- a/vel/metric/loss_metric.py +++ b/vel/metric/loss_metric.py @@ -1,4 +1,4 @@ -from vel.metric.averaging_metric import AveragingMetric +from vel.metric.base.averaging_metric import AveragingMetric class Loss(AveragingMetric): diff --git a/vel/metric/samples_per_sec.py b/vel/metric/samples_per_sec.py new file mode 100644 index 00000000..edac2291 --- /dev/null +++ b/vel/metric/samples_per_sec.py @@ -0,0 +1,11 @@ +from vel.metric.base.averaging_metric import AveragingMetric + + +class SamplesPerSec(AveragingMetric): + """ Throughput metric - number of samples processed per second """ + def __init__(self, scope="train"): + super().__init__("samples_per_sec", scope=scope) + + def _value_function(self, batch_info): + """ Batch sample count divided by batch time """ + return batch_info['samples'] / batch_info['time'] diff --git a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py index c2d97e31..4faa513f 100644 --- a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py @@ -84,9 +84,9 @@ def train_epoch(self, epoch_info:
EpochInfo, interactive=True): for batch_idx in iterator: batch_info = BatchInfo(epoch_info, batch_idx) - batch_info.on_batch_begin() + batch_info.on_batch_begin('train') self.train_batch(batch_info) - batch_info.on_batch_end() + batch_info.on_batch_end('train') epoch_info.result_accumulator.freeze_results() epoch_info.on_epoch_end() diff --git a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py index c2ec6fe8..dbef9bd2 100644 --- a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py @@ -82,9 +82,9 @@ def train_epoch(self, epoch_info: EpochInfo, interactive=True) -> None: for batch_idx in iterator: batch_info = BatchInfo(epoch_info, batch_idx) - batch_info.on_batch_begin() + batch_info.on_batch_begin('train') self.train_batch(batch_info) - batch_info.on_batch_end() + batch_info.on_batch_end('train') epoch_info.result_accumulator.freeze_results() epoch_info.on_epoch_end() diff --git a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py index d889c681..d9ff7ab9 100644 --- a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py @@ -86,9 +86,9 @@ def train_epoch(self, epoch_info: EpochInfo, interactive=True) -> None: for batch_idx in iterator: batch_info = BatchInfo(epoch_info, batch_idx) - batch_info.on_batch_begin() + batch_info.on_batch_begin('train') self.train_batch(batch_info) - batch_info.on_batch_end() + batch_info.on_batch_end('train') epoch_info.result_accumulator.freeze_results() epoch_info.on_epoch_end() diff --git a/vel/storage/streaming/visdom.py b/vel/storage/streaming/visdom.py index a32bf83b..c861afe0 100644 --- a/vel/storage/streaming/visdom.py +++ b/vel/storage/streaming/visdom.py @@ -29,7 +29,7 @@ def on_epoch_end(self, epoch_info): first_epoch=epoch_info.global_epoch_idx == 1 ) - def 
on_batch_end(self, batch_info): + def on_batch_end(self, batch_info, dataset=None): """ Stream LR to visdom """ if self.settings.stream_lr: iteration_idx = ( diff --git a/vel/train/trainer.py b/vel/train/trainer.py index 44288497..89284147 100644 --- a/vel/train/trainer.py +++ b/vel/train/trainer.py @@ -69,12 +69,13 @@ def train_epoch(self, epoch_info, loader: Loader, interactive=True): else: iterator = loader['train'] - for batch_idx, data in enumerate(iterator): + for batch_idx, datapoint in enumerate(iterator): batch_info = BatchInfo(epoch_info, batch_idx) + batch_info['datapoint'] = datapoint - batch_info.on_batch_begin() - self.train_batch(batch_info, data) - batch_info.on_batch_end() + batch_info.on_batch_begin('train') + self.train_batch(batch_info, datapoint) + batch_info.on_batch_end('train') iterator.set_postfix(loss=epoch_info.result_accumulator.intermediate_value('loss')) @@ -88,16 +89,18 @@ def validation_epoch(self, epoch_info, loader: Loader, interactive=True): iterator = loader['val'] with torch.no_grad(): - for batch_idx, data in enumerate(iterator): + for batch_idx, datapoint in enumerate(iterator): batch_info = BatchInfo(epoch_info, batch_idx) + batch_info['datapoint'] = datapoint - batch_info.on_validation_batch_begin() - self.feed_batch(batch_info, data) - batch_info.on_validation_batch_end() + batch_info.on_batch_begin('val') + self.feed_batch(batch_info, datapoint) + batch_info.on_batch_end('val') def feed_batch(self, batch_info, data): """ Run single batch of data """ data = to_device(data, self.device) # Move a data batch into the right device + metrics = self.model.calculate_gradient(data) batch_info.update(metrics) From ea25504ddbf28ddd8880607f102b3092cdf8f4ec Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Mon, 24 Jun 2019 22:06:27 -0700 Subject: [PATCH 061/162] Fixing augmentations of cats vs dogs training. 
--- .../cifar10/cifar10_cnn_01.yaml | 4 +- .../cats_vs_dogs_resnet34.yaml | 75 ++++------ vel/api/source.py | 130 ------------------ vel/api/transformation.py | 3 + vel/command/augvis_command.py | 2 +- vel/data/augmentation/center_crop.py | 17 +-- vel/data/augmentation/random_lighting.py | 12 +- vel/data/augmentation/random_rotate.py | 17 +-- vel/data/augmentation/random_scale.py | 18 +-- vel/data/augmentation/scale_min_size.py | 17 +-- vel/data/source/img_dir_source.py | 11 +- vel/data/transformation/normalize.py | 25 +++- 12 files changed, 100 insertions(+), 231 deletions(-) diff --git a/examples-configs/classification/cifar10/cifar10_cnn_01.yaml b/examples-configs/classification/cifar10/cifar10_cnn_01.yaml index 8ce60349..14f3c83f 100644 --- a/examples-configs/classification/cifar10/cifar10_cnn_01.yaml +++ b/examples-configs/classification/cifar10/cifar10_cnn_01.yaml @@ -22,13 +22,13 @@ loader: - name: vel.data.transformation.to_array - name: vel.data.transformation.normalize - name: vel.data.augmentation.random_crop - tags: ["train"] + tags: train width: 32 height: 32 padding: 4 padding_mode: 'reflect' - name: vel.data.augmentation.random_horizontal_flip - tags: ["train"] + tags: train - name: vel.data.transformation.image_to_tensor diff --git a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml index 41860c8b..6aaffc16 100644 --- a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml +++ b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml @@ -2,74 +2,61 @@ name: cats_vs_dogs_resnet34 model: - name: vel.models.imagenet.resnet34 + name: vel.model.imagenet.resnet34 fc_layers: [512, 2] dropout: [0.25, 0.5] pretrained: true source: - name: vel.sources.img_dir_source + name: vel.data.source.img_dir_source url: http://files.fast.ai/data/dogscats.zip extract_parent: true path: data/dogscats - num_workers: 8 - 
batch_size: 64 -# tta: -# name: vel.augmentations.tta.train_tta -# n_augmentations: 4 - augmentations: - - name: vel.augmentations.to_array - mode: x - tags: ["train", "val"] +loader: + name: vel.data.loader + num_workers: 8 + batch_size: 64 - - name: vel.augmentations.random_scale - mode: x - tags: ["train"] + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.augmentation.random_scale + tags: train size: 224 max_zoom: 1.1 - - name: vel.augmentations.random_rotate - mode: x - tags: ["train"] + - name: vel.data.augmentation.random_rotate + tags: train deg: 10.0 - - name: vel.augmentations.random_crop - mode: x - tags: ["train"] + - name: vel.data.augmentation.random_crop + tags: train width: 224 height: 224 - - name: vel.augmentations.random_lighting - mode: x - tags: ["train"] + - name: vel.data.augmentation.random_lighting + tags: train b: 0.05 c: 0.05 - - name: vel.augmentations.random_horizontal_flip - mode: x - tags: ["train"] - - - name: vel.augmentations.scale_min_size - mode: x - tags: ["val"] + - name: vel.data.augmentation.random_horizontal_flip + tags: train + - name: vel.data.augmentation.scale_min_size + tags: val size: 224 - - name: vel.augmentations.center_crop - mode: x - tags: ["val"] + - name: vel.data.augmentation.center_crop + tags: val size: 224 - - name: vel.augmentations.normalize - mode: x + - name: vel.data.transformation.normalize tags: ["train", "val"] mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] - - name: vel.augmentations.to_tensor - mode: x + - name: vel.data.transformation.image_to_tensor tags: ["train", "val"] optimizer: - name: vel.optimizers.sgd + name: vel.optimizer.sgd lr: 0.01 weight_decay: 0.0 momentum: 0.9 @@ -78,8 +65,7 @@ optimizer: commands: train: - name: vel.commands.phase_train_command - restart: false + name: vel.command.phase_train_command phases: - name: vel.phase.freeze - name: vel.phase.cycle @@ -103,15 +89,14 @@ commands: cycle_mult: 2 simple_train: - name: 
vel.commands.train_command - restart: false + name: vel.command.train_command epochs: 3 summary: - name: vel.commands.summary_command + name: vel.command.summary_command lr_find: - name: vel.commands.lr_find_command + name: vel.command.lr_find_command metric: 'loss' start_lr: 1.0e-5 end_lr: 10.0 @@ -120,6 +105,6 @@ commands: freeze: true augvis: - name: vel.commands.augvis_command + name: vel.command.augvis_command cases: 3 samples: 4 diff --git a/vel/api/source.py b/vel/api/source.py index c6209c3a..fb02e7e6 100644 --- a/vel/api/source.py +++ b/vel/api/source.py @@ -17,133 +17,3 @@ def __init__(self, train: data.Dataset, validation: data.Dataset, self.test = test self.metadata = {} if metadata is None else metadata - -# @property -# def train_loader(self): -# """ PyTorch loader of training data """ -# raise NotImplementedError -# -# @property -# def val_loader(self): -# """ PyTorch loader of validation data """ -# raise NotImplementedError -# -# @property -# def train_dataset(self): -# """ Return the training dataset """ -# raise NotImplementedError -# -# @property -# def val_dataset(self): -# """ Return the validation dataset """ -# raise NotImplementedError -# -# @property -# def train_iterations_per_epoch(self): -# """ Return number of iterations per epoch """ -# raise NotImplementedError -# -# @property -# def val_iterations_per_epoch(self): -# """ Return number of iterations per epoch - validation """ -# raise NotImplementedError -# -# -# class SupervisedTextData(Source): -# """ An NLP torchtext data source """ -# def __init__(self, train_source, val_source, train_iterator, val_iterator, data_field, target_field): -# super().__init__() -# -# self.train_source = train_source -# self.val_source = val_source -# self.train_iterator = train_iterator -# self.val_iterator = val_iterator -# self.data_field = data_field -# self.target_field = target_field -# -# @property -# def train_loader(self): -# """ PyTorch loader of training data """ -# return 
self.train_iterator -# -# @property -# def val_loader(self): -# """ PyTorch loader of validation data """ -# return self.val_iterator -# -# @property -# def train_dataset(self): -# """ Return the training dataset """ -# return self.train_source -# -# @property -# def val_dataset(self): -# """ Return the validation dataset """ -# return self.val_source -# -# @property -# def train_iterations_per_epoch(self): -# """ Return number of iterations per epoch """ -# return len(self.train_iterator) -# -# @property -# def val_iterations_per_epoch(self): -# """ Return number of iterations per epoch - validation """ -# return len(self.val_iterator) -# -# -# class SupervisedTrainingData(Source): -# """ Most common source of data combining a basic datasource and sampler """ -# def __init__(self, train_source, val_source, num_workers, batch_size, augmentations=None): -# -# super().__init__() -# -# self.train_source = train_source -# self.val_source = val_source -# -# self.num_workers = num_workers -# self.batch_size = batch_size -# -# self.augmentations = augmentations -# -# # Derived values -# self.train_ds = DataFlow(self.train_source, augmentations, tag='train') -# self.val_ds = DataFlow(self.val_source, augmentations, tag='val') -# -# self._train_loader = data.DataLoader( -# self.train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers -# ) -# -# self._val_loader = data.DataLoader( -# self.val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers -# ) -# -# @property -# def train_loader(self): -# """ PyTorch loader of training data """ -# return self._train_loader -# -# @property -# def val_loader(self): -# """ PyTorch loader of validation data """ -# return self._val_loader -# -# @property -# def train_dataset(self): -# """ Return the training dataset """ -# return self.train_ds -# -# @property -# def val_dataset(self): -# """ Return the validation dataset """ -# return self.val_ds -# -# @property -# def train_iterations_per_epoch(self): -# """ 
Return number of iterations per epoch """ -# return len(self._train_loader) -# -# @property -# def val_iterations_per_epoch(self): -# """ Return number of iterations per epoch - validation """ -# return len(self._val_loader) diff --git a/vel/api/transformation.py b/vel/api/transformation.py index 9f84e9de..2bf960bf 100644 --- a/vel/api/transformation.py +++ b/vel/api/transformation.py @@ -3,6 +3,9 @@ class Transformation: def __init__(self, tags=None): self.tags = ['train', 'val', 'test'] if tags is None else tags + if isinstance(self.tags, str): + self.tags = [self.tags] + def initialize(self, source): """ Initialize transformation from source """ pass diff --git a/vel/command/augvis_command.py b/vel/command/augvis_command.py index edfa4906..b4a74fcd 100644 --- a/vel/command/augvis_command.py +++ b/vel/command/augvis_command.py @@ -29,7 +29,7 @@ def run(self): for j in range(self.samples): augmented_datapoint = dataset[selected_sample[i]] denormalized_datapoint = dataset.denormalize(augmented_datapoint) - ax[i, j+1].imshow(denormalized_datapoint['x']) + ax[i, j+1].imshow(np.clip(denormalized_datapoint['x'], 0.0, 1.0)) plt.show() diff --git a/vel/data/augmentation/center_crop.py b/vel/data/augmentation/center_crop.py index 21cf33bd..8bead415 100644 --- a/vel/data/augmentation/center_crop.py +++ b/vel/data/augmentation/center_crop.py @@ -3,10 +3,11 @@ https://github.com/fastai/fastai/blob/master/fastai/transforms.py """ -import vel.data as data +import vel.api as api +import vel.data.operation.image_op as op -class CenterCrop(data.Augmentation): +class CenterCrop(api.ScopedTransformation): """ A class that represents a Center Crop. This transforms (optionally) transforms x,y at with the same parameters. @@ -17,14 +18,14 @@ class CenterCrop(data.Augmentation): tfm_y : TfmType type of y transformation. 
""" - def __init__(self, size, mode='x', tags=None): - super().__init__(mode, tags) + def __init__(self, size, scope='x', tags=None): + super().__init__(scope, tags) self.size = size - def __call__(self, x): - return data.center_crop(x, self.size) + def transform(self, x): + return op.center_crop(x, self.size) -def create(size, mode='x', tags=None): - return CenterCrop(size, mode, tags) +def create(size, scope='x', tags=None): + return CenterCrop(size, scope, tags) diff --git a/vel/data/augmentation/random_lighting.py b/vel/data/augmentation/random_lighting.py index 9c51d9ae..8331d75e 100644 --- a/vel/data/augmentation/random_lighting.py +++ b/vel/data/augmentation/random_lighting.py @@ -1,26 +1,26 @@ import random import vel.api as api -import vel.data as data +import vel.data.operation.image_op as op -class RandomLighting(api.Transformation): +class RandomLighting(api.ScopedTransformation): """ Apply a horizontal flip randomly to input images """ def __init__(self, b, c, mode='x', tags=None): super().__init__(mode, tags) self.b, self.c = b, c - def __call__(self, img): + def transform(self, img): """ Adjust lighting """ rand_b = random.uniform(-self.b, self.b) rand_c = random.uniform(-self.c, self.c) rand_c = -1/(rand_c-1) if rand_c < 0 else rand_c+1 - return data.lighting(img, rand_b, rand_c) + return op.lighting(img, rand_b, rand_c) def __repr__(self): return self.__class__.__name__ + '(b={}, c={})'.format(self.b, self.c) -def create(b, c, mode='x', tags=None): - return RandomLighting(b, c, mode, tags) +def create(b, c, scope='x', tags=None): + return RandomLighting(b, c, scope, tags) diff --git a/vel/data/augmentation/random_rotate.py b/vel/data/augmentation/random_rotate.py index c2c02246..65ae8a04 100644 --- a/vel/data/augmentation/random_rotate.py +++ b/vel/data/augmentation/random_rotate.py @@ -5,25 +5,26 @@ import cv2 import random -import vel.data as data +import vel.api as api +import vel.data.operation.image_op as op -class 
RandomRotate(data.Augmentation): +class RandomRotate(api.ScopedTransformation): """ Rotate image randomly by an angle between (-deg, +deg) """ - def __init__(self, deg, p=0.75, mode='x', tags=None): - super().__init__(mode, tags) + def __init__(self, deg, p=0.75, scope='x', tags=None): + super().__init__(scope, tags) self.deg = deg self.p = p - def __call__(self, x_data): + def transform(self, x_data): if random.random() < self.p: random_degree = random.uniform(-self.deg, self.deg) - return data.rotate_img(x_data, random_degree, mode=cv2.BORDER_REFLECT) + return op.rotate_img(x_data, random_degree, mode=cv2.BORDER_REFLECT) else: # No, don't do it return x_data -def create(deg, p=0.75, mode='x', tags=None): +def create(deg, p=0.75, scope='x', tags=None): """ Vel factory function """ - return RandomRotate(deg, p, mode, tags) + return RandomRotate(deg, p, scope, tags) diff --git a/vel/data/augmentation/random_scale.py b/vel/data/augmentation/random_scale.py index 882a3eb6..694cdf2f 100644 --- a/vel/data/augmentation/random_scale.py +++ b/vel/data/augmentation/random_scale.py @@ -6,18 +6,19 @@ import collections.abc as abc import random -import vel.data as data +import vel.api as api +import vel.data.operation.image_op as op -class RandomScale(data.Augmentation): +class RandomScale(api.ScopedTransformation): """ Scales the image so that the smallest axis is of 'size' times a random number between 1.0 and max_zoom. """ - def __init__(self, size, max_zoom, p=0.75, mode='x', tags=None): - super().__init__(mode, tags) + def __init__(self, size, max_zoom, p=0.75, scope='x', tags=None): + super().__init__(scope, tags) self.size = size self.max_zoom = max_zoom self.p = p - def __call__(self, x_data): + def transform(self, x_data): if random.random() < self.p: # Yes, do it min_z = 1. 
@@ -30,8 +31,9 @@ def __call__(self, x_data): # No, don't do it mult = 1.0 - return data.scale_min(x_data, int(self.size * mult), cv2.INTER_AREA) + return op.scale_min(x_data, int(self.size * mult), cv2.INTER_AREA) -def create(size, max_zoom, p=0.75, mode='x', tags=None): - return RandomScale(size, max_zoom, p, mode, tags) +def create(size, max_zoom, p=0.75, scope='x', tags=None): + """ Vel factory function """ + return RandomScale(size, max_zoom, p, scope, tags) diff --git a/vel/data/augmentation/scale_min_size.py b/vel/data/augmentation/scale_min_size.py index c1ebfa5d..d6ecd06b 100644 --- a/vel/data/augmentation/scale_min_size.py +++ b/vel/data/augmentation/scale_min_size.py @@ -4,19 +4,20 @@ """ import PIL.Image as Image -import vel.data as data +import vel.api as api +import vel.data.operation.image_op as op -class ScaleMinSize(data.Augmentation): +class ScaleMinSize(api.ScopedTransformation): """ Scales the image so that the smallest axis is of 'size'. """ - def __init__(self, size, mode='x', tags=None): - super().__init__(mode, tags) + def __init__(self, size, scope='x', tags=None): + super().__init__(scope, tags) self.size = size - def __call__(self, x_data): - return data.scale_min(x_data, self.size, Image.BILINEAR) + def transform(self, x_data): + return op.scale_min(x_data, self.size, Image.BILINEAR) -def create(size, mode='x', tags=None): +def create(size, scope='x', tags=None): """ Vel factory function """ - return ScaleMinSize(size, mode, tags) + return ScaleMinSize(size, scope, tags) diff --git a/vel/data/source/img_dir_source.py b/vel/data/source/img_dir_source.py index bbca4ad6..45b4ec2b 100644 --- a/vel/data/source/img_dir_source.py +++ b/vel/data/source/img_dir_source.py @@ -4,7 +4,7 @@ import torchvision.datasets as ds import torchvision.datasets.utils as ds_util -from vel.api import SupervisedTrainingData +from vel.api import Source class ImageDirSource(ds.ImageFolder): @@ -12,8 +12,7 @@ class ImageDirSource(ds.ImageFolder): pass -def 
create(model_config, path, num_workers, batch_size, augmentations=None, tta=None, url=None, - extract_parent=False): +def create(model_config, path, url=None, extract_parent=False): """ Create an ImageDirSource with supplied arguments """ if not os.path.isabs(path): path = model_config.project_top_dir(path) @@ -43,11 +42,7 @@ def create(model_config, path, num_workers, batch_size, augmentations=None, tta= train_ds = ImageDirSource(train_path) val_ds = ImageDirSource(valid_path) - return SupervisedTrainingData( + return Source( train_ds, val_ds, - num_workers=num_workers, - batch_size=batch_size, - augmentations=augmentations, - # test_time_augmentation=tta ) diff --git a/vel/data/transformation/normalize.py b/vel/data/transformation/normalize.py index ccfa5064..a29ad225 100644 --- a/vel/data/transformation/normalize.py +++ b/vel/data/transformation/normalize.py @@ -1,19 +1,30 @@ +import numpy as np + import vel.api as api class Normalize(api.ScopedTransformation): """ Normalize input mean and standard deviation """ - def __init__(self, scope='x', tags=None): + def __init__(self, mean=None, std=None, scope='x', tags=None): super().__init__(scope, tags) - self.mean = None - self.std = None + self.mean = mean + self.std = std + + if self.mean is not None: + self.mean = np.asarray(self.mean) + + if self.std is not None: + self.std = np.asarray(self.std) def initialize(self, source): """ Initialize transformation from source """ - self.mean = source.metadata['train_mean'] - self.std = source.metadata['train_std'] + if self.mean is None: + self.mean = source.metadata['train_mean'] + + if self.std is None: + self.std = source.metadata['train_std'] def transform(self, value): return (value - self.mean) / self.std @@ -23,6 +34,6 @@ def denormalization_transform(self, value): return value * self.std + self.mean -def create(mode='x', tags=None): +def create(mean=None, std=None, mode='x', tags=None): """ Vel factory function """ - return Normalize(scope=mode, tags=tags) + 
return Normalize(mean=mean, std=std, scope=mode, tags=tags) From 5061e91ea7295445ad9819c7aaddbb3fe6796094 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Mon, 24 Jun 2019 22:16:23 -0700 Subject: [PATCH 062/162] Fixing lr find command. --- vel/command/lr_find_command.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/vel/command/lr_find_command.py b/vel/command/lr_find_command.py index 32544c5d..ff927a52 100644 --- a/vel/command/lr_find_command.py +++ b/vel/command/lr_find_command.py @@ -8,8 +8,9 @@ import vel.util.interpolate as interp -from vel.api import Learner, TrainingInfo, EpochInfo, BatchInfo -from vel.metric.averaging_metric import AveragingNamedMetric +from vel.api import TrainingInfo, EpochInfo, BatchInfo +from vel.metric.base.averaging_metric import AveragingNamedMetric +from vel.train import Trainer class LrFindCommand: @@ -45,11 +46,11 @@ class LrFindCommand: http://arxiv.org/abs/1506.01186 """ - def __init__(self, model_config, model, source, optimizer_factory, start_lr=1e-5, end_lr=10, num_it=100, + def __init__(self, model_config, model, loader, optimizer_factory, start_lr=1e-5, end_lr=10, num_it=100, interpolation='logscale', freeze=False, stop_dv=True, divergence_threshold=4.0, metric='loss'): # Mandatory pieces self.model = model - self.source = source + self.loader = loader self.optimizer_factory = optimizer_factory self.model_config = model_config # Settings @@ -65,7 +66,7 @@ def __init__(self, model_config, model, source, optimizer_factory, start_lr=1e-5 def run(self): """ Run the command with supplied configuration """ device = self.model_config.torch_device() - learner = Learner(device, self.model.instantiate()) + learner = Trainer(device, self.model.instantiate()) lr_schedule = interp.interpolate_series(self.start_lr, self.end_lr, self.num_it, self.interpolation) @@ -75,7 +76,7 @@ def run(self): # Optimizer shoudl be created after freeze optimizer = 
self.optimizer_factory.instantiate(learner.model) - iterator = iter(self.source.train_loader) + iterator = iter(self.loader['train']) # Metrics to track through this training metrics = learner.metrics() + [AveragingNamedMetric("lr")] @@ -99,12 +100,12 @@ def run(self): param_group['lr'] = lr try: - data, target = next(iterator) + datapoint = next(iterator) except StopIteration: - iterator = iter(self.source.train_loader) - data, target = next(iterator) + iterator = iter(self.loader['train']) + datapoint = next(iterator) - learner.train_batch(batch_info, data, target) + learner.train_batch(batch_info, datapoint) batch_info['lr'] = lr @@ -149,13 +150,13 @@ def run(self): plt.show() -def create(model_config, model, source, optimizer, start_lr=1e-5, end_lr=10, iterations=100, freeze=False, +def create(model_config, model, loader, optimizer, start_lr=1e-5, end_lr=10, iterations=100, freeze=False, interpolation='logscale', stop_dv=True, divergence_threshold=4.0, metric='loss'): """ Vel factory function """ return LrFindCommand( model_config=model_config, model=model, - source=source, + loader=loader, optimizer_factory=optimizer, start_lr=start_lr, end_lr=end_lr, From 2e4ede029488cab5f2450a45d7eebd291f3da715 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 25 Jun 2019 21:37:40 -0700 Subject: [PATCH 063/162] Fixing cats vs dogs transfer learning example. 
--- .../cats_vs_dogs_resnet34.yaml | 8 ++--- vel/command/phase_train_command.py | 31 ++++++++++++------- vel/train/__init__.py | 1 + vel/train/phase/cycle.py | 16 +++++----- vel/train/phase/freeze.py | 6 ++-- vel/train/phase/generic.py | 14 +++++---- vel/train/phase/unfreeze.py | 6 ++-- vel/train/train_phase.py | 8 +++-- 8 files changed, 52 insertions(+), 38 deletions(-) diff --git a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml index 6aaffc16..d764ce9e 100644 --- a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml +++ b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml @@ -67,8 +67,8 @@ commands: train: name: vel.command.phase_train_command phases: - - name: vel.phase.freeze - - name: vel.phase.cycle + - name: vel.train.phase.freeze + - name: vel.train.phase.cycle init_lr: 0.001 init_iter: 20 max_lr: 0.01 @@ -76,8 +76,8 @@ commands: interpolate: 'cosine' cycles: 3 cycle_len: 1 - - name: vel.phase.unfreeze - - name: vel.phase.cycle + - name: vel.train.phase.unfreeze + - name: vel.train.phase.cycle init_lr: 0.001 init_iter: 20 diff --git a/vel/command/phase_train_command.py b/vel/command/phase_train_command.py index e80768b2..b80a1b7a 100644 --- a/vel/command/phase_train_command.py +++ b/vel/command/phase_train_command.py @@ -2,17 +2,24 @@ import bisect import typing -from vel.api import Learner, TrainingInfo, ModelConfig, TrainPhase +import vel.api as api +import vel.data as data +import vel.train as train + +from vel.metric.samples_per_sec import SamplesPerSec +from vel.callback.time_tracker import TimeTracker +from vel.callback.sample_tracker import SampleTracker class PhaseTrainCommand: """ Training command - learn according to a set of phases """ - def __init__(self, model_config: ModelConfig, model_factory, source, storage, phases: typing.List[TrainPhase], + def __init__(self, model_config: 
api.ModelConfig, model_factory: api.ModelFactory, loader: data.Loader, + storage: api.Storage, phases: typing.List[train.TrainPhase], callbacks=None, restart=True): self.model_config = model_config self.model_factory = model_factory - self.source = source + self.loader = loader self.storage = storage self.phases = phases self.ladder = self._build_phase_ladder(phases) @@ -49,13 +56,13 @@ def _select_phase_right_bound(self, epoch_number): def run(self): """ Run the command with supplied configuration """ device = self.model_config.torch_device() - learner = Learner(device, self.model_factory.instantiate()) + learner = train.Trainer(device, self.model_factory.instantiate()) # All callbacks useful for learning callbacks = self.gather_callbacks() # Metrics to track through this training - metrics = learner.metrics() + metrics = learner.metrics() + [SamplesPerSec()] # Check if training was already started and potentially continue where we left off training_info, hidden_state = self.resume_training(learner, callbacks, metrics) @@ -65,7 +72,7 @@ def run(self): current_phase = self.phases[current_phase_idx] local_idx = training_info.start_epoch_idx - self.ladder[current_phase_idx] - current_phase.set_up_phase(training_info, learner.model, self.source) + current_phase.set_up_phase(training_info, learner.model, self.loader) print(current_phase.banner()) if training_info.start_epoch_idx > 0: @@ -84,7 +91,7 @@ def run(self): current_phase_idx += 1 current_phase = self.phases[current_phase_idx] - current_phase.set_up_phase(training_info, learner.model, self.source) + current_phase.set_up_phase(training_info, learner.model, self.loader) print(current_phase.banner()) # Create epoch info @@ -106,21 +113,21 @@ def run(self): def gather_callbacks(self) -> list: """ Gather all the callbacks to be used in this training run """ - callbacks = [] + callbacks = [TimeTracker(), SampleTracker()] callbacks.extend(self.callbacks) callbacks.extend(self.storage.streaming_callbacks()) return 
callbacks - def resume_training(self, learner, callbacks, metrics) -> (TrainingInfo, dict): + def resume_training(self, learner, callbacks, metrics) -> (api.TrainingInfo, dict): """ Possibly resume training from a saved state from the storage """ if self.model_config.continue_training: start_epoch = self.storage.last_epoch_idx() else: start_epoch = 0 - training_info = TrainingInfo( + training_info = api.TrainingInfo( start_epoch_idx=start_epoch, run_name=self.model_config.run_name, metrics=metrics, @@ -139,12 +146,12 @@ def resume_training(self, learner, callbacks, metrics) -> (TrainingInfo, dict): return training_info, hidden_state -def create(model_config, model, source, storage, phases, callbacks=None, restart=True): +def create(model_config, model, loader, storage, phases, callbacks=None, restart=True): """ Vel factory function """ return PhaseTrainCommand( model_config=model_config, model_factory=model, - source=source, + loader=loader, storage=storage, phases=phases, callbacks=callbacks, diff --git a/vel/train/__init__.py b/vel/train/__init__.py index 260e4c8d..482d91cd 100644 --- a/vel/train/__init__.py +++ b/vel/train/__init__.py @@ -1 +1,2 @@ from .trainer import Trainer +from .train_phase import TrainPhase, EmptyTrainPhase diff --git a/vel/train/phase/cycle.py b/vel/train/phase/cycle.py index 9b38b8be..1f3358c0 100644 --- a/vel/train/phase/cycle.py +++ b/vel/train/phase/cycle.py @@ -1,8 +1,10 @@ +import typing import numpy as np import vel.util.interpolate as interp -from vel.api import BatchInfo, EpochInfo, TrainingInfo, Callback, TrainPhase +from vel.api import BatchInfo, EpochInfo, TrainingInfo, Callback +from vel.train import TrainPhase class CycleCallback(Callback): @@ -52,7 +54,7 @@ def _init_cycle_dict(self): return dict_arr, length_arr, start_arr - def on_batch_begin(self, batch_info: BatchInfo): + def on_batch_begin(self, batch_info: BatchInfo, dataset: typing.Optional[str] = None): """ Set proper learning rate """ cycle_length = 
self.cycle_lengths[batch_info.local_epoch_number - 1] cycle_start = self.cycle_starts[batch_info.local_epoch_number - 1] @@ -113,7 +115,7 @@ def __init__(self, optimizer_factory, max_lr, min_lr, cycles, cycle_len=1, cycle self.freeze = freeze self._optimizer_instance = None - self._source = None + self._loader = None self.special_callback = None @@ -121,11 +123,11 @@ def __init__(self, optimizer_factory, max_lr, min_lr, cycles, cycle_len=1, cycle def number_of_epochs(self) -> int: return self.epochs - def set_up_phase(self, training_info, model, source): + def set_up_phase(self, training_info, model, loader): """ Prepare the phase for learning """ # To parameter groups handles properly filtering parameters that don't require gradient self._optimizer_instance = self.optimizer_factory.instantiate(model) - self._source = source + self._loader = loader self.special_callback = CycleCallback( self._optimizer_instance, @@ -142,7 +144,7 @@ def epoch_info(self, training_info: TrainingInfo, global_idx: int, local_idx: in training_info=training_info, global_epoch_idx=global_idx, local_epoch_idx=local_idx, - batches_per_epoch=self._source.train_iterations_per_epoch, + batches_per_epoch=self._loader.size['train'], optimizer=self._optimizer_instance, # Add special callback for this epoch callbacks=[self.special_callback] + training_info.callbacks @@ -150,7 +152,7 @@ def epoch_info(self, training_info: TrainingInfo, global_idx: int, local_idx: in def execute_epoch(self, epoch_info, learner): """ Prepare the phase for learning """ - learner.run_epoch(epoch_info, self._source) + learner.run_epoch(epoch_info, self._loader) def create(optimizer, max_lr, min_lr, cycles, cycle_len=1, cycle_mult=1, interpolate='linear', init_lr=0, init_iter=0): diff --git a/vel/train/phase/freeze.py b/vel/train/phase/freeze.py index 576e372a..c230c762 100644 --- a/vel/train/phase/freeze.py +++ b/vel/train/phase/freeze.py @@ -1,10 +1,10 @@ -import vel.api as api +import vel.train as train -class 
FreezePhase(api.EmptyTrainPhase): +class FreezePhase(train.EmptyTrainPhase): """ Freeze the model """ - def set_up_phase(self, training_info, model, source): + def set_up_phase(self, training_info, model, loader): """ Freeze the model """ model.freeze() diff --git a/vel/train/phase/generic.py b/vel/train/phase/generic.py index 25c52c1e..f57e9923 100644 --- a/vel/train/phase/generic.py +++ b/vel/train/phase/generic.py @@ -1,4 +1,6 @@ -from vel.api import TrainingInfo, EpochInfo, TrainPhase, Source +from vel.api import TrainingInfo, EpochInfo +from vel.data import Loader +from vel.train import TrainPhase class GenericPhase(TrainPhase): @@ -10,16 +12,16 @@ def __init__(self, lr, epochs, optimizer_factory): self.optimizer_factory = optimizer_factory self._optimizer_instance = None - self._source = None + self._loader = None @property def number_of_epochs(self) -> int: return self.epochs - def set_up_phase(self, training_info, model, source: Source): + def set_up_phase(self, training_info, model, loader: Loader): """ Prepare the phase for learning """ self._optimizer_instance = self.optimizer_factory.instantiate(model) - self._source = source + self._loader = loader def epoch_info(self, training_info: TrainingInfo, global_idx: int, local_idx: int) -> EpochInfo: """ Create Epoch info """ @@ -27,7 +29,7 @@ def epoch_info(self, training_info: TrainingInfo, global_idx: int, local_idx: in training_info=training_info, global_epoch_idx=global_idx, local_epoch_idx=local_idx, - batches_per_epoch=self._source.train_iterations_per_epoch, + batches_per_epoch=self._loader.size['train'], optimizer=self._optimizer_instance ) @@ -36,7 +38,7 @@ def execute_epoch(self, epoch_info, learner): for param_group in epoch_info.optimizer.param_groups: param_group['lr'] = self.lr - epoch_result = learner.run_epoch(epoch_info, self._source) + epoch_result = learner.run_epoch(epoch_info, self._loader) return epoch_result diff --git a/vel/train/phase/unfreeze.py b/vel/train/phase/unfreeze.py index 
48b15a0e..8fd40090 100644 --- a/vel/train/phase/unfreeze.py +++ b/vel/train/phase/unfreeze.py @@ -1,10 +1,10 @@ -import vel.api as api +import vel.train as train -class UnfreezePhase(api.EmptyTrainPhase): +class UnfreezePhase(train.EmptyTrainPhase): """ Freeze the model """ - def set_up_phase(self, training_info, model, source): + def set_up_phase(self, training_info, model, loader): """ Freeze the model """ model.unfreeze() diff --git a/vel/train/train_phase.py b/vel/train/train_phase.py index cd0f9b58..2515d4bb 100644 --- a/vel/train/train_phase.py +++ b/vel/train/train_phase.py @@ -1,7 +1,9 @@ from torch.optim import Optimizer -from vel.api import TrainingInfo, EpochInfo, Model, Source -from vel.train import Trainer +from vel.api import TrainingInfo, EpochInfo, Model +from vel.data import Loader + +from .trainer import Trainer class TrainPhase: @@ -12,7 +14,7 @@ def number_of_epochs(self) -> int: """ How many epochs does this phase take """ raise NotImplementedError - def set_up_phase(self, training_info: TrainingInfo, model: Model, source: Source) -> Optimizer: + def set_up_phase(self, training_info: TrainingInfo, model: Model, loader: Loader) -> Optimizer: """ Prepare the phase for learning, returns phase optimizer """ pass From dd3adca673664b36eea5ceefc70089fa5ea4fd8c Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 25 Jun 2019 22:32:24 -0700 Subject: [PATCH 064/162] Working on a loader for text. 
--- .../autoencoder/mnist/mnist_cnn_ae.yaml | 2 +- .../autoencoder/mnist/mnist_cnn_vae.yaml | 2 +- .../cifar10/cifar10_cnn_01.yaml | 2 +- .../cifar10/cifar10_resnetv1_110.yaml | 2 +- .../cifar10/cifar10_resnetv1_32.yaml | 2 +- .../cifar10/cifar10_resnetv2_110.yaml | 2 +- .../cifar10_resnetv2_164_bottleneck.yaml | 2 +- .../cifar10/cifar10_resnetv2_32.yaml | 2 +- .../cifar10/cifar10_resnext_29_c1.yaml | 2 +- .../cifar10/cifar10_resnext_29_c8.yaml | 2 +- .../cats_vs_dogs_resnet34.yaml | 2 +- .../classification/mnist/mnist_cnn_01.yaml | 2 +- notebooks/classic/mnist_cnn_01.ipynb | 158 ++++++++ .../imagenet_transfer/cats_vs_dogs.ipynb | 345 ++++++++++++++++++ vel/command/augvis_command.py | 6 +- vel/command/phase_train_command.py | 2 +- vel/command/train_command.py | 2 +- vel/data/__init__.py | 2 +- vel/data/{loader.py => dataset_loader.py} | 4 +- vel/data/source/nlp/text_url.py | 137 +------ vel/data/text_character_loader.py | 105 ++++++ vel/train/phase/generic.py | 4 +- vel/train/train_phase.py | 4 +- vel/train/trainer.py | 8 +- 24 files changed, 648 insertions(+), 153 deletions(-) create mode 100644 notebooks/classic/mnist_cnn_01.ipynb create mode 100644 notebooks/imagenet_transfer/cats_vs_dogs.ipynb rename vel/data/{loader.py => dataset_loader.py} (97%) create mode 100644 vel/data/text_character_loader.py diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml index 690a7f8c..d4ce36c7 100644 --- a/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml @@ -15,7 +15,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml index 74619b28..043edd22 100644 --- a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml @@ 
-14,7 +14,7 @@ source: name: vel.data.source.vision.mnist loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_cnn_01.yaml b/examples-configs/classification/cifar10/cifar10_cnn_01.yaml index 14f3c83f..e6292546 100644 --- a/examples-configs/classification/cifar10/cifar10_cnn_01.yaml +++ b/examples-configs/classification/cifar10/cifar10_cnn_01.yaml @@ -14,7 +14,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml b/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml index fc01f757..3ce7feb8 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml @@ -15,7 +15,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml b/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml index f04e40fc..935b8277 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml @@ -15,7 +15,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml b/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml index 9291ee8d..f0bd7291 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml @@ -14,7 +14,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml 
b/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml index 86d5dbaa..a7ff1491 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml @@ -16,7 +16,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml b/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml index 7f38b3fb..60ebf5ad 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml @@ -14,7 +14,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml b/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml index eabd968a..c007c5fe 100644 --- a/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml @@ -19,7 +19,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml b/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml index 9e41c5aa..6e90611d 100644 --- a/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml @@ -19,7 +19,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml index d764ce9e..1b2ab425 100644 --- 
a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml +++ b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml @@ -16,7 +16,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader num_workers: 8 batch_size: 64 diff --git a/examples-configs/classification/mnist/mnist_cnn_01.yaml b/examples-configs/classification/mnist/mnist_cnn_01.yaml index e58d5dd9..aaa96cef 100644 --- a/examples-configs/classification/mnist/mnist_cnn_01.yaml +++ b/examples-configs/classification/mnist/mnist_cnn_01.yaml @@ -14,7 +14,7 @@ source: loader: - name: vel.data.loader + name: vel.data.dataset_loader batch_size: 128 num_workers: 4 diff --git a/notebooks/classic/mnist_cnn_01.ipynb b/notebooks/classic/mnist_cnn_01.ipynb new file mode 100644 index 00000000..164dd088 --- /dev/null +++ b/notebooks/classic/mnist_cnn_01.ipynb @@ -0,0 +1,158 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Put these at the top of every notebook, to get automatic reloading and inline plotting\n", + "%reload_ext autoreload\n", + "%autoreload 2\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Adding /mnt/fast-data/repos/waterboy to the PYTHONPATH\n" + ] + } + ], + "source": [ + "# Initialize pythonpath to include waterboy. 
I don't know the way how to walk around this in other way\n", + "import os.path\n", + "import sys\n", + "\n", + "path = os.path.abspath('.')\n", + "\n", + "while True:\n", + " \n", + " if os.path.basename(path) == 'waterboy':\n", + " if path not in sys.path:\n", + " print(f'Adding {path} to the PYTHONPATH')\n", + " sys.path.append(path)\n", + " break\n", + " else:\n", + " up_path = os.path.realpath(os.path.join(path, '..'))\n", + " if path == up_path:\n", + " break\n", + " else:\n", + " path = up_path " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import waterboy.notebook as nb" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import importlib\n", + "importlib.reload(nb)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "model_config = nb.load(\"../../examples/classification/mnist/mnist_cnn_01.yaml\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import waterboy.internals.model_config\n", + "importlib.reload(waterboy.internals.model_config)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "waterboy", + "language": "python", + "name": "waterboy" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/notebooks/imagenet_transfer/cats_vs_dogs.ipynb b/notebooks/imagenet_transfer/cats_vs_dogs.ipynb new file mode 100644 index 00000000..a8d657d3 --- /dev/null +++ b/notebooks/imagenet_transfer/cats_vs_dogs.ipynb @@ -0,0 +1,345 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Put these at the top of every notebook, to get automatic reloading and inline plotting\n", + "%reload_ext autoreload\n", + "%autoreload 2\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Adding /mnt/fast-data/repos/waterboy to the PYTHONPATH\n" + ] + } + ], + "source": [ + "# Initialize pythonpath to include waterboy. I don't know the way how to walk around this in other way\n", + "import os.path\n", + "import sys\n", + "\n", + "path = os.path.abspath('.')\n", + "\n", + "while True:\n", + " \n", + " if os.path.basename(path) == 'waterboy':\n", + " if path not in sys.path:\n", + " print(f'Adding {path} to the PYTHONPATH')\n", + " sys.path.append(path)\n", + " break\n", + " else:\n", + " up_path = os.path.realpath(os.path.join(path, '..'))\n", + " if path == up_path:\n", + " break\n", + " else:\n", + " path = up_path " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import torch\n", + "import waterboy.notebook as nb" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "model_config = nb.load(\"../../examples/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml\", device='cuda:1')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"================================================================================\n", + "Pytorch version: 0.4.0 cuda version 9.1.85 cudnn version 7102\n", + "Running model cats_vs_dogs_resnet34, run 0 -- command notebook -- device cuda:1\n", + "CUDA Device name GeForce GTX 1080 Ti\n", + "2018/06/10 - 12:19:38\n", + "================================================================================\n" + ] + } + ], + "source": [ + "model_config.banner(\"notebook\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "model = model_config.provide(\"model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "model.model.load_state_dict(torch.load(\"/tmp/weight.pt\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------------------\n", + " Layer (type) Output Shape Param #\n", + "================================================================\n", + " Conv2d-1 [-1, 64, 112, 112] 9,408\n", + " BatchNorm2d-2 [-1, 64, 112, 112] 128\n", + " ReLU-3 [-1, 64, 112, 112] 0\n", + " MaxPool2d-4 [-1, 64, 56, 56] 0\n", + " Conv2d-5 [-1, 64, 56, 56] 36,864\n", + " BatchNorm2d-6 [-1, 64, 56, 56] 128\n", + " ReLU-7 [-1, 64, 56, 56] 0\n", + " Conv2d-8 [-1, 64, 56, 56] 36,864\n", + " BatchNorm2d-9 [-1, 64, 56, 56] 128\n", + " ReLU-10 [-1, 64, 56, 56] 0\n", + " BasicBlock-11 [-1, 64, 56, 56] 0\n", + " Conv2d-12 [-1, 64, 56, 56] 36,864\n", + " BatchNorm2d-13 [-1, 64, 56, 56] 128\n", + " ReLU-14 [-1, 64, 56, 56] 0\n", + " Conv2d-15 [-1, 64, 56, 56] 36,864\n", + " BatchNorm2d-16 [-1, 64, 56, 56] 128\n", + " ReLU-17 [-1, 64, 56, 56] 0\n", + " BasicBlock-18 [-1, 64, 56, 56] 0\n", + " Conv2d-19 [-1, 64, 56, 56] 36,864\n", + " 
BatchNorm2d-20 [-1, 64, 56, 56] 128\n", + " ReLU-21 [-1, 64, 56, 56] 0\n", + " Conv2d-22 [-1, 64, 56, 56] 36,864\n", + " BatchNorm2d-23 [-1, 64, 56, 56] 128\n", + " ReLU-24 [-1, 64, 56, 56] 0\n", + " BasicBlock-25 [-1, 64, 56, 56] 0\n", + " Conv2d-26 [-1, 128, 28, 28] 73,728\n", + " BatchNorm2d-27 [-1, 128, 28, 28] 256\n", + " ReLU-28 [-1, 128, 28, 28] 0\n", + " Conv2d-29 [-1, 128, 28, 28] 147,456\n", + " BatchNorm2d-30 [-1, 128, 28, 28] 256\n", + " Conv2d-31 [-1, 128, 28, 28] 8,192\n", + " BatchNorm2d-32 [-1, 128, 28, 28] 256\n", + " ReLU-33 [-1, 128, 28, 28] 0\n", + " BasicBlock-34 [-1, 128, 28, 28] 0\n", + " Conv2d-35 [-1, 128, 28, 28] 147,456\n", + " BatchNorm2d-36 [-1, 128, 28, 28] 256\n", + " ReLU-37 [-1, 128, 28, 28] 0\n", + " Conv2d-38 [-1, 128, 28, 28] 147,456\n", + " BatchNorm2d-39 [-1, 128, 28, 28] 256\n", + " ReLU-40 [-1, 128, 28, 28] 0\n", + " BasicBlock-41 [-1, 128, 28, 28] 0\n", + " Conv2d-42 [-1, 128, 28, 28] 147,456\n", + " BatchNorm2d-43 [-1, 128, 28, 28] 256\n", + " ReLU-44 [-1, 128, 28, 28] 0\n", + " Conv2d-45 [-1, 128, 28, 28] 147,456\n", + " BatchNorm2d-46 [-1, 128, 28, 28] 256\n", + " ReLU-47 [-1, 128, 28, 28] 0\n", + " BasicBlock-48 [-1, 128, 28, 28] 0\n", + " Conv2d-49 [-1, 128, 28, 28] 147,456\n", + " BatchNorm2d-50 [-1, 128, 28, 28] 256\n", + " ReLU-51 [-1, 128, 28, 28] 0\n", + " Conv2d-52 [-1, 128, 28, 28] 147,456\n", + " BatchNorm2d-53 [-1, 128, 28, 28] 256\n", + " ReLU-54 [-1, 128, 28, 28] 0\n", + " BasicBlock-55 [-1, 128, 28, 28] 0\n", + " Conv2d-56 [-1, 256, 14, 14] 294,912\n", + " BatchNorm2d-57 [-1, 256, 14, 14] 512\n", + " ReLU-58 [-1, 256, 14, 14] 0\n", + " Conv2d-59 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-60 [-1, 256, 14, 14] 512\n", + " Conv2d-61 [-1, 256, 14, 14] 32,768\n", + " BatchNorm2d-62 [-1, 256, 14, 14] 512\n", + " ReLU-63 [-1, 256, 14, 14] 0\n", + " BasicBlock-64 [-1, 256, 14, 14] 0\n", + " Conv2d-65 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-66 [-1, 256, 14, 14] 512\n", + " ReLU-67 [-1, 256, 14, 14] 0\n", + " 
Conv2d-68 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-69 [-1, 256, 14, 14] 512\n", + " ReLU-70 [-1, 256, 14, 14] 0\n", + " BasicBlock-71 [-1, 256, 14, 14] 0\n", + " Conv2d-72 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-73 [-1, 256, 14, 14] 512\n", + " ReLU-74 [-1, 256, 14, 14] 0\n", + " Conv2d-75 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-76 [-1, 256, 14, 14] 512\n", + " ReLU-77 [-1, 256, 14, 14] 0\n", + " BasicBlock-78 [-1, 256, 14, 14] 0\n", + " Conv2d-79 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-80 [-1, 256, 14, 14] 512\n", + " ReLU-81 [-1, 256, 14, 14] 0\n", + " Conv2d-82 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-83 [-1, 256, 14, 14] 512\n", + " ReLU-84 [-1, 256, 14, 14] 0\n", + " BasicBlock-85 [-1, 256, 14, 14] 0\n", + " Conv2d-86 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-87 [-1, 256, 14, 14] 512\n", + " ReLU-88 [-1, 256, 14, 14] 0\n", + " Conv2d-89 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-90 [-1, 256, 14, 14] 512\n", + " ReLU-91 [-1, 256, 14, 14] 0\n", + " BasicBlock-92 [-1, 256, 14, 14] 0\n", + " Conv2d-93 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-94 [-1, 256, 14, 14] 512\n", + " ReLU-95 [-1, 256, 14, 14] 0\n", + " Conv2d-96 [-1, 256, 14, 14] 589,824\n", + " BatchNorm2d-97 [-1, 256, 14, 14] 512\n", + " ReLU-98 [-1, 256, 14, 14] 0\n", + " BasicBlock-99 [-1, 256, 14, 14] 0\n", + " Conv2d-100 [-1, 512, 7, 7] 1,179,648\n", + " BatchNorm2d-101 [-1, 512, 7, 7] 1,024\n", + " ReLU-102 [-1, 512, 7, 7] 0\n", + " Conv2d-103 [-1, 512, 7, 7] 2,359,296\n", + " BatchNorm2d-104 [-1, 512, 7, 7] 1,024\n", + " Conv2d-105 [-1, 512, 7, 7] 131,072\n", + " BatchNorm2d-106 [-1, 512, 7, 7] 1,024\n", + " ReLU-107 [-1, 512, 7, 7] 0\n", + " BasicBlock-108 [-1, 512, 7, 7] 0\n", + " Conv2d-109 [-1, 512, 7, 7] 2,359,296\n", + " BatchNorm2d-110 [-1, 512, 7, 7] 1,024\n", + " ReLU-111 [-1, 512, 7, 7] 0\n", + " Conv2d-112 [-1, 512, 7, 7] 2,359,296\n", + " BatchNorm2d-113 [-1, 512, 7, 7] 1,024\n", + " ReLU-114 [-1, 512, 7, 7] 0\n", + " BasicBlock-115 [-1, 512, 7, 7] 
0\n", + " Conv2d-116 [-1, 512, 7, 7] 2,359,296\n", + " BatchNorm2d-117 [-1, 512, 7, 7] 1,024\n", + " ReLU-118 [-1, 512, 7, 7] 0\n", + " Conv2d-119 [-1, 512, 7, 7] 2,359,296\n", + " BatchNorm2d-120 [-1, 512, 7, 7] 1,024\n", + " ReLU-121 [-1, 512, 7, 7] 0\n", + " BasicBlock-122 [-1, 512, 7, 7] 0\n", + "AdaptiveMaxPool2d-123 [-1, 512, 1, 1] 0\n", + "AdaptiveAvgPool2d-124 [-1, 512, 1, 1] 0\n", + "AdaptiveConcatPool2d-125 [-1, 1024, 1, 1] 0\n", + " Flatten-126 [-1, 1024] 0\n", + " BatchNorm1d-127 [-1, 1024] 2,048\n", + " Dropout-128 [-1, 1024] 0\n", + " Linear-129 [-1, 512] 524,800\n", + " ReLU-130 [-1, 512] 0\n", + " BatchNorm1d-131 [-1, 512] 1,024\n", + " Dropout-132 [-1, 512] 0\n", + " Linear-133 [-1, 2] 1,026\n", + " LogSoftmax-134 [-1, 2] 0\n", + "================================================================\n", + "Total params: 21,813,570\n", + "Trainable params: 21,813,570\n", + "Non-trainable params: 0\n", + "----------------------------------------------------------------\n" + ] + } + ], + "source": [ + "model.summary(input_size=(3, 224, 224))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "run_command = model_config.get_command('train')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train Epoch: 0001 [000000/023000 (00%)]\tloss: 0.854900 accuracy: 0.562500\n", + "Train Epoch: 0001 [006400/023000 (28%)]\tloss: 0.150858 accuracy: 0.939202\n", + "Train Epoch: 0001 [012800/023000 (56%)]\tloss: 0.117378 accuracy: 0.953358\n", + "Train Epoch: 0001 [019200/023000 (83%)]\tloss: 0.104418 accuracy: 0.958576\n", + "=>>>>>>>>>> EPOCH 1\n", + "Train accuracy 0.960822 loss 0.100100\n", + "Validation accuracy 0.983887 loss 0.035918\n", + "=>>>>>>>>>> DONE\n", + "Train Epoch: 0002 [000000/023000 (00%)]\tloss: 0.058765 accuracy: 0.953125\n", + "Train Epoch: 0002 [006400/023000 
(28%)]\tloss: 0.076278 accuracy: 0.971225\n", + "Train Epoch: 0002 [012800/023000 (56%)]\tloss: 0.071321 accuracy: 0.973025\n", + "Train Epoch: 0002 [019200/023000 (83%)]\tloss: 0.069153 accuracy: 0.973681\n", + "=>>>>>>>>>> EPOCH 2\n", + "Train accuracy 0.973799 loss 0.069813\n", + "Validation accuracy 0.983887 loss 0.037188\n", + "=>>>>>>>>>> DONE\n" + ] + } + ], + "source": [ + "run_command.run()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "waterboy", + "language": "python", + "name": "waterboy" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/vel/command/augvis_command.py b/vel/command/augvis_command.py index b4a74fcd..880830b9 100644 --- a/vel/command/augvis_command.py +++ b/vel/command/augvis_command.py @@ -1,12 +1,12 @@ import matplotlib.pyplot as plt import numpy as np -from vel.data import Loader +from vel.data import DatasetLoader class AugmentationVisualizationCommand: """ Visualize augmentations """ - def __init__(self, loader: Loader, samples, cases): + def __init__(self, loader: DatasetLoader, samples, cases): self.loader = loader self.samples = samples self.cases = cases @@ -34,6 +34,6 @@ def run(self): plt.show() -def create(loader: Loader, samples: int, cases: int): +def create(loader: DatasetLoader, samples: int, cases: int): """ Vel factory function """ return AugmentationVisualizationCommand(loader, samples, cases) diff --git a/vel/command/phase_train_command.py b/vel/command/phase_train_command.py index b80a1b7a..18566ebf 100644 --- a/vel/command/phase_train_command.py +++ b/vel/command/phase_train_command.py @@ -14,7 +14,7 @@ class PhaseTrainCommand: """ Training command - learn according to a set of phases """ - def __init__(self, model_config: 
api.ModelConfig, model_factory: api.ModelFactory, loader: data.Loader, + def __init__(self, model_config: api.ModelConfig, model_factory: api.ModelFactory, loader: data.DatasetLoader, storage: api.Storage, phases: typing.List[train.TrainPhase], callbacks=None, restart=True): self.model_config = model_config diff --git a/vel/command/train_command.py b/vel/command/train_command.py index 6a65d993..9d1a1965 100644 --- a/vel/command/train_command.py +++ b/vel/command/train_command.py @@ -14,7 +14,7 @@ class SimpleTrainCommand: def __init__(self, epochs: int, model_config: api.ModelConfig, model_factory: api.ModelFactory, optimizer_factory: api.OptimizerFactory, scheduler_factory: typing.Optional[api.SchedulerFactory], - loader: data.Loader, storage: api.Storage, callbacks: typing.Optional[typing.List[api.Callback]], + loader: data.DatasetLoader, storage: api.Storage, callbacks: typing.Optional[typing.List[api.Callback]], max_grad_norm: typing.Optional[float]): self.epochs = epochs self.model_config = model_config diff --git a/vel/data/__init__.py b/vel/data/__init__.py index 41e0ebd8..122edbd6 100644 --- a/vel/data/__init__.py +++ b/vel/data/__init__.py @@ -1,2 +1,2 @@ from .dataflow import DataFlow -from .loader import Loader +from .dataset_loader import DatasetLoader diff --git a/vel/data/loader.py b/vel/data/dataset_loader.py similarity index 97% rename from vel/data/loader.py rename to vel/data/dataset_loader.py index a75a3453..8228cf52 100644 --- a/vel/data/loader.py +++ b/vel/data/dataset_loader.py @@ -6,7 +6,7 @@ from .dataflow import DataFlow -class Loader: +class DatasetLoader: """ Loads data from a data source to serve it to the model """ def __init__(self, source: Source, batch_size: int, num_workers: int, @@ -65,7 +65,7 @@ def size(self): def create(source: Source, batch_size: int, num_workers: int = 0, transformations: typing.Optional[list] = None): """ Vel factory function """ - return Loader( + return DatasetLoader( source=source, batch_size=batch_size, 
num_workers=num_workers, diff --git a/vel/data/source/nlp/text_url.py b/vel/data/source/nlp/text_url.py index 5478837c..5f3d61eb 100644 --- a/vel/data/source/nlp/text_url.py +++ b/vel/data/source/nlp/text_url.py @@ -9,88 +9,12 @@ from vel.api import Source -class TextIterator: - """ Iterator over a text dataset """ - def __init__(self, padded_sequence, sequence_length, batch_size, alphabet_size, num_batches): - self.sequence_length = sequence_length - self.batch_size = batch_size - self.num_batches = num_batches - self.alphabet_size = alphabet_size - - self.padded_sequence = padded_sequence[:-1].reshape(self.num_batches * self.batch_size, self.sequence_length) - self.padded_sequence_next = padded_sequence[1:].reshape( - self.num_batches * self.batch_size, self.sequence_length - ) - - self.sequence_indices = np.arange(self.num_batches * self.batch_size) - - np.random.shuffle(self.sequence_indices) - - self.sequence_indices = self.sequence_indices.reshape(self.num_batches, self.batch_size) - - self.batch_idx = 0 - - def __iter__(self): - return self - - def __next__(self): - if self.batch_idx == self.num_batches: - raise StopIteration - else: - input_data = torch.from_numpy(self.padded_sequence[self.sequence_indices[self.batch_idx]]) - target_data = torch.from_numpy(self.padded_sequence_next[self.sequence_indices[self.batch_idx]]) - - self.batch_idx += 1 - - return input_data.to(torch.long), target_data.to(torch.long) - - -class TextLoader: - """ Loader of sequential text data """ - def __init__(self, sequence, sequence_length, batch_size, alphabet_size): - self.sequence = sequence - self.sequence_length = sequence_length - self.batch_size = batch_size - self.alphabet_size = alphabet_size - - # 1 is for the last element as the target needs to be shifted by 1 - residual_length = (len(self.sequence) - self.sequence_length - 1) - full_size = self.sequence_length * self.batch_size - - rest = residual_length % full_size - self.num_batches = residual_length // full_size - 
- if rest > 0: - self.sequence = np.pad(self.sequence, (0, full_size - rest), mode='constant') - self.num_batches += 1 - - def __iter__(self): - initial_offset = np.random.randint(self.sequence_length) - relevant_subsequence = self.sequence[ - # 1 is for the last element as the target needs to be shifted by 1 - initial_offset:self.num_batches * self.sequence_length * self.batch_size + initial_offset + 1 - ] - - return TextIterator( - relevant_subsequence, self.sequence_length, self.batch_size, - alphabet_size=self.alphabet_size, - num_batches=self.num_batches - ) - - def __len__(self): - """ Number of batches in this loader """ - return self.num_batches - - class TextUrlSource(Source): """ Download text from source and model it character by character """ - def __init__(self, url, absolute_data_path, sequence_length, batch_size, train_val_split=0.8): - super().__init__() + def __init__(self, url, absolute_data_path, train_val_split=0.8): self.url = url self.data_path = absolute_data_path - self.sequence_length = sequence_length - self.batch_size = batch_size self.train_val_split = train_val_split self.text_path = os.path.join(self.data_path, 'text.txt') @@ -99,55 +23,20 @@ def __init__(self, url, absolute_data_path, sequence_length, batch_size, train_v self.data_dict = self.download() content_encoded = self.data_dict['content_encoded'] - alphabet_size = len(self.data_dict['alphabet']) split_idx = int(len(content_encoded) * train_val_split) - self._train_loader = TextLoader( - sequence=content_encoded[:split_idx], - sequence_length=sequence_length, - batch_size=batch_size, - alphabet_size=alphabet_size, - ) - - self._val_loader = TextLoader( - sequence=content_encoded[split_idx:], - sequence_length=sequence_length, - batch_size=batch_size, - alphabet_size=alphabet_size, + super().__init__( + train=content_encoded[:split_idx], + validation=content_encoded[split_idx:], + metadata={ + 'alphabet': self.data_dict['alphabet'], + 'character_to_index': 
self.data_dict['character_to_index'], + 'index_to_character': self.data_dict['index_to_character'] + } ) - def encode_character(self, char): - return self.data_dict['character_to_index'][char] - - def decode_character(self, index): - return self.data_dict['index_to_character'][index] - - def train_loader(self): - """ PyTorch loader of training data """ - return self._train_loader - - def val_loader(self): - """ PyTorch loader of validation data """ - return self._val_loader - - def train_dataset(self): - """ Return the training dataset """ - return None - - def val_dataset(self): - """ Return the validation dataset """ - return None - - def train_iterations_per_epoch(self): - """ Return number of iterations per epoch """ - return len(self._train_loader) - - def val_iterations_per_epoch(self): - """ Return number of iterations per epoch - validation """ - return len(self._val_loader) - - def download(self): + def download(self) -> dict: """ Make sure data file is downloaded and stored properly """ if not os.path.exists(self.data_path): # Create if it doesn't exist @@ -188,7 +77,7 @@ def download(self): return data_dict -def create(model_config, url, local_dir, sequence_length=64, batch_size=64, train_val_split=0.8): +def create(model_config, url, local_dir, train_val_split=0.8): """ Vel factory function """ if not os.path.isabs(local_dir): local_dir = model_config.project_data_dir(local_dir) @@ -196,7 +85,5 @@ def create(model_config, url, local_dir, sequence_length=64, batch_size=64, trai return TextUrlSource( url, absolute_data_path=local_dir, - sequence_length=sequence_length, - batch_size=batch_size, train_val_split=train_val_split, - ) +) diff --git a/vel/data/text_character_loader.py b/vel/data/text_character_loader.py new file mode 100644 index 00000000..d193bbb8 --- /dev/null +++ b/vel/data/text_character_loader.py @@ -0,0 +1,105 @@ +import numpy as np +import torch + +from vel.api import Source + + +class TextIterator: + """ Iterator over a text dataset """ 
+ def __init__(self, padded_sequence, sequence_length, batch_size, alphabet_size, num_batches): + self.sequence_length = sequence_length + self.batch_size = batch_size + self.num_batches = num_batches + self.alphabet_size = alphabet_size + + self.padded_sequence = padded_sequence[:-1].reshape(self.num_batches * self.batch_size, self.sequence_length) + self.padded_sequence_next = padded_sequence[1:].reshape( + self.num_batches * self.batch_size, self.sequence_length + ) + + self.sequence_indices = np.arange(self.num_batches * self.batch_size) + + np.random.shuffle(self.sequence_indices) + + self.sequence_indices = self.sequence_indices.reshape(self.num_batches, self.batch_size) + + self.batch_idx = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.batch_idx == self.num_batches: + raise StopIteration + else: + input_data = torch.from_numpy(self.padded_sequence[self.sequence_indices[self.batch_idx]]) + target_data = torch.from_numpy(self.padded_sequence_next[self.sequence_indices[self.batch_idx]]) + + self.batch_idx += 1 + + return input_data.to(torch.long), target_data.to(torch.long) + + +class TextLoader: + """ Loader of sequential text data """ + def __init__(self, sequence, sequence_length, batch_size, alphabet_size): + self.sequence = sequence + self.sequence_length = sequence_length + self.batch_size = batch_size + self.alphabet_size = alphabet_size + + # 1 is for the last element as the target needs to be shifted by 1 + residual_length = (len(self.sequence) - self.sequence_length - 1) + full_size = self.sequence_length * self.batch_size + + rest = residual_length % full_size + self.num_batches = residual_length // full_size + + if rest > 0: + self.sequence = np.pad(self.sequence, (0, full_size - rest), mode='constant') + self.num_batches += 1 + + def __iter__(self): + initial_offset = np.random.randint(self.sequence_length) + relevant_subsequence = self.sequence[ + # 1 is for the last element as the target needs to be shifted by 1 + 
initial_offset:self.num_batches * self.sequence_length * self.batch_size + initial_offset + 1 + ] + + return TextIterator( + relevant_subsequence, self.sequence_length, self.batch_size, + alphabet_size=self.alphabet_size, + num_batches=self.num_batches + ) + + def __len__(self): + """ Number of batches in this loader """ + return self.num_batches + + +class TextCharacterLoader: + """ Loader for the text character data source """ + + def __init__(self, source, sequence_length: int, batch_size: int): + self.source = source + self.sequence_length = sequence_length + self.batch_size = batch_size + self.alphabet = self.source.metadata['alphabet'] + + self._loaders = { + 'train': TextLoader(self.source.train, self.sequence_length, self.batch_size, len(self.alphabet)) + } + + +def create(source: Source, sequence_length: int = 64, batch_size: int = 64): + """ Vel factory function """ + return TextCharacterLoader( + source=source, + sequence_length=sequence_length, + batch_size=batch_size + ) + + + + + diff --git a/vel/train/phase/generic.py b/vel/train/phase/generic.py index f57e9923..db790fed 100644 --- a/vel/train/phase/generic.py +++ b/vel/train/phase/generic.py @@ -1,5 +1,5 @@ from vel.api import TrainingInfo, EpochInfo -from vel.data import Loader +from vel.data import DatasetLoader from vel.train import TrainPhase @@ -18,7 +18,7 @@ def __init__(self, lr, epochs, optimizer_factory): def number_of_epochs(self) -> int: return self.epochs - def set_up_phase(self, training_info, model, loader: Loader): + def set_up_phase(self, training_info, model, loader: DatasetLoader): """ Prepare the phase for learning """ self._optimizer_instance = self.optimizer_factory.instantiate(model) self._loader = loader diff --git a/vel/train/train_phase.py b/vel/train/train_phase.py index 2515d4bb..daeb5733 100644 --- a/vel/train/train_phase.py +++ b/vel/train/train_phase.py @@ -1,7 +1,7 @@ from torch.optim import Optimizer from vel.api import TrainingInfo, EpochInfo, Model -from vel.data 
import Loader +from vel.data import DatasetLoader from .trainer import Trainer @@ -14,7 +14,7 @@ def number_of_epochs(self) -> int: """ How many epochs does this phase take """ raise NotImplementedError - def set_up_phase(self, training_info: TrainingInfo, model: Model, loader: Loader) -> Optimizer: + def set_up_phase(self, training_info: TrainingInfo, model: Model, loader: DatasetLoader) -> Optimizer: """ Prepare the phase for learning, returns phase optimizer """ pass diff --git a/vel/train/trainer.py b/vel/train/trainer.py index 89284147..9bddd3d5 100644 --- a/vel/train/trainer.py +++ b/vel/train/trainer.py @@ -5,7 +5,7 @@ import typing from vel.api import GradientModel, TrainingInfo, EpochInfo, BatchInfo -from vel.data import Loader +from vel.data import DatasetLoader from vel.util.tensor_util import to_device @@ -45,7 +45,7 @@ def initialize_training(self, training_info: TrainingInfo, model_state=None, hid else: self.model.load_state_dict(model_state) - def run_epoch(self, epoch_info: EpochInfo, loader: Loader): + def run_epoch(self, epoch_info: EpochInfo, loader: DatasetLoader): """ Run full epoch of learning """ epoch_info.on_epoch_begin() @@ -60,7 +60,7 @@ def run_epoch(self, epoch_info: EpochInfo, loader: Loader): epoch_info.on_epoch_end() - def train_epoch(self, epoch_info, loader: Loader, interactive=True): + def train_epoch(self, epoch_info, loader: DatasetLoader, interactive=True): """ Run a single training epoch """ self.train() @@ -79,7 +79,7 @@ def train_epoch(self, epoch_info, loader: Loader, interactive=True): iterator.set_postfix(loss=epoch_info.result_accumulator.intermediate_value('loss')) - def validation_epoch(self, epoch_info, loader: Loader, interactive=True): + def validation_epoch(self, epoch_info, loader: DatasetLoader, interactive=True): """ Run a single evaluation epoch """ self.eval() From 055c007c661f7909a44d2c83602501ce67c8fc95 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Wed, 26 Jun 2019 13:00:49 -0700 Subject: [PATCH 065/162] 
Finished fixing shakespeare text generation. --- .../nlp/generation/gen_shakespeare_gru.yaml | 16 +++++--- .../gen_shakespeare_gru_embedding.yaml | 16 +++++--- .../nlp/generation/gen_shakespeare_lstm.yaml | 16 +++++--- .../gen_shakespeare_lstm_embedding.yaml | 16 +++++--- vel/data/text_character_loader.py | 41 ++++++++++++++++--- vel/module/rnn_layer.py | 4 ++ vel/util/tensor_util.py | 2 + 7 files changed, 81 insertions(+), 30 deletions(-) diff --git a/examples-configs/nlp/generation/gen_shakespeare_gru.yaml b/examples-configs/nlp/generation/gen_shakespeare_gru.yaml index 4e54fb31..2ae82918 100644 --- a/examples-configs/nlp/generation/gen_shakespeare_gru.yaml +++ b/examples-configs/nlp/generation/gen_shakespeare_gru.yaml @@ -2,19 +2,23 @@ name: 'gen_shakespeare_gru' source: - name: vel.sources.nlp.text_url + name: vel.data.source.nlp.text_url # Andrej Karpathy built a small (4.4mb) file with combined all works of Shakespeare url: 'https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt' local_dir: './rnn_shakespeare' + + +loader: + name: vel.data.text_character_loader sequence_length: 128 batch_size: 64 model: - name: vel.models.rnn.multilayer_rnn_sequence_model + name: vel.model.rnn.multilayer_rnn_sequence_model input_block: - name: vel.modules.input.one_hot_encoding + name: vel.module.input.one_hot_encoding alphabet_size: 68 # Size of the alphabet + 1 hidden_layers: [512, 512, 512] @@ -24,19 +28,19 @@ model: optimizer: - name: vel.optimizers.adam + name: vel.optimizer.adam lr: 1.0e-3 epsilon: 1.0e-5 commands: train: - name: vel.commands.train_command + name: vel.command.train_command max_grad_norm: 0.5 epochs: 20 generate: - name: vel.commands.rnn.generate_text + name: vel.command.rnn.generate_text start_letter: !param start_letter = 'A' length: !param length = 500 temperature: !param temperature = 0.8 diff --git a/examples-configs/nlp/generation/gen_shakespeare_gru_embedding.yaml b/examples-configs/nlp/generation/gen_shakespeare_gru_embedding.yaml 
index 9603cb8d..70e1a961 100644 --- a/examples-configs/nlp/generation/gen_shakespeare_gru_embedding.yaml +++ b/examples-configs/nlp/generation/gen_shakespeare_gru_embedding.yaml @@ -2,19 +2,23 @@ name: 'gen_shakespeare_gru_embedding' source: - name: vel.sources.nlp.text_url + name: vel.data.source.nlp.text_url # Andrej Karpathy built a small (4.4mb) file with combined all works of Shakespeare url: 'https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt' local_dir: './rnn_shakespeare' + + +loader: + name: vel.data.text_character_loader sequence_length: 128 batch_size: 64 model: - name: vel.models.rnn.multilayer_rnn_sequence_model + name: vel.model.rnn.multilayer_rnn_sequence_model input_block: - name: vel.modules.input.embedding + name: vel.module.input.embedding alphabet_size: 68 # Size of the alphabet + 1 output_dim: 512 # Embedding dimension @@ -25,19 +29,19 @@ model: optimizer: - name: vel.optimizers.adam + name: vel.optimizer.adam lr: 1.0e-3 epsilon: 1.0e-5 commands: train: - name: vel.commands.train_command + name: vel.command.train_command max_grad_norm: 0.5 epochs: 20 generate: - name: vel.commands.rnn.generate_text + name: vel.command.rnn.generate_text start_letter: !param start_letter = 'A' length: !param length = 500 temperature: !param temperature = 0.8 diff --git a/examples-configs/nlp/generation/gen_shakespeare_lstm.yaml b/examples-configs/nlp/generation/gen_shakespeare_lstm.yaml index 56129c6f..0f06d487 100644 --- a/examples-configs/nlp/generation/gen_shakespeare_lstm.yaml +++ b/examples-configs/nlp/generation/gen_shakespeare_lstm.yaml @@ -2,19 +2,23 @@ name: 'gen_shakespeare_lstm' source: - name: vel.sources.nlp.text_url + name: vel.data.source.nlp.text_url # Andrej Karpathy built a small (4.4mb) file with combined all works of Shakespeare url: 'https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt' local_dir: './rnn_shakespeare' + + +loader: + name: vel.data.text_character_loader sequence_length: 128 batch_size: 64 
model: - name: vel.models.rnn.multilayer_rnn_sequence_model + name: vel.model.rnn.multilayer_rnn_sequence_model input_block: - name: vel.modules.input.one_hot_encoding + name: vel.module.input.one_hot_encoding alphabet_size: 68 # Size of the alphabet + 1 hidden_layers: [512, 512, 512] @@ -24,19 +28,19 @@ model: optimizer: - name: vel.optimizers.adam + name: vel.optimizer.adam lr: 1.0e-3 epsilon: 1.0e-5 commands: train: - name: vel.commands.train_command + name: vel.command.train_command max_grad_norm: 0.5 epochs: 20 generate: - name: vel.commands.rnn.generate_text + name: vel.command.rnn.generate_text start_letter: !param start_letter = 'A' length: !param length = 500 temperature: !param temperature = 0.8 diff --git a/examples-configs/nlp/generation/gen_shakespeare_lstm_embedding.yaml b/examples-configs/nlp/generation/gen_shakespeare_lstm_embedding.yaml index 79a91d57..c84af0d0 100644 --- a/examples-configs/nlp/generation/gen_shakespeare_lstm_embedding.yaml +++ b/examples-configs/nlp/generation/gen_shakespeare_lstm_embedding.yaml @@ -2,19 +2,23 @@ name: 'gen_shakespeare_lstm_embedding' source: - name: vel.sources.nlp.text_url + name: vel.data.source.nlp.text_url # Andrej Karpathy built a small (4.4mb) file with combined all works of Shakespeare url: 'https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt' local_dir: './rnn_shakespeare' + + +loader: + name: vel.data.text_character_loader sequence_length: 128 batch_size: 64 model: - name: vel.models.rnn.multilayer_rnn_sequence_model + name: vel.model.rnn.multilayer_rnn_sequence_model input_block: - name: vel.modules.input.embedding + name: vel.module.input.embedding alphabet_size: 68 # Size of the alphabet + 1 output_dim: 512 # Embedding dimension @@ -25,19 +29,19 @@ model: optimizer: - name: vel.optimizers.adam + name: vel.optimizer.adam lr: 1.0e-3 epsilon: 1.0e-5 commands: train: - name: vel.commands.train_command + name: vel.command.train_command max_grad_norm: 0.5 epochs: 20 generate: - name: 
vel.commands.rnn.generate_text + name: vel.command.rnn.generate_text start_letter: !param start_letter = 'A' length: !param length = 500 temperature: !param temperature = 0.8 diff --git a/vel/data/text_character_loader.py b/vel/data/text_character_loader.py index d193bbb8..92f9d405 100644 --- a/vel/data/text_character_loader.py +++ b/vel/data/text_character_loader.py @@ -37,11 +37,11 @@ def __next__(self): self.batch_idx += 1 - return input_data.to(torch.long), target_data.to(torch.long) + return {'x': input_data.to(torch.long), 'y': target_data.to(torch.long)} class TextLoader: - """ Loader of sequential text data """ + """ Creates iterators over a sequential block of text """ def __init__(self, sequence, sequence_length, batch_size, alphabet_size): self.sequence = sequence self.sequence_length = sequence_length @@ -62,9 +62,9 @@ def __init__(self, sequence, sequence_length, batch_size, alphabet_size): def __iter__(self): initial_offset = np.random.randint(self.sequence_length) relevant_subsequence = self.sequence[ - # 1 is for the last element as the target needs to be shifted by 1 - initial_offset:self.num_batches * self.sequence_length * self.batch_size + initial_offset + 1 - ] + # 1 is for the last element as the target needs to be shifted by 1 + initial_offset:self.num_batches * self.sequence_length * self.batch_size + initial_offset + 1 + ] return TextIterator( relevant_subsequence, self.sequence_length, self.batch_size, @@ -86,10 +86,39 @@ def __init__(self, source, sequence_length: int, batch_size: int): self.batch_size = batch_size self.alphabet = self.source.metadata['alphabet'] + self.train_loader = TextLoader(self.source.train, self.sequence_length, self.batch_size, len(self.alphabet)) + self.val_loader = TextLoader(self.source.validation, self.sequence_length, self.batch_size, len(self.alphabet)) + + if self.source.test is None: + self.test_loader = None + else: + self.test_loader = TextLoader(self.source.test, self.sequence_length, self.batch_size, 
len(self.alphabet)) + self._loaders = { - 'train': TextLoader(self.source.train, self.sequence_length, self.batch_size, len(self.alphabet)) + 'train': self.train_loader, + 'val': self.val_loader, + 'test': self.test_loader + } + + self._loader_sizes = { + 'train': len(self.train_loader), + 'val': len(self.val_loader), + 'test': 0 if self.test_loader is None else len(self.test_loader) } + def __getitem__(self, item): + return self._loaders[item] + + @property + def loader(self): + """ Get a dict of loaders """ + return self._loaders + + @property + def size(self): + """ Get a dict of sizes of each loader """ + return self._loader_sizes + def create(source: Source, sequence_length: int = 64, batch_size: int = 64): """ Vel factory function """ diff --git a/vel/module/rnn_layer.py b/vel/module/rnn_layer.py index 54d61904..0c5b2c85 100644 --- a/vel/module/rnn_layer.py +++ b/vel/module/rnn_layer.py @@ -57,6 +57,10 @@ def state_dim(self) -> int: else: return self.hidden_size + def zero_state(self, batch_size): + """ State for the model """ + return torch.zeros(batch_size, self.state_dim) + def forward(self, input_data, state=None): if state is None: if self.bidirectional: diff --git a/vel/util/tensor_util.py b/vel/util/tensor_util.py index b0683099..554ce2d2 100644 --- a/vel/util/tensor_util.py +++ b/vel/util/tensor_util.py @@ -28,5 +28,7 @@ def to_device(tensor, device: torch.device): return {k: to_device(v, device) for k, v in tensor.items()} elif isinstance(tensor, list): return [to_device(v, device) for v in tensor] + elif isinstance(tensor, tuple): + return tuple(to_device(v, device) for v in tensor) else: raise NotImplementedError From 634fe77db6322e878c4a9e37fe5ce0b5fce214a6 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 27 Jun 2019 13:23:35 -0700 Subject: [PATCH 066/162] Fixed a few broken imports. 
--- vel/metric/base/__init__.py | 4 ++++ vel/rl/algo/policy_gradient/a2c.py | 4 ++-- vel/rl/algo/policy_gradient/acer.py | 2 +- vel/rl/algo/policy_gradient/ddpg.py | 2 +- vel/rl/algo/policy_gradient/ppo.py | 6 +++--- vel/rl/algo/policy_gradient/trpo.py | 4 ++-- vel/rl/command/rl_train_command.py | 4 ++-- vel/scheduler/linear_batch_scaler.py | 4 +++- 8 files changed, 18 insertions(+), 12 deletions(-) diff --git a/vel/metric/base/__init__.py b/vel/metric/base/__init__.py index e69de29b..20f27c6d 100644 --- a/vel/metric/base/__init__.py +++ b/vel/metric/base/__init__.py @@ -0,0 +1,4 @@ +from .averaging_metric import AveragingSupervisedMetric, AveragingNamedMetric, AveragingMetric +from .base_metric import BaseMetric, MetricKey +from .summing_metric import SummingMetric, SummingNamedMetric +from .value_metric import ValueMetric \ No newline at end of file diff --git a/vel/rl/algo/policy_gradient/a2c.py b/vel/rl/algo/policy_gradient/a2c.py index 86485184..fdbbbb61 100644 --- a/vel/rl/algo/policy_gradient/a2c.py +++ b/vel/rl/algo/policy_gradient/a2c.py @@ -1,8 +1,8 @@ import torch import torch.nn.functional as F -from vel.metric.averaging_metric import AveragingNamedMetric -from vel.math.function import explained_variance +from vel.metric.base import AveragingNamedMetric +from vel.calc.function import explained_variance from vel.rl.api import OptimizerAlgoBase, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae diff --git a/vel/rl/algo/policy_gradient/acer.py b/vel/rl/algo/policy_gradient/acer.py index 9426957d..6d78a603 100644 --- a/vel/rl/algo/policy_gradient/acer.py +++ b/vel/rl/algo/policy_gradient/acer.py @@ -1,7 +1,7 @@ import torch import torch.nn.functional as F -from vel.metric.averaging_metric import AveragingNamedMetric +from vel.metric.base import AveragingNamedMetric from vel.rl.api import Trajectories, OptimizerAlgoBase diff --git a/vel/rl/algo/policy_gradient/ddpg.py b/vel/rl/algo/policy_gradient/ddpg.py index 
2150cab4..1e47b5a0 100644 --- a/vel/rl/algo/policy_gradient/ddpg.py +++ b/vel/rl/algo/policy_gradient/ddpg.py @@ -4,7 +4,7 @@ import torch.nn.functional as F from vel.rl.api import OptimizerAlgoBase -from vel.metric.averaging_metric import AveragingNamedMetric +from vel.metric.base import AveragingNamedMetric class DeepDeterministicPolicyGradient(OptimizerAlgoBase): diff --git a/vel/rl/algo/policy_gradient/ppo.py b/vel/rl/algo/policy_gradient/ppo.py index d835cf6a..3ef76c72 100644 --- a/vel/rl/algo/policy_gradient/ppo.py +++ b/vel/rl/algo/policy_gradient/ppo.py @@ -2,11 +2,11 @@ import numbers -from vel.math.function import explained_variance -from vel.metric.averaging_metric import AveragingNamedMetric +from vel.calc.function import explained_variance +from vel.function.constant import ConstantSchedule +from vel.metric.base import AveragingNamedMetric from vel.rl.api import OptimizerAlgoBase, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae -from vel.schedule.constant import ConstantSchedule class PpoPolicyGradient(OptimizerAlgoBase): diff --git a/vel/rl/algo/policy_gradient/trpo.py b/vel/rl/algo/policy_gradient/trpo.py index 2922e128..f4fa4206 100644 --- a/vel/rl/algo/policy_gradient/trpo.py +++ b/vel/rl/algo/policy_gradient/trpo.py @@ -4,8 +4,8 @@ import torch.nn.functional as F import torch.nn.utils -from vel.metric.averaging_metric import AveragingNamedMetric -from vel.math.function import explained_variance +from vel.calc.function import explained_variance +from vel.metric.base import AveragingNamedMetric from vel.rl.api import AlgoBase, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae diff --git a/vel/rl/command/rl_train_command.py b/vel/rl/command/rl_train_command.py index 0e852826..f0363cbb 100644 --- a/vel/rl/command/rl_train_command.py +++ b/vel/rl/command/rl_train_command.py @@ -18,14 +18,14 @@ def on_initialization(self, training_info: TrainingInfo): training_info['frames'] = 0 - def 
on_batch_begin(self, batch_info: BatchInfo): + def on_batch_begin(self, batch_info: BatchInfo, dataset: typing.Optional[str] = None): if 'total_frames' in batch_info.training_info: # Track progress during learning batch_info['progress'] = ( float(batch_info.training_info['frames']) / batch_info.training_info['total_frames'] ) - def on_batch_end(self, batch_info: BatchInfo): + def on_batch_end(self, batch_info: BatchInfo, dataset: typing.Optional[str] = None): batch_info.training_info['frames'] += batch_info['frames'] def write_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict): diff --git a/vel/scheduler/linear_batch_scaler.py b/vel/scheduler/linear_batch_scaler.py index 584251f4..203b0980 100644 --- a/vel/scheduler/linear_batch_scaler.py +++ b/vel/scheduler/linear_batch_scaler.py @@ -1,3 +1,5 @@ +import typing + import vel.api as base from vel.api import BatchInfo, TrainingInfo @@ -18,7 +20,7 @@ def write_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict) def load_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict): self.starting_lr = hidden_state_dict['linear_batch_scaler/starting_lr'] - def on_batch_begin(self, batch_info: BatchInfo): + def on_batch_begin(self, batch_info: BatchInfo, dataset: typing.Optional[str] = None): for starting_lr, param_group in zip(self.starting_lr, self.optimizer.param_groups): param_group['lr'] = starting_lr * (1.0 - batch_info['progress']) From 8c7aae17ae53ecb919301586bef09c9a413dc7c7 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 12:49:18 -0700 Subject: [PATCH 067/162] Updated dependency in makefile. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3f2fc3d3..a7e44717 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,7 @@ test: partestc: pytest -n 4 . 
-requirements.txt: +requirements.txt: requirements.in pip-compile requirements.in requpgrade: From eb3029992523dc6495c0d0449630f6e390577b39 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 14:49:11 -0700 Subject: [PATCH 068/162] Updated to PyTorch 1.2 --- requirements.in | 2 +- requirements.txt | 58 ++++++++++++++++++++++++------------------------ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/requirements.in b/requirements.in index a1fd3edb..6ffb91bd 100644 --- a/requirements.in +++ b/requirements.in @@ -6,7 +6,7 @@ opencv-python pandas pyyaml scikit-learn -torch~=1.1 +torch~=1.2 torchtext torchvision tqdm diff --git a/requirements.txt b/requirements.txt index a0cce232..fc2a4418 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,53 +4,53 @@ # # pip-compile # -atari-py==0.2.0 # via gym +atari-py==0.2.6 # via gym atomicwrites==1.3.0 # via pytest attrs==19.1.0 box2d-py==2.3.8 # via gym -certifi==2019.6.16 # via requests +certifi==2019.9.11 # via requests chardet==3.0.4 # via requests -cloudpickle==1.2.1 +cloudpickle==1.2.2 cycler==0.10.0 # via matplotlib dnspython==1.16.0 future==0.17.1 # via pyglet -gym[atari,box2d,classic_control]==0.13.0 +gym[atari,box2d,classic_control]==0.14.0 idna==2.8 # via requests -importlib-metadata==0.18 # via pluggy, pytest +importlib-metadata==0.22 # via pluggy, pytest joblib==0.13.2 # via scikit-learn kiwisolver==1.1.0 # via matplotlib -matplotlib==3.1.0 -more-itertools==7.0.0 # via pytest -numpy==1.16.4 -opencv-python==4.1.0.25 -packaging==19.0 # via pytest -pandas==0.24.2 -pillow==6.0.0 # via gym, torchvision, visdom -pluggy==0.12.0 # via pytest +matplotlib==3.1.1 +more-itertools==7.2.0 # via pytest, zipp +numpy==1.17.2 +opencv-python==4.1.1.26 +packaging==19.1 # via pytest +pandas==0.25.1 +pillow==6.1.0 # via gym, torchvision, visdom +pluggy==0.13.0 # via pytest py==1.8.0 # via pytest pyglet==1.3.2 # via gym -pymongo==3.8.0 -pyparsing==2.4.0 # via matplotlib, packaging -pytest==4.6.3 
+pymongo==3.9.0 +pyparsing==2.4.2 # via matplotlib, packaging +pytest==5.1.2 python-dateutil==2.8.0 # via matplotlib, pandas -pytz==2019.1 # via pandas -pyyaml==5.1.1 -pyzmq==18.0.1 # via visdom +pytz==2019.2 # via pandas +pyyaml==5.1.2 +pyzmq==18.1.0 # via visdom requests==2.22.0 # via torchtext, visdom -scikit-learn==0.21.2 -scipy==1.3.0 # via gym, scikit-learn, visdom -six==1.12.0 # via atari-py, cycler, gym, packaging, pytest, python-dateutil, torchvision, visdom, websocket-client -torch==1.1.0 +scikit-learn==0.21.3 +scipy==1.3.1 # via gym, scikit-learn, visdom +six==1.12.0 # via atari-py, cycler, gym, packaging, python-dateutil, torchtext, torchvision, visdom, websocket-client +torch==1.2.0 torchfile==0.1.0 # via visdom -torchtext==0.3.1 -torchvision==0.3.0 -tornado==6.0.2 # via visdom -tqdm==4.32.2 +torchtext==0.4.0 +torchvision==0.4.0 +tornado==6.0.3 # via visdom +tqdm==4.35.0 urllib3==1.25.3 # via requests visdom==0.1.8.8 wcwidth==0.1.7 # via pytest websocket-client==0.56.0 # via visdom -zipp==0.5.1 # via importlib-metadata +zipp==0.6.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -# setuptools==41.0.1 # via kiwisolver +# setuptools==41.2.0 # via kiwisolver From 6c8cd48d52241ec7b5cb83b1e2740fae6ebc387e Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 14:55:07 -0700 Subject: [PATCH 069/162] Removing parallel pytest. --- Makefile | 3 --- setup.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Makefile b/Makefile index a7e44717..9725ace3 100644 --- a/Makefile +++ b/Makefile @@ -36,9 +36,6 @@ serve-visdom: test: pytest . -partestc: - pytest -n 4 . 
- requirements.txt: requirements.in pip-compile requirements.in diff --git a/setup.py b/setup.py index 0fd152f6..cdad2f8d 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ 'text': ['spacy'], 'all': [ 'visdom', 'pymongo', 'dnspython', 'gym[all]', 'pytest', 'spacy', 'ipython', 'jupyter', - 'pip-tools', 'flake8', 'pytest-xdist', 'tb-nightly' + 'pip-tools', 'flake8', 'tb-nightly' ] }, tests_require=[ From 57420cee58a98b476b46aa3f9ad6a3a3c2ec2a80 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 15:03:04 -0700 Subject: [PATCH 070/162] Worked on fixing unit tests after the refactors. --- examples-configs/rl/atari/atari_ppo.yaml | 2 +- examples-configs/rl/atari/atari_ppo_gru.yaml | 2 +- vel/model/autoencoder/mnist_cnn_vae.py | 2 +- vel/rl/algo/distributional_dqn.py | 2 +- vel/rl/algo/dqn.py | 2 +- vel/rl/module/noise/eps_greedy.py | 2 +- vel/rl/module/noise/ou_noise.py | 2 +- vel/rl/test/test_integration.py | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples-configs/rl/atari/atari_ppo.yaml b/examples-configs/rl/atari/atari_ppo.yaml index 4850edba..013f8ef0 100644 --- a/examples-configs/rl/atari/atari_ppo.yaml +++ b/examples-configs/rl/atari/atari_ppo.yaml @@ -39,7 +39,7 @@ reinforcer: max_grad_norm: 0.5 # Gradient clipping parameter cliprange: - name: vel.schedule.linear + name: vel.function.linear initial_value: 0.1 final_value: 0.0 diff --git a/examples-configs/rl/atari/atari_ppo_gru.yaml b/examples-configs/rl/atari/atari_ppo_gru.yaml index 43244de9..27303c92 100644 --- a/examples-configs/rl/atari/atari_ppo_gru.yaml +++ b/examples-configs/rl/atari/atari_ppo_gru.yaml @@ -41,7 +41,7 @@ reinforcer: max_grad_norm: 0.5 # Gradient clipping parameter cliprange: - name: vel.schedule.linear + name: vel.function.linear initial_value: 0.1 final_value: 0.0 diff --git a/vel/model/autoencoder/mnist_cnn_vae.py b/vel/model/autoencoder/mnist_cnn_vae.py index 258294ff..5559bcfd 100644 --- a/vel/model/autoencoder/mnist_cnn_vae.py +++ 
b/vel/model/autoencoder/mnist_cnn_vae.py @@ -8,7 +8,7 @@ import vel.util.network as net_util from vel.api import GradientModel, ModelFactory -from vel.metric.averaging_metric import AveragingNamedMetric +from vel.metric import AveragingNamedMetric from vel.metric.loss_metric import Loss from vel.module.layers import Flatten, Reshape diff --git a/vel/rl/algo/distributional_dqn.py b/vel/rl/algo/distributional_dqn.py index 4b05ecf2..95ca440e 100644 --- a/vel/rl/algo/distributional_dqn.py +++ b/vel/rl/algo/distributional_dqn.py @@ -2,7 +2,7 @@ import torch.nn.utils from vel.api import ModelFactory -from vel.metric.averaging_metric import AveragingNamedMetric +from vel.metric import AveragingNamedMetric from vel.rl.api import OptimizerAlgoBase diff --git a/vel/rl/algo/dqn.py b/vel/rl/algo/dqn.py index 1437a062..bd3c355e 100644 --- a/vel/rl/algo/dqn.py +++ b/vel/rl/algo/dqn.py @@ -3,7 +3,7 @@ import torch.nn.utils from vel.api import ModelFactory -from vel.metric.averaging_metric import AveragingNamedMetric +from vel.metric import AveragingNamedMetric from vel.rl.api import OptimizerAlgoBase diff --git a/vel/rl/module/noise/eps_greedy.py b/vel/rl/module/noise/eps_greedy.py index 5764a489..0f3346eb 100644 --- a/vel/rl/module/noise/eps_greedy.py +++ b/vel/rl/module/noise/eps_greedy.py @@ -5,7 +5,7 @@ from vel.api import Schedule from vel.internal.generic_factory import GenericFactory -from vel.schedule.constant import ConstantSchedule +from vel.function.constant import ConstantSchedule class EpsGreedy(nn.Module): diff --git a/vel/rl/module/noise/ou_noise.py b/vel/rl/module/noise/ou_noise.py index a87f9786..d54a67e9 100644 --- a/vel/rl/module/noise/ou_noise.py +++ b/vel/rl/module/noise/ou_noise.py @@ -2,7 +2,7 @@ import numpy as np import torch.nn as nn -from vel.math.process import OrnsteinUhlenbeckNoiseProcess +from vel.calc.process import OrnsteinUhlenbeckNoiseProcess from vel.internal.generic_factory import GenericFactory diff --git a/vel/rl/test/test_integration.py 
b/vel/rl/test/test_integration.py index 912debf1..ed73a972 100644 --- a/vel/rl/test/test_integration.py +++ b/vel/rl/test/test_integration.py @@ -12,8 +12,8 @@ from vel.rl.metrics import EpisodeRewardMetric from vel.rl.module.noise.eps_greedy import EpsGreedy from vel.rl.module.noise.ou_noise import OuNoise -from vel.schedule.linear import LinearSchedule -from vel.schedule.linear_and_constant import LinearAndConstantSchedule +from vel.function.linear import LinearSchedule +from vel.function.linear_and_constant import LinearAndConstantSchedule from vel.util.random import set_seed from vel.rl.env.classic_atari import ClassicAtariEnv From e072337e8cd1fb8f01d6a313c36654540a94900a Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 22:48:33 -0700 Subject: [PATCH 071/162] Added more publications on optimizers to bibliography. --- docs/Bibliography.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/Bibliography.md b/docs/Bibliography.md index 7e7303ea..808ff7e6 100644 --- a/docs/Bibliography.md +++ b/docs/Bibliography.md @@ -16,10 +16,26 @@ on this library: - (2012) **Lecture 6.5 -- RmsProp: Divide the gradient by a running average of its recent magnitude** Geoff Hinton http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf + +- (Dec 2012) **ADADELTA: An Adaptive Learning Rate Method** + Matthew D. Zeiler + https://arxiv.org/abs/1212.5701 + +- (Dec 2014) **Adam: A Method for Stochastic Optimization** + Diederik P. Kingma, Jimmy Ba + https://arxiv.org/abs/1412.6980 - (Jun 2015) **Cyclical Learning Rates for Training Neural Networks** Leslie N. Smith https://arxiv.org/abs/1506.01186 + +- (Jul 2019) **Lookahead Optimizer: k steps forward, 1 step back** + Michael R. 
Zhang, James Lucas, Geoffrey Hinton, Jimmy Ba + https://arxiv.org/abs/1907.08610 + +- (Aug 2019) **On the Variance of the Adaptive Learning Rate and Beyond** + Liu, Liyuan and Jiang, Haoming and He, Pengcheng and Chen, Weizhu and Liu, Xiaodong and Gao, Jianfeng and Han, Jiawei + https://arxiv.org/abs/1908.03265 ### Residual Networks From 54bd258c54cad2725f5d3d04240f8932d3788c28 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 22:48:44 -0700 Subject: [PATCH 072/162] New optimizers: RAdam + Ranger --- vel/optimizer/radam.py | 133 ++++++++++++++++++++++++ vel/optimizer/ranger.py | 218 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 351 insertions(+) create mode 100644 vel/optimizer/radam.py create mode 100644 vel/optimizer/ranger.py diff --git a/vel/optimizer/radam.py b/vel/optimizer/radam.py new file mode 100644 index 00000000..607b11e9 --- /dev/null +++ b/vel/optimizer/radam.py @@ -0,0 +1,133 @@ +""" +RAdam implementation from: https://github.com/LiyuanLucasLiu/RAdam/blob/master/cifar_imagenet/utils/radam.py +""" +import math +import collections +import torch.optim + +import vel.util.module_util as mu + +from vel.api import OptimizerFactory, Model + + +class RAdam(torch.optim.Optimizer): + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): + defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) + self.buffer = [[None, None, None] for ind in range(10)] + super(RAdam, self).__init__(params, defaults) + + def __setstate__(self, state): + super(RAdam, self).__setstate__(state) + + def step(self, closure=None): + + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError('RAdam does not support sparse gradients') + + p_data_fp32 = p.data.float() + + state = self.state[p] + + if len(state) == 0: + state['step'] = 0 + 
state['exp_avg'] = torch.zeros_like(p_data_fp32) + state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) + else: + state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) + state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32) + + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + beta1, beta2 = group['betas'] + + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + exp_avg.mul_(beta1).add_(1 - beta1, grad) + + state['step'] += 1 + buffered = self.buffer[int(state['step'] % 10)] + if state['step'] == buffered[0]: + N_sma, step_size = buffered[1], buffered[2] + else: + buffered[0] = state['step'] + beta2_t = beta2 ** state['step'] + N_sma_max = 2 / (1 - beta2) - 1 + N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t) + buffered[1] = N_sma + + # more conservative since it's an approximated value + if N_sma >= 5: + step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step']) + else: + step_size = group['lr'] / (1 - beta1 ** state['step']) + buffered[2] = step_size + + if group['weight_decay'] != 0: + p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) + + # more conservative since it's an approximated value + if N_sma >= 5: + denom = exp_avg_sq.sqrt().add_(group['eps']) + p_data_fp32.addcdiv_(-step_size, exp_avg, denom) + else: + p_data_fp32.add_(-step_size, exp_avg) + + p.data.copy_(p_data_fp32) + + return loss + + +class RAdamFactory(OptimizerFactory): + """ RAdam optimizer factory """ + + def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, layer_groups=False): + self.lr = lr + self.betas = betas + self.eps = eps + self.weight_decay = weight_decay + self.layer_groups = layer_groups + + def instantiate(self, model: Model) -> RAdam: + if self.layer_groups: + parameters = mu.to_parameter_groups(model.get_layer_groups()) + + if isinstance(self.lr, collections.Sequence): + for idx, lr in 
enumerate(self.lr): + parameters[idx]['lr'] = lr + + default_lr = self.lr[0] + else: + default_lr = float(self.lr) + + if isinstance(self.weight_decay, collections.Sequence): + for idx, weight_decay in enumerate(self.weight_decay): + parameters[idx]['weight_decay'] = weight_decay + + default_weight_decay = self.weight_decay[0] + else: + default_weight_decay = self.weight_decay + + return RAdam( + parameters, + lr=default_lr, betas=self.betas, eps=self.eps, weight_decay=default_weight_decay, + ) + else: + parameters = filter(lambda p: p.requires_grad, model.parameters()) + + return RAdam( + parameters, + lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay, + ) + + +def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, layer_groups=False): + """ Vel factory function """ + return RAdamFactory(lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, layer_groups=layer_groups) diff --git a/vel/optimizer/ranger.py b/vel/optimizer/ranger.py new file mode 100644 index 00000000..3ab7f896 --- /dev/null +++ b/vel/optimizer/ranger.py @@ -0,0 +1,218 @@ +#Ranger deep learning optimizer - RAdam + Lookahead combined. +#https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer + +#Ranger has now been used to capture 12 records on the FastAI leaderboard. + +#This version = 9.3.19 + +#Credits: +#RAdam --> https://github.com/LiyuanLucasLiu/RAdam +#Lookahead --> rewritten by lessw2020, but big thanks to Github @LonePatient and @RWightman for ideas from their code. +#Lookahead paper --> MZhang,G Hinton https://arxiv.org/abs/1907.08610 + +#summary of changes: +#full code integration with all updates at param level instead of group, moves slow weights into state dict (from generic weights), +#supports group learning rates (thanks @SHolderbach), fixes sporadic load from saved model issues. +#changes 8/31/19 - fix references to *self*.N_sma_threshold; + #changed eps to 1e-5 as better default than 1e-8. 
+ +import math +import torch +import collections + +from torch.optim.optimizer import Optimizer + + +import vel.util.module_util as mu + +from vel.api import OptimizerFactory, Model + + +class Ranger(Optimizer): + + def __init__(self, params, lr=1e-3, alpha=0.5, k=6, N_sma_threshhold=5, betas=(.95,0.999), eps=1e-5, weight_decay=0): + #parameter checks + if not 0.0 <= alpha <= 1.0: + raise ValueError(f'Invalid slow update rate: {alpha}') + if not 1 <= k: + raise ValueError(f'Invalid lookahead steps: {k}') + if not lr > 0: + raise ValueError(f'Invalid Learning Rate: {lr}') + if not eps > 0: + raise ValueError(f'Invalid eps: {eps}') + + #parameter comments: + # beta1 (momentum) of .95 seems to work better than .90... + #N_sma_threshold of 5 seems better in testing than 4. + #In both cases, worth testing on your dataset (.90 vs .95, 4 vs 5) to make sure which works best for you. + + #prep defaults and init torch.optim base + defaults = dict(lr=lr, alpha=alpha, k=k, step_counter=0, betas=betas, N_sma_threshhold=N_sma_threshhold, eps=eps, weight_decay=weight_decay) + super().__init__(params,defaults) + + #adjustable threshold + self.N_sma_threshhold = N_sma_threshhold + + #now we can get to work... + #removed as we now use step from RAdam...no need for duplicate step counting + #for group in self.param_groups: + # group["step_counter"] = 0 + #print("group step counter init") + + #look ahead params + self.alpha = alpha + self.k = k + + #radam buffer for state + self.radam_buffer = [[None,None,None] for ind in range(10)] + + #self.first_run_check=0 + + #lookahead weights + #9/2/19 - lookahead param tensors have been moved to state storage. + #This should resolve issues with load/save where weights were left in GPU memory from first load, slowing down future runs. 
+ + #self.slow_weights = [[p.clone().detach() for p in group['params']] + # for group in self.param_groups] + + #don't use grad for lookahead weights + #for w in it.chain(*self.slow_weights): + # w.requires_grad = False + + def __setstate__(self, state): + print("set state called") + super(Ranger, self).__setstate__(state) + + + def step(self, closure=None): + loss = None + #note - below is commented out b/c I have other work that passes back the loss as a float, and thus not a callable closure. + #Uncomment if you need to use the actual closure... + + #if closure is not None: + #loss = closure() + + #Evaluate averages and grad, update param tensors + for group in self.param_groups: + + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError('Ranger optimizer does not support sparse gradients') + + p_data_fp32 = p.data.float() + + state = self.state[p] #get state dict for this param + + if len(state) == 0: #if first time to run...init dictionary with our desired entries + #if self.first_run_check==0: + #self.first_run_check=1 + #print("Initializing slow buffer...should not see this at load from saved model!") + state['step'] = 0 + state['exp_avg'] = torch.zeros_like(p_data_fp32) + state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) + + #look ahead weight storage now in state dict + state['slow_buffer'] = torch.empty_like(p.data) + state['slow_buffer'].copy_(p.data) + + else: + state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) + state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32) + + #begin computations + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + beta1, beta2 = group['betas'] + + #compute variance mov avg + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + #compute mean moving avg + exp_avg.mul_(beta1).add_(1 - beta1, grad) + + state['step'] += 1 + + + buffered = self.radam_buffer[int(state['step'] % 10)] + if state['step'] == buffered[0]: + N_sma, 
step_size = buffered[1], buffered[2] + else: + buffered[0] = state['step'] + beta2_t = beta2 ** state['step'] + N_sma_max = 2 / (1 - beta2) - 1 + N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t) + buffered[1] = N_sma + if N_sma > self.N_sma_threshhold: + step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step']) + else: + step_size = 1.0 / (1 - beta1 ** state['step']) + buffered[2] = step_size + + if group['weight_decay'] != 0: + p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) + + if N_sma > self.N_sma_threshhold: + denom = exp_avg_sq.sqrt().add_(group['eps']) + p_data_fp32.addcdiv_(-step_size * group['lr'], exp_avg, denom) + else: + p_data_fp32.add_(-step_size * group['lr'], exp_avg) + + p.data.copy_(p_data_fp32) + + #integrated look ahead... + #we do it at the param level instead of group level + if state['step'] % group['k'] == 0: + slow_p = state['slow_buffer'] #get access to slow param tensor + slow_p.add_(self.alpha, p.data - slow_p) #(fast weights - slow weights) * alpha + p.data.copy_(slow_p) #copy interpolated weights to RAdam param tensor + + return loss + + +class RangerFactory(OptimizerFactory): + """ RAdam optimizer factory """ + + def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, layer_groups=False): + self.lr = lr + self.betas = betas + self.eps = eps + self.weight_decay = weight_decay + self.layer_groups = layer_groups + + def instantiate(self, model: Model) -> Ranger: + if self.layer_groups: + parameters = mu.to_parameter_groups(model.get_layer_groups()) + + if isinstance(self.lr, collections.Sequence): + for idx, lr in enumerate(self.lr): + parameters[idx]['lr'] = lr + + default_lr = self.lr[0] + else: + default_lr = float(self.lr) + + if isinstance(self.weight_decay, collections.Sequence): + for idx, weight_decay in enumerate(self.weight_decay): + parameters[idx]['weight_decay'] = weight_decay + + 
default_weight_decay = self.weight_decay[0] + else: + default_weight_decay = self.weight_decay + + return Ranger( + parameters, + lr=default_lr, betas=self.betas, eps=self.eps, weight_decay=default_weight_decay, + ) + else: + parameters = filter(lambda p: p.requires_grad, model.parameters()) + + return Ranger( + parameters, + lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay, + ) + + +def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, layer_groups=False): + """ Vel factory function """ + return RangerFactory(lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, layer_groups=layer_groups) From a075293ce99dca656b3088e69250694c8b979a99 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 22:49:16 -0700 Subject: [PATCH 073/162] API to transform only a single coordinate. --- vel/api/transformation.py | 11 +++++++++++ vel/data/dataflow.py | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/vel/api/transformation.py b/vel/api/transformation.py index 2bf960bf..42f1989c 100644 --- a/vel/api/transformation.py +++ b/vel/api/transformation.py @@ -18,6 +18,10 @@ def denormalize(self, datapoint): """ Operation reverse to normalization """ return datapoint + def denormalize_item(self, datapoint_item, coordinate): + """ Denormalize only a single item of the datapoint """ + return datapoint_item + class ScopedTransformation(Transformation): """ Transformation applied only to certain keys of the datapoint """ @@ -52,3 +56,10 @@ def denormalize(self, datapoint): datapoint[name] = self.denormalization_transform(datapoint[name]) return datapoint + + def denormalize_item(self, datapoint_item, coordinate): + """ Denormalize only a single item of the datapoint """ + if coordinate in self.scope: + return self.denormalization_transform(datapoint_item) + else: + return datapoint_item diff --git a/vel/data/dataflow.py b/vel/data/dataflow.py index b0731729..8d52ab0e 100644 --- a/vel/data/dataflow.py +++ 
b/vel/data/dataflow.py @@ -60,6 +60,13 @@ def denormalize(self, datapoint): return datapoint + def denormalize_item(self, datapoint_item, coordinate): + """ Perform a reverse normalization of a single item (for viewing) """ + for t in self.transformations[::-1]: + datapoint_item = t.denormalize_item(datapoint_item, coordinate) + + return datapoint_item + def __len__(self): """ Length of the dataset """ return len(self.dataset) From abf7cc72e7b10f3bb6e2e5d92bed3ef00b1d6e7e Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 22:50:14 -0700 Subject: [PATCH 074/162] Iterating on the MNIST VAE. --- .../autoencoder/mnist/mnist_cnn_vae.yaml | 14 +- .../autoencoder/mnist/mnist_fc_vae.yaml | 35 +++++ vel/model/autoencoder/mnist_cnn_vae.py | 2 - vel/model/autoencoder/mnist_fc_vae.py | 143 ++++++++++++++++++ 4 files changed, 185 insertions(+), 9 deletions(-) create mode 100644 examples-configs/autoencoder/mnist/mnist_fc_vae.yaml create mode 100644 vel/model/autoencoder/mnist_fc_vae.py diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml index 043edd22..6dc6525f 100644 --- a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml @@ -1,4 +1,4 @@ -name: 'mnist_cnn_autoenoder' +name: 'mnist_cnn_vae' model: @@ -6,8 +6,8 @@ model: img_rows: 28 img_cols: 28 img_channels: 1 - channels: [8, 16, 16] - representation_length: 16 + channels: [64, 128, 256] + representation_length: 20 source: @@ -15,7 +15,7 @@ source: loader: name: vel.data.dataset_loader - batch_size: 128 + batch_size: 256 num_workers: 4 transformations: @@ -24,12 +24,12 @@ loader: optimizer: - name: vel.optimizer.adam - lr: 1.0e-3 + name: vel.optimizer.radam + lr: 1.0e-4 commands: train: name: vel.command.train_command - epochs: 12 + epochs: 200 diff --git a/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml b/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml new file 
mode 100644 index 00000000..a6422905 --- /dev/null +++ b/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml @@ -0,0 +1,35 @@ +name: 'mnist_fc_vae' + + +model: + name: vel.model.autoencoder.mnist_fc_vae + img_rows: 28 + img_cols: 28 + img_channels: 1 + layers: [512, 256] + representation_length: 16 + + +source: + name: vel.data.source.vision.mnist + +loader: + name: vel.data.dataset_loader + batch_size: 256 + num_workers: 4 + + transformations: + - name: vel.data.transformation.image_to_tensor + - name: vel.data.transformation.unsupervised + + +optimizer: + name: vel.optimizer.radam + lr: 1.0e-3 + + +commands: + train: + name: vel.command.train_command + epochs: 100 + diff --git a/vel/model/autoencoder/mnist_cnn_vae.py b/vel/model/autoencoder/mnist_cnn_vae.py index 5559bcfd..b9222c76 100644 --- a/vel/model/autoencoder/mnist_cnn_vae.py +++ b/vel/model/autoencoder/mnist_cnn_vae.py @@ -21,8 +21,6 @@ class MnistCnnVAE(GradientModel): def __init__(self, img_rows, img_cols, img_channels, channels=None, representation_length=32): super(MnistCnnVAE, self).__init__() - assert representation_length % 2 == 0, "Representation length must be even" - if channels is None: channels = [16, 32, 32] diff --git a/vel/model/autoencoder/mnist_fc_vae.py b/vel/model/autoencoder/mnist_fc_vae.py new file mode 100644 index 00000000..fc1c327c --- /dev/null +++ b/vel/model/autoencoder/mnist_fc_vae.py @@ -0,0 +1,143 @@ +import itertools as it + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + +from vel.api import GradientModel, ModelFactory +from vel.metric import AveragingNamedMetric +from vel.metric.loss_metric import Loss +from vel.module.layers import Flatten, Reshape + + +class MnistCnnVAE(GradientModel): + """ + A simple MNIST variational autoencoder, containing 3 convolutional layers. 
+ """ + + def __init__(self, img_rows, img_cols, img_channels, layers=None, representation_length=32): + super(MnistCnnVAE, self).__init__() + + if layers is None: + layers = [512, 256] + + self.representation_length = representation_length + + # self.final_width = net_util.convolutional_layer_series(img_rows, layer_series) + # self.final_height = net_util.convolutional_layer_series(img_cols, layer_series) + self.layers = layers + + input_length = img_rows * img_cols * img_channels + + self.encoder = nn.Sequential( + Flatten(), + nn.Linear(in_features=input_length, out_features=self.layers[0]), + nn.ReLU(True), + nn.Linear(in_features=self.layers[0], out_features=self.layers[1]), + nn.ReLU(True), + nn.Linear(self.layers[1], representation_length * 2) + ) + + self.decoder = nn.Sequential( + nn.Linear(representation_length, self.layers[1]), + nn.ReLU(True), + nn.Linear(self.layers[1], self.layers[0]), + nn.ReLU(True), + nn.Linear(self.layers[0], input_length), + Reshape(img_channels, img_rows, img_cols), + nn.Sigmoid() + ) + + @staticmethod + def _weight_initializer(tensor): + init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('relu')) + init.constant_(tensor.bias, 0.0) + + def reset_weights(self): + for m in it.chain(self.encoder, self.decoder): + if isinstance(m, nn.Conv2d): + self._weight_initializer(m) + elif isinstance(m, nn.ConvTranspose2d): + self._weight_initializer(m) + elif isinstance(m, nn.Linear): + self._weight_initializer(m) + + def encode(self, sample): + encoding = self.encoder(sample) + + mu = encoding[:, :self.representation_length] + # I encode std directly as a softplus, rather than exp(logstd) + std = F.softplus(encoding[:, self.representation_length:]) + + return mu + torch.randn_like(std) * std + + def decode(self, sample): + return self.decoder(sample) + + def forward(self, sample): + encoding = self.encoder(sample) + + mu = encoding[:, :self.representation_length] + # I encode std directly as a softplus, rather than exp(logstd) + 
std = F.softplus(encoding[:, self.representation_length:]) + + z = mu + torch.randn_like(std) * std + + decoded = self.decoder(z) + + return { + 'decoded': decoded, + 'encoding': z, + 'mu': mu, + 'std': std + } + + def calculate_gradient(self, data): + """ Calculate a gradient of loss function """ + output = self(data['x']) + + y_pred = output['decoded'] + + mu = output['mu'] + std = output['std'] + var = std ** 2 + + kl_divergence = - 0.5 * (1 + torch.log(var) - mu ** 2 - var).sum(dim=1) + kl_divergence = kl_divergence.mean() + + # reconstruction = 0.5 * F.mse_loss(y_pred, y_true) + + # We must sum over all image axis and average only on minibatch axis + reconstruction = F.binary_cross_entropy(y_pred, data['y'], reduce=False).sum(1).sum(1).sum(1).mean() + loss = reconstruction + kl_divergence + + if self.training: + loss.backward() + + return { + 'loss': loss.item(), + 'reconstruction': reconstruction.item(), + 'kl_divergence': kl_divergence.item() + } + + def metrics(self): + """ Set of metrics for this model """ + return [ + Loss(), + AveragingNamedMetric('reconstruction', scope="train"), + AveragingNamedMetric('kl_divergence', scope="train") + ] + + +def create(img_rows, img_cols, img_channels, layers=None, representation_length=32): + """ Vel factory function """ + if layers is None: + layers = [512, 256] + + def instantiate(**_): + return MnistCnnVAE( + img_rows, img_cols, img_channels, layers=layers, representation_length=representation_length + ) + + return ModelFactory.generic(instantiate) From 6b46de800d1943dc42fce55564c6e16427cacf1e Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 22:51:23 -0700 Subject: [PATCH 075/162] Cleaned up README a bit. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5b39c7a7..f115666e 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ that are ready to run and easy to modify for other similar usecases: # Implemented models - Unsupervised learning -- Autoencoders and Variational autoencoders with examples on MNIST dataset. +- AutoEncoders (AE) and Variational AutoEncoders (VAE) with examples on MNIST dataset. # Examples From 4fa9ca453bb402c6a69449f2bc0a380bd1e67e4d Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 23:01:20 -0700 Subject: [PATCH 076/162] Fixed a warning in VAE code. --- vel/model/autoencoder/mnist_cnn_vae.py | 2 +- vel/model/autoencoder/mnist_fc_vae.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vel/model/autoencoder/mnist_cnn_vae.py b/vel/model/autoencoder/mnist_cnn_vae.py index b9222c76..a9da7e37 100644 --- a/vel/model/autoencoder/mnist_cnn_vae.py +++ b/vel/model/autoencoder/mnist_cnn_vae.py @@ -122,7 +122,7 @@ def calculate_gradient(self, data): # reconstruction = 0.5 * F.mse_loss(y_pred, y_true) # We must sum over all image axis and average only on minibatch axis - reconstruction = F.binary_cross_entropy(y_pred, data['y'], reduce=False).sum(1).sum(1).sum(1).mean() + reconstruction = F.binary_cross_entropy(y_pred, data['y'], reduction='none').sum(1).sum(1).sum(1).mean() loss = reconstruction + kl_divergence if self.training: diff --git a/vel/model/autoencoder/mnist_fc_vae.py b/vel/model/autoencoder/mnist_fc_vae.py index fc1c327c..e79f114b 100644 --- a/vel/model/autoencoder/mnist_fc_vae.py +++ b/vel/model/autoencoder/mnist_fc_vae.py @@ -109,7 +109,7 @@ def calculate_gradient(self, data): # reconstruction = 0.5 * F.mse_loss(y_pred, y_true) # We must sum over all image axis and average only on minibatch axis - reconstruction = F.binary_cross_entropy(y_pred, data['y'], reduce=False).sum(1).sum(1).sum(1).mean() + reconstruction = 
F.binary_cross_entropy(y_pred, data['y'], reduction='none').sum(1).sum(1).sum(1).mean() loss = reconstruction + kl_divergence if self.training: From b579864e8cd9ded14f94e0ca76abb2de6e909d9b Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 12 Sep 2019 23:01:30 -0700 Subject: [PATCH 077/162] Implement "convert warnings to errors" option. --- vel/launcher.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vel/launcher.py b/vel/launcher.py index 3f800638..7dfa94eb 100644 --- a/vel/launcher.py +++ b/vel/launcher.py @@ -17,6 +17,7 @@ def main(): parser.add_argument('-r', '--run_number', type=int, default=0, help="A run number") parser.add_argument('-d', '--device', default='cuda', help="A device to run the model on") parser.add_argument('-s', '--seed', type=int, default=None, help="Random seed for the project") + parser.add_argument('--werr', action='store_true', default=False, help="Convert warnings to errors") parser.add_argument( '-p', '--param', type=str, metavar='NAME=VALUE', action='append', default=[], help="Configuration parameters" @@ -30,6 +31,11 @@ def main(): args = parser.parse_args() + if args.werr: + import warnings + warnings.filterwarnings('error', module='vel.*') + warnings.filterwarnings('error', module='torch\\..*') + model_config = ModelConfig.from_file( args.config, args.run_number, continue_training=getattr(args, 'continue'), device=args.device, seed=args.seed, params={k: v for (k, v) in (Parser.parse_equality(eq) for eq in args.param)} From feb2efba557c4cc1e201f7408ff1b82126ba1ce6 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 14 Sep 2019 20:33:24 -0700 Subject: [PATCH 078/162] Expanding bibliography. 
--- docs/Bibliography.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/docs/Bibliography.md b/docs/Bibliography.md index 808ff7e6..8ed79213 100644 --- a/docs/Bibliography.md +++ b/docs/Bibliography.md @@ -4,13 +4,18 @@ Below, I present a most likely incomplete list of works I referred to when I was on this library: -### Autoencoders +### Latent variable models - (Dec 2013) **Auto-Encoding Variational Bayes** Diederik P Kingma, Max Welling https://arxiv.org/abs/1312.6114 +- (Sep 2015) **Importance Weighted Autoencoders** + Yuri Burda, Roger Grosse, Ruslan Salakhutdinov + https://arxiv.org/abs/1509.00519 + + ### Learning rate and optimization - (2012) **Lecture 6.5 -- RmsProp: Divide the gradient by a running average of its recent magnitude** @@ -143,11 +148,13 @@ on this library: ### Open source repositories This repository contains various parts of functionality derived from open source code -in the following repositories: +in the following repositories (in alphabetical order): +- https://github.com/Kaixhin/Rainbow +- https://github.com/Khrylx/PyTorch-RL +- https://github.com/LiyuanLucasLiu/RAdam - https://github.com/fastai/fastai -- https://github.com/pytorch/pytorch +- https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer - https://github.com/openai/baselines -- https://github.com/tensorflow/tensorflow -- https://github.com/Kaixhin/Rainbow -- https://github.com/Khrylx/PyTorch-RL \ No newline at end of file +- https://github.com/pytorch/pytorch +- https://github.com/tensorflow/tensorflow \ No newline at end of file From cc53ae525e5ed0ee75ec83ef12bda70e0666a379 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 14 Sep 2019 22:17:35 -0700 Subject: [PATCH 079/162] Added a few extra interactive options to dataflow. 
--- vel/data/dataflow.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/vel/data/dataflow.py b/vel/data/dataflow.py index 8d52ab0e..de7fb4ab 100644 --- a/vel/data/dataflow.py +++ b/vel/data/dataflow.py @@ -1,5 +1,7 @@ -import typing +import collections +import torch import torch.utils.data as data +import typing from vel.api import Source, Transformation @@ -44,6 +46,32 @@ def get_raw(self, index): """ Get raw data point """ return pre_map(self.dataset[index]) + def get_batch(self, batch_idx, batch_size): + """ + Simple method to get a batch of data, mainly for interactive purposes. + For training, a DataLoader should be used. + """ + + start_idx = batch_idx * batch_size + end_idx = min(start_idx + batch_size, len(self)) + + buffer = collections.defaultdict(list) + + for i in range(start_idx, end_idx): + datapoint = self[i] + + for k, v in datapoint.items(): + buffer[k].append(v) + + return { + k: torch.stack(v, dim=0) for k, v in buffer.items() + } + + def num_batches(self, batch_size): + """ Number of batches of given batch size """ + length = len(self) + return (length + (batch_size - 1)) // batch_size + def __getitem__(self, index): """ Get data point from the dataset """ datapoint = self.get_raw(index) From 67d9d4b1569283dc4b19447509dbca1f0ffb5964 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 14 Sep 2019 22:18:03 -0700 Subject: [PATCH 080/162] Proper NLL estimation using importance sampling. 
--- vel/model/autoencoder/mnist_cnn_vae.py | 82 +++++++++++++++++++++----- 1 file changed, 68 insertions(+), 14 deletions(-) diff --git a/vel/model/autoencoder/mnist_cnn_vae.py b/vel/model/autoencoder/mnist_cnn_vae.py index a9da7e37..9cfa95a8 100644 --- a/vel/model/autoencoder/mnist_cnn_vae.py +++ b/vel/model/autoencoder/mnist_cnn_vae.py @@ -76,27 +76,28 @@ def reset_weights(self): elif isinstance(m, nn.Linear): self._weight_initializer(m) - def encode(self, sample): + def encoder_distribution(self, sample): encoding = self.encoder(sample) - mu = encoding[:, :self.representation_length] # I encode std directly as a softplus, rather than exp(logstd) std = F.softplus(encoding[:, self.representation_length:]) + return mu, std + + def encode(self, sample): + mu, std = self.encoder_distribution(sample) + # Sample z return mu + torch.randn_like(std) * std def decode(self, sample): + # We don't sample here, because decoder is so weak it doesn't make sense return self.decoder(sample) def forward(self, sample): - encoding = self.encoder(sample) - - mu = encoding[:, :self.representation_length] - # I encode std directly as a softplus, rather than exp(logstd) - std = F.softplus(encoding[:, self.representation_length:]) + mu, std = self.encoder_distribution(sample) + # Sample z z = mu + torch.randn_like(std) * std - decoded = self.decoder(z) return { @@ -110,30 +111,83 @@ def calculate_gradient(self, data): """ Calculate a gradient of loss function """ output = self(data['x']) + # ELBO is E_q log p(x, z) / q(z | x) + # Which can be expressed in many equivalent forms: + # (1) E_q log p(x | z) + log p(z) - log q(z | x) + # (2) E_q log p(x | z) - D_KL(p(z) || q(z | x)) + # (3) E_q log p(x) - D_KL(p(z | x) || q(z | x)Biblio) + + # Form 3 is interesting from a theoretical standpoint, but is intractable to compute directly + # While forms (1) and (2) can be computed directly. 
+ # Positive aspect of form (2) is that KL divergence can be calculated analytically + # further reducing the variance of the gradient + y_pred = output['decoded'] mu = output['mu'] std = output['std'] var = std ** 2 + # Analytical solution of KL divergence kl_divergence = - 0.5 * (1 + torch.log(var) - mu ** 2 - var).sum(dim=1) kl_divergence = kl_divergence.mean() - # reconstruction = 0.5 * F.mse_loss(y_pred, y_true) + # Diag-gaussian likelihood + # likelihood = 0.5 * F.mse_loss(y_pred, y_true) # We must sum over all image axis and average only on minibatch axis - reconstruction = F.binary_cross_entropy(y_pred, data['y'], reduction='none').sum(1).sum(1).sum(1).mean() - loss = reconstruction + kl_divergence + # Log prob p(x | z) in the case where the output distribution is Bernoulli(p) + likelihood = F.binary_cross_entropy(y_pred, data['y'], reduction='none').sum((1, 2, 3)).mean() + + elbo = likelihood + kl_divergence + + nll = self.nll(data['x'], num_posterior_samples=5) if self.training: - loss.backward() + elbo.backward() return { - 'loss': loss.item(), - 'reconstruction': reconstruction.item(), + 'loss': elbo.item(), + 'nll': nll.mean().item(), + 'reconstruction': likelihood.item(), 'kl_divergence': kl_divergence.item() } + def logmeanexp(self, inputs, dim=1): + if inputs.size(dim) == 1: + return inputs + else: + input_max = inputs.max(dim, keepdim=True)[0] + return (inputs - input_max).exp().mean(dim).log() + input_max.squeeze(dim=dim) + + @torch.no_grad() + def nll(self, data_sample, num_posterior_samples: int = 1): + """ + Upper bound on negative log-likelihood of supplied data. 
+ If num samples goes to infinity, the nll of data should + approach true value + """ + assert num_posterior_samples >= 1, "Need at least one posterior sample" + + buffer = [] + + mu, std = self.encoder_distribution(data_sample) + var = std ** 2 + + kl_divergence = - 0.5 * (1 + torch.log(var) - mu ** 2 - var).sum(dim=1) + + for i in range(num_posterior_samples): + z = mu + torch.randn_like(std) * std + y_pred = self.decoder(z) + + likelihood = F.binary_cross_entropy(y_pred, data_sample, reduction='none').sum((1, 2, 3)) + elbo = likelihood + kl_divergence + + buffer.append(-elbo) + + averaged = self.logmeanexp(torch.stack(buffer, dim=-1), dim=-1) + return -averaged + def metrics(self): """ Set of metrics for this model """ return [ From b7f66673ce0d164d75ad8f072495654957a7fe56 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 19 Sep 2019 15:17:54 -0700 Subject: [PATCH 081/162] Minor dependency update. --- requirements.txt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index fc2a4418..9629fe23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,14 +16,16 @@ dnspython==1.16.0 future==0.17.1 # via pyglet gym[atari,box2d,classic_control]==0.14.0 idna==2.8 # via requests -importlib-metadata==0.22 # via pluggy, pytest +importlib-metadata==0.23 # via pluggy, pytest joblib==0.13.2 # via scikit-learn +jsonpatch==1.24 # via visdom +jsonpointer==2.0 # via jsonpatch kiwisolver==1.1.0 # via matplotlib matplotlib==3.1.1 more-itertools==7.2.0 # via pytest, zipp numpy==1.17.2 opencv-python==4.1.1.26 -packaging==19.1 # via pytest +packaging==19.2 # via pytest pandas==0.25.1 pillow==6.1.0 # via gym, torchvision, visdom pluggy==0.13.0 # via pytest @@ -45,9 +47,9 @@ torchfile==0.1.0 # via visdom torchtext==0.4.0 torchvision==0.4.0 tornado==6.0.3 # via visdom -tqdm==4.35.0 -urllib3==1.25.3 # via requests -visdom==0.1.8.8 +tqdm==4.36.1 +urllib3==1.25.4 # via requests +visdom==0.1.8.9 wcwidth==0.1.7 # 
via pytest websocket-client==0.56.0 # via visdom zipp==0.6.0 # via importlib-metadata From 53628e470bcd7f5730b1be8f50dea2d6fb4b27ba Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 19 Sep 2019 20:49:21 -0700 Subject: [PATCH 082/162] Implemented fully connected MNIST VAE with results matching the IWAE paper. --- .../autoencoder/mnist/mnist_fc_vae.yaml | 59 ++++++- vel/data/augmentation/scale_min_size.py | 2 +- vel/data/dataset_loader.py | 10 +- vel/data/transformation/binarize_image.py | 16 ++ vel/data/transformation/pil_resize.py | 18 ++ vel/model/autoencoder/fc_vae.py | 106 +++++++++++ vel/model/autoencoder/mnist_fc_vae.py | 143 --------------- vel/model/autoencoder/vae_base.py | 164 ++++++++++++++++++ vel/scheduler/multi_step.py | 52 +++--- vel/storage/backend/mongodb.py | 2 +- 10 files changed, 397 insertions(+), 175 deletions(-) create mode 100644 vel/data/transformation/binarize_image.py create mode 100644 vel/data/transformation/pil_resize.py create mode 100644 vel/model/autoencoder/fc_vae.py delete mode 100644 vel/model/autoencoder/mnist_fc_vae.py create mode 100644 vel/model/autoencoder/vae_base.py diff --git a/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml b/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml index a6422905..647094f4 100644 --- a/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml @@ -2,12 +2,14 @@ name: 'mnist_fc_vae' model: - name: vel.model.autoencoder.mnist_fc_vae + name: vel.model.autoencoder.fc_vae img_rows: 28 img_cols: 28 img_channels: 1 - layers: [512, 256] - representation_length: 16 + layers: [200, 200] + representation_length: 50 + max_grad_norm: 1.0 + analytical_kl_div: true source: @@ -15,10 +17,13 @@ source: loader: name: vel.data.dataset_loader - batch_size: 256 + batch_size: 128 num_workers: 4 + pin_memory: true transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.binarize_image - name: 
vel.data.transformation.image_to_tensor - name: vel.data.transformation.unsupervised @@ -26,10 +31,54 @@ loader: optimizer: name: vel.optimizer.radam lr: 1.0e-3 + eps: 1.0e-4 + + +scheduler: + name: vel.scheduler.multi_step + gamma: 0.71968 # 10 ** (-1/7) + milestones: [ 1, 4, 13, 40, 121, 364, 1093, 3280] commands: train: name: vel.command.train_command - epochs: 100 + epochs: 3280 +# train: +# name: vel.command.phase_train_command +# phases: +# - name: vel.train.phase.cycle +# max_lr: 0.001 +# min_lr: 0.000 +# interpolate: 'cosine' +# cycles: 20 +# cycle_len: 1 +# - name: vel.train.phase.cycle +# max_lr: 0.001 +# min_lr: 0.0000 +# interpolate: 'cosine' +# cycles: 7 +# cycle_len: 1 +# cycle_mult: 2 +# - name: vel.train.phase.cycle +# max_lr: 0.0001 +# min_lr: 0.0000 +# interpolate: 'cosine' +# cycles: 8 +# cycle_len: 1 +# cycle_mult: 2 +# - name: vel.train.phase.cycle +# max_lr: 0.00003 +# min_lr: 0.0000 +# interpolate: 'cosine' +# cycles: 9 +# cycle_len: 1 +# cycle_mult: 2 +# - name: vel.train.phase.cycle +# max_lr: 0.00001 +# min_lr: 0.0000 +# interpolate: 'cosine' +# cycles: 10 +# cycle_len: 1 +# cycle_mult: 2 diff --git a/vel/data/augmentation/scale_min_size.py b/vel/data/augmentation/scale_min_size.py index d6ecd06b..88554a09 100644 --- a/vel/data/augmentation/scale_min_size.py +++ b/vel/data/augmentation/scale_min_size.py @@ -15,7 +15,7 @@ def __init__(self, size, scope='x', tags=None): self.size = size def transform(self, x_data): - return op.scale_min(x_data, self.size, Image.BILINEAR) + return op.scale_min(x_data, self.size) def create(size, scope='x', tags=None): diff --git a/vel/data/dataset_loader.py b/vel/data/dataset_loader.py index 8228cf52..59be7841 100644 --- a/vel/data/dataset_loader.py +++ b/vel/data/dataset_loader.py @@ -10,11 +10,12 @@ class DatasetLoader: """ Loads data from a data source to serve it to the model """ def __init__(self, source: Source, batch_size: int, num_workers: int, - transformations: typing.Optional[list] = None): +
transformations: typing.Optional[list] = None, pin_memory=False): self.source = source self.batch_size = batch_size self.num_workers = num_workers self.transformations = transformations + self.pin_memory = pin_memory if transformations is not None: self.transformed_source = DataFlow.transform(self.source, transformations) @@ -23,11 +24,12 @@ def __init__(self, source: Source, batch_size: int, num_workers: int, self.train_loader = data.DataLoader( self.transformed_source.train, batch_size=batch_size, shuffle=True, num_workers=num_workers, - drop_last=True + pin_memory=pin_memory, drop_last=True ) self.val_loader = data.DataLoader( self.transformed_source.validation, batch_size=batch_size, shuffle=False, num_workers=num_workers, + pin_memory=pin_memory ) if self.transformed_source.test is not None: @@ -63,11 +65,13 @@ def size(self): return self._loader_sizes -def create(source: Source, batch_size: int, num_workers: int = 0, transformations: typing.Optional[list] = None): +def create(source: Source, batch_size: int, num_workers: int = 0, transformations: typing.Optional[list] = None, + pin_memory=False): """ Vel factory function """ return DatasetLoader( source=source, batch_size=batch_size, + pin_memory=pin_memory, num_workers=num_workers, transformations=transformations ) diff --git a/vel/data/transformation/binarize_image.py b/vel/data/transformation/binarize_image.py new file mode 100644 index 00000000..38c13610 --- /dev/null +++ b/vel/data/transformation/binarize_image.py @@ -0,0 +1,16 @@ +import numpy as np + +import vel.api as api + + +class BinarizeImage(api.ScopedTransformation): + """ Convert [0,1] image into a binary {0, 1} representation """ + + def transform(self, x_data): + # Sample image from a Bernoulli distribution + return np.random.binomial(1, x_data).astype(np.float32) + + +def create(scope='x', tags=None): + """ Vel factory function """ + return BinarizeImage(scope, tags) diff --git a/vel/data/transformation/pil_resize.py 
b/vel/data/transformation/pil_resize.py new file mode 100644 index 00000000..326a34b2 --- /dev/null +++ b/vel/data/transformation/pil_resize.py @@ -0,0 +1,18 @@ +import PIL.Image as Image + +import vel.api as api + + +class PilResize(api.ScopedTransformation): + """ Scale the PIL image """ + def __init__(self, shape, scope='x', tags=None): + super().__init__(scope, tags) + self.shape = shape + + def transform(self, x_data): + return x_data.resize(self.shape, Image.LANCZOS) + + +def create(shape, scope='x', tags=None): + """ Vel factory function """ + return PilResize(shape, scope, tags) diff --git a/vel/model/autoencoder/fc_vae.py b/vel/model/autoencoder/fc_vae.py new file mode 100644 index 00000000..49e91fbd --- /dev/null +++ b/vel/model/autoencoder/fc_vae.py @@ -0,0 +1,106 @@ +import torch +import torch.distributions as dist +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init + +from vel.api import ModelFactory +from vel.module.layers import Flatten, Reshape + +from vel.model.autoencoder.vae_base import VaeBase + + +class FcVae(VaeBase): + """ + A simple variational autoencoder, containing 2 fully connected layers. 
+ """ + + def __init__(self, img_rows, img_cols, img_channels, layers=None, representation_length=32, + analytical_kl_div=False, max_grad_norm=None): + super().__init__(analytical_kl_div=analytical_kl_div, max_grad_norm=max_grad_norm) + + if layers is None: + layers = [512, 256] + + self.representation_length = representation_length + self.layers = layers + + input_length = img_rows * img_cols * img_channels + + self.encoder = nn.Sequential( + Flatten(), + nn.Linear(in_features=input_length, out_features=self.layers[0]), + nn.Tanh(), + nn.Linear(in_features=self.layers[0], out_features=self.layers[1]), + nn.Tanh(), + nn.Linear(self.layers[1], representation_length * 2) + ) + + self.decoder = nn.Sequential( + nn.Linear(representation_length, self.layers[1]), + nn.Tanh(), + nn.Linear(in_features=self.layers[1], out_features=self.layers[0]), + nn.Tanh(), + nn.Linear(in_features=self.layers[0], out_features=input_length), + Reshape(img_channels, img_rows, img_cols), + nn.Sigmoid() + ) + + self.register_buffer('prior_mean', torch.tensor([[0.0]])) + self.register_buffer('prior_std', torch.tensor([[1.0]])) + + def encoder_network(self, sample: torch.Tensor) -> torch.Tensor: + """ Transform input sample into an encoded representation """ + return self.encoder(sample) + + def encoder_distribution(self, encoded: torch.Tensor) -> dist.Distribution: + """ Create a pytorch distribution object representing the encoder distribution (approximate posterior) """ + mu = encoded[:, :self.representation_length] + std = F.softplus(encoded[:, self.representation_length:]) + + return dist.Independent(dist.Normal(mu, std), 1) + + def decoder_network(self, z: torch.Tensor) -> torch.Tensor: + """ Transform encoded value into a decoded representation """ + return self.decoder(z) + + def decoder_distribution(self, decoded: torch.Tensor) -> dist.Distribution: + """ Create a pytorch distribution object representing the decoder distribution (likelihood) """ + return 
dist.Independent(dist.Bernoulli(probs=decoded), 3) + + def prior_distribution(self) -> dist.Distribution: + """ Return a prior distribution object """ + return dist.Independent(dist.Normal(self.prior_mean, self.prior_std), 1) + + def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: + """ Sample from a decoder distribution - we ignore that since it's so weak in this case """ + return self.decoder_network(decoded) + +# @staticmethod +# def _weight_initializer(tensor): +# init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('tanh')) +# init.constant_(tensor.bias, 0.01) +# +# def reset_weights(self): +# for m in it.chain(self.encoder, self.decoder): +# if isinstance(m, nn.Conv2d): +# self._weight_initializer(m) +# elif isinstance(m, nn.ConvTranspose2d): +# self._weight_initializer(m) +# elif isinstance(m, nn.Linear): +# self._weight_initializer(m) + + +def create(img_rows, img_cols, img_channels, layers=None, representation_length=32, max_grad_norm=None, + analytical_kl_div=True): + """ Vel factory function """ + if layers is None: + layers = [512, 256] + + def instantiate(**_): + return FcVae( + img_rows, img_cols, img_channels, layers=layers, representation_length=representation_length, + max_grad_norm=max_grad_norm, analytical_kl_div=analytical_kl_div + ) + + return ModelFactory.generic(instantiate) diff --git a/vel/model/autoencoder/mnist_fc_vae.py b/vel/model/autoencoder/mnist_fc_vae.py deleted file mode 100644 index e79f114b..00000000 --- a/vel/model/autoencoder/mnist_fc_vae.py +++ /dev/null @@ -1,143 +0,0 @@ -import itertools as it - -import torch -import torch.nn as nn -import torch.nn.init as init -import torch.nn.functional as F - -from vel.api import GradientModel, ModelFactory -from vel.metric import AveragingNamedMetric -from vel.metric.loss_metric import Loss -from vel.module.layers import Flatten, Reshape - - -class MnistCnnVAE(GradientModel): - """ - A simple MNIST variational autoencoder, containing 3 convolutional layers. 
- """ - - def __init__(self, img_rows, img_cols, img_channels, layers=None, representation_length=32): - super(MnistCnnVAE, self).__init__() - - if layers is None: - layers = [512, 256] - - self.representation_length = representation_length - - # self.final_width = net_util.convolutional_layer_series(img_rows, layer_series) - # self.final_height = net_util.convolutional_layer_series(img_cols, layer_series) - self.layers = layers - - input_length = img_rows * img_cols * img_channels - - self.encoder = nn.Sequential( - Flatten(), - nn.Linear(in_features=input_length, out_features=self.layers[0]), - nn.ReLU(True), - nn.Linear(in_features=self.layers[0], out_features=self.layers[1]), - nn.ReLU(True), - nn.Linear(self.layers[1], representation_length * 2) - ) - - self.decoder = nn.Sequential( - nn.Linear(representation_length, self.layers[1]), - nn.ReLU(True), - nn.Linear(self.layers[1], self.layers[0]), - nn.ReLU(True), - nn.Linear(self.layers[0], input_length), - Reshape(img_channels, img_rows, img_cols), - nn.Sigmoid() - ) - - @staticmethod - def _weight_initializer(tensor): - init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('relu')) - init.constant_(tensor.bias, 0.0) - - def reset_weights(self): - for m in it.chain(self.encoder, self.decoder): - if isinstance(m, nn.Conv2d): - self._weight_initializer(m) - elif isinstance(m, nn.ConvTranspose2d): - self._weight_initializer(m) - elif isinstance(m, nn.Linear): - self._weight_initializer(m) - - def encode(self, sample): - encoding = self.encoder(sample) - - mu = encoding[:, :self.representation_length] - # I encode std directly as a softplus, rather than exp(logstd) - std = F.softplus(encoding[:, self.representation_length:]) - - return mu + torch.randn_like(std) * std - - def decode(self, sample): - return self.decoder(sample) - - def forward(self, sample): - encoding = self.encoder(sample) - - mu = encoding[:, :self.representation_length] - # I encode std directly as a softplus, rather than exp(logstd) - 
std = F.softplus(encoding[:, self.representation_length:]) - - z = mu + torch.randn_like(std) * std - - decoded = self.decoder(z) - - return { - 'decoded': decoded, - 'encoding': z, - 'mu': mu, - 'std': std - } - - def calculate_gradient(self, data): - """ Calculate a gradient of loss function """ - output = self(data['x']) - - y_pred = output['decoded'] - - mu = output['mu'] - std = output['std'] - var = std ** 2 - - kl_divergence = - 0.5 * (1 + torch.log(var) - mu ** 2 - var).sum(dim=1) - kl_divergence = kl_divergence.mean() - - # reconstruction = 0.5 * F.mse_loss(y_pred, y_true) - - # We must sum over all image axis and average only on minibatch axis - reconstruction = F.binary_cross_entropy(y_pred, data['y'], reduction='none').sum(1).sum(1).sum(1).mean() - loss = reconstruction + kl_divergence - - if self.training: - loss.backward() - - return { - 'loss': loss.item(), - 'reconstruction': reconstruction.item(), - 'kl_divergence': kl_divergence.item() - } - - def metrics(self): - """ Set of metrics for this model """ - return [ - Loss(), - AveragingNamedMetric('reconstruction', scope="train"), - AveragingNamedMetric('kl_divergence', scope="train") - ] - - -def create(img_rows, img_cols, img_channels, layers=None, representation_length=32): - """ Vel factory function """ - if layers is None: - layers = [512, 256] - - def instantiate(**_): - return MnistCnnVAE( - img_rows, img_cols, img_channels, layers=layers, representation_length=representation_length - ) - - return ModelFactory.generic(instantiate) diff --git a/vel/model/autoencoder/vae_base.py b/vel/model/autoencoder/vae_base.py new file mode 100644 index 00000000..0ff0de27 --- /dev/null +++ b/vel/model/autoencoder/vae_base.py @@ -0,0 +1,164 @@ +import torch +import torch.distributions as dist + +from vel.api import GradientModel +from vel.metric import AveragingNamedMetric +from vel.metric.loss_metric import Loss + + +class VaeBase(GradientModel): + """ Base module for variational auto-encoder implementations 
""" + def __init__(self, analytical_kl_div=True, max_grad_norm=1.0): + super().__init__() + + self.analytical_kl_div = analytical_kl_div + self.max_grad_norm = max_grad_norm + + #################################################################################################################### + # Interface methods + def encoder_network(self, sample: torch.Tensor) -> torch.Tensor: + """ Transform input sample into an encoded representation """ + raise NotImplementedError + + def encoder_distribution(self, encoded: torch.Tensor) -> dist.Distribution: + """ Create a pytorch distribution object representing the encoder distribution (approximate posterior) """ + raise NotImplementedError + + def decoder_network(self, z: torch.Tensor) -> torch.Tensor: + """ Transform encoded value into a decoded representation """ + raise NotImplementedError + + def decoder_distribution(self, decoded: torch.Tensor) -> dist.Distribution: + """ Create a pytorch distribution object representing the decoder distribution (likelihood) """ + raise NotImplementedError + + def prior_distribution(self) -> dist.Distribution: + """ Return a prior distribution object """ + raise NotImplementedError + + #################################################################################################################### + # Other useful methods + def encode(self, sample: torch.Tensor) -> torch.Tensor: + """ Encode incoming data into a latent representation """ + encoded = self.encoder_network(sample) + return self.encoder_rsample(encoded) + + def encoder_rsample(self, encoded: torch.Tensor) -> torch.Tensor: + """ Sample with "reparametrization trick" encoder sample """ + return self.encoder_distribution(encoded).rsample() + + def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: + """ Sample from a decoder distribution """ + return self.decoder_distribution(decoded).sample() + + def forward(self, sample: torch.Tensor) -> torch.Tensor: + """ Simple forward pass through the module """ + 
encoded = self.encoder_network(sample) + z = self.encoder_rsample(encoded) + decoded = self.decoder_sample(z) + return decoded + + def calculate_gradient(self, data: dict) -> dict: + """ Calculate model gradient for given data sample """ + encoded = self.encoder_network(data['x']) + z_dist = self.encoder_distribution(encoded) + z = z_dist.rsample() + + decoded = self.decoder_network(z) + x_dist = self.decoder_distribution(decoded) + prior = self.prior_distribution() + + kl_divergence = self.kl_divergence(z, z_dist, prior).mean() + reconstruction = x_dist.log_prob(data['y']).mean() + + elbo = reconstruction - kl_divergence + + loss = -elbo + + if self.training: + loss.backward() + + if self.max_grad_norm is not None: + grad_norm = torch.nn.utils.clip_grad_norm_( + filter(lambda p: p.requires_grad, self.parameters()), + max_norm=self.max_grad_norm + ) + else: + grad_norm = 0.0 + else: + grad_norm = 0.0 + + return { + 'loss': loss.item(), + + 'grad_norm': grad_norm, + 'reconstruction': -reconstruction.item(), + 'kl_divergence': kl_divergence.item() + } + + def kl_divergence(self, z, encoder_distribution, prior) -> torch.Tensor: + """ Calculate the kl divergence between q(z|x) and p(z) """ + if self.analytical_kl_div: + kl_divergence = dist.kl_divergence(encoder_distribution, prior) + else: + lpz = prior.log_prob(z) + lqzx = encoder_distribution.log_prob(z) + kl_divergence = -lpz + lqzx + + return kl_divergence + + def metrics(self): + """ Set of metrics for this model """ + return [ + Loss(), + AveragingNamedMetric('reconstruction', scope="train"), + AveragingNamedMetric('kl_divergence', scope="train"), + AveragingNamedMetric('grad_norm', scope="train") + ] + + @torch.no_grad() + def nll(self, sample: torch.Tensor, num_posterior_samples: int = 1): + """ + Upper bound on negative log-likelihood of supplied data. 
+ If num samples goes to infinity, the nll of data should + approach true value + """ + assert num_posterior_samples >= 1, "Need at least one posterior sample" + + buffer = [] + + encoded = self.encoder_network(sample) + z_dist = self.encoder_distribution(encoded) + prior = self.prior_distribution() + + if self.analytical_kl_div: + kl_divergence = dist.kl_divergence(z_dist, prior) + + for i in range(num_posterior_samples): + z = z_dist.rsample() + decoded = self.decoder_network(z) + x_dist = self.decoder_distribution(decoded) + + if not self.analytical_kl_div: + lpz = prior.log_prob(z) + lqzx = z_dist.log_prob(z) + kl_divergence = -lpz + lqzx + + likelihood = x_dist.log_prob(sample) + elbo = likelihood - kl_divergence + + buffer.append(elbo) + + averaged = self.log_mean_exp(torch.stack(buffer, dim=-1), dim=-1) + + return -averaged + + #################################################################################################################### + # Utility methods + def log_mean_exp(self, inputs, dim=1): + """ Perform log(mean(exp(data))) in a numerically stable way """ + if inputs.size(dim) == 1: + return inputs + else: + input_max = inputs.max(dim, keepdim=True)[0] + return (inputs - input_max).exp().mean(dim).log() + input_max.squeeze(dim=dim) diff --git a/vel/scheduler/multi_step.py b/vel/scheduler/multi_step.py index eeeb735d..6ab8110b 100644 --- a/vel/scheduler/multi_step.py +++ b/vel/scheduler/multi_step.py @@ -1,22 +1,30 @@ -# import torch.optim.lr_scheduler as scheduler - -# class MultiStepScheduler: -# def __init__(self, optimizer, milestones, gamma, last_epoch): -# self.scheduler = scheduler.MultiStepLR(optimizer, milestones, gamma, last_epoch=last_epoch) -# -# def get_lr(self): -# return self.scheduler.get_lr() -# -# def pre_epoch_step(self, epoch_idx): -# self.scheduler.step() -# -# def post_epoch_step(self, epoch_idx, metrics): -# pass -# -# -# def create(milestones, gamma=0.1): -# """ Create a multi-step scheduler """ -# def 
scheduler_fn(optimizer, last_epoch=-1): -# return MultiStepScheduler(optimizer, milestones, gamma, last_epoch=last_epoch) -# -# return scheduler_fn +import torch.optim.lr_scheduler as scheduler + +from vel.api import Callback, SchedulerFactory, EpochInfo + + +class MultiStepScheduler(Callback): + """ Scheduler multiplying the learning rate given number after given number of epochs """ + def __init__(self, optimizer, milestones, gamma, last_epoch): + self.scheduler = scheduler.MultiStepLR(optimizer, milestones, gamma, last_epoch=last_epoch) + + def get_lr(self): + return self.scheduler.get_lr() + + def on_epoch_end(self, epoch_info: EpochInfo) -> None: + self.scheduler.step(epoch=epoch_info.global_epoch_idx) + + +class MultiStepSchedulerFactory(SchedulerFactory): + """ Factory class for ladder scheduler """ + def __init__(self, milestones, gamma): + self.milestones = milestones + self.gamma = gamma + + def instantiate(self, optimizer, last_epoch=-1) -> MultiStepScheduler: + return MultiStepScheduler(optimizer, self.milestones, self.gamma, last_epoch) + + +def create(milestones, gamma=0.1): + """ Create a multi-step scheduler """ + return MultiStepSchedulerFactory(milestones, gamma) diff --git a/vel/storage/backend/mongodb.py b/vel/storage/backend/mongodb.py index ff663737..77b8f6da 100644 --- a/vel/storage/backend/mongodb.py +++ b/vel/storage/backend/mongodb.py @@ -34,7 +34,7 @@ def get_frame(self): return pd.DataFrame(metric_items).drop(['_id', 'model_name'], axis=1).set_index('epoch_idx') def store(self, metrics): - augmented_metrics = metrics.copy() + augmented_metrics = {'{}/{}/{}'.format(k.scope, k.dataset, k.name): v for k, v in metrics.items()} model_name = self.model_config.name run_name = self.model_config.run_name From 00178dd8a987d7db62de2eda444d2d9519f1d227 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Fri, 20 Sep 2019 10:34:18 -0700 Subject: [PATCH 083/162] Implemented CNN-VAE for MNIST. 
--- .../autoencoder/mnist/mnist_cnn_ae.yaml | 2 +- .../autoencoder/mnist/mnist_cnn_vae.yaml | 40 +++- .../autoencoder/mnist/mnist_fc_vae.yaml | 40 +--- vel/model/autoencoder/cnn_vae.py | 158 +++++++++++++ vel/model/autoencoder/mnist_cnn_vae.py | 210 ------------------ vel/model/autoencoder/vae_base.py | 10 + 6 files changed, 205 insertions(+), 255 deletions(-) create mode 100644 vel/model/autoencoder/cnn_vae.py delete mode 100644 vel/model/autoencoder/mnist_cnn_vae.py diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml index d4ce36c7..2591cc04 100644 --- a/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml @@ -2,7 +2,7 @@ name: 'mnist_cnn_ae' model: - name: vel.model.autoencoder.mnist_cnn_autoencoder + name: vel.model.autoencoder.cnn_autoencoder img_rows: 28 img_cols: 28 img_channels: 1 diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml index 6dc6525f..8994602b 100644 --- a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml @@ -2,34 +2,64 @@ name: 'mnist_cnn_vae' model: - name: vel.model.autoencoder.mnist_cnn_vae + name: vel.model.autoencoder.cnn_vae img_rows: 28 img_cols: 28 img_channels: 1 channels: [64, 128, 256] - representation_length: 20 + representation_length: 50 + max_grad_norm: 1.0 + analytical_kl_div: true source: name: vel.data.source.vision.mnist + loader: name: vel.data.dataset_loader batch_size: 256 num_workers: 4 + pin_memory: true transformations: + - name: vel.data.transformation.to_array +# - name: vel.data.augmentation.random_scale +# tags: train +# size: 28 +# max_zoom: 1.1 +# - name: vel.data.augmentation.random_rotate +# tags: train +# deg: 15.0 +# - name: vel.data.augmentation.random_crop +# tags: train +# width: 28 +# height: 28 +# padding: 3 +# padding_mode: 'constant' + - name: 
vel.data.transformation.binarize_image - name: vel.data.transformation.image_to_tensor - name: vel.data.transformation.unsupervised optimizer: name: vel.optimizer.radam - lr: 1.0e-4 + lr: 1.0e-3 + eps: 1.0e-4 + + +scheduler: + name: vel.scheduler.multi_step + gamma: 0.71968 # 10 ** (-1/7) + milestones: [ 1, 4, 13, 40, 121, 364, 1093, 3280] commands: + augvis: + name: vel.command.augvis_command + samples: 10 + cases: 5 + train: name: vel.command.train_command - epochs: 200 - + epochs: 3280 diff --git a/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml b/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml index 647094f4..ed0977ae 100644 --- a/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml @@ -43,42 +43,4 @@ scheduler: commands: train: name: vel.command.train_command - epochs: 3280 - -# train: -# name: vel.command.phase_train_command -# phases: -# - name: vel.train.phase.cycle -# max_lr: 0.001 -# min_lr: 0.000 -# interpolate: 'cosine' -# cycles: 20 -# cycle_len: 1 -# - name: vel.train.phase.cycle -# max_lr: 0.001 -# min_lr: 0.0000 -# interpolate: 'cosine' -# cycles: 7 -# cycle_len: 1 -# cycle_mult: 2 -# - name: vel.train.phase.cycle -# max_lr: 0.0001 -# min_lr: 0.0000 -# interpolate: 'cosine' -# cycles: 8 -# cycle_len: 1 -# cycle_mult: 2 -# - name: vel.train.phase.cycle -# max_lr: 0.00003 -# min_lr: 0.0000 -# interpolate: 'cosine' -# cycles: 9 -# cycle_len: 1 -# cycle_mult: 2 -# - name: vel.train.phase.cycle -# max_lr: 0.00001 -# min_lr: 0.0000 -# interpolate: 'cosine' -# cycles: 10 -# cycle_len: 1 -# cycle_mult: 2 + epochs: 3280 \ No newline at end of file diff --git a/vel/model/autoencoder/cnn_vae.py b/vel/model/autoencoder/cnn_vae.py new file mode 100644 index 00000000..da57eacd --- /dev/null +++ b/vel/model/autoencoder/cnn_vae.py @@ -0,0 +1,158 @@ +import itertools as it + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F +import
torch.distributions as dist + +import vel.util.network as net_util + +from vel.api import ModelFactory +from vel.module.layers import Flatten, Reshape +from vel.model.autoencoder.vae_base import VaeBase + + +class CnnVAE(VaeBase): + """ + A simple variational autoencoder, containing 3 convolutional layers. + """ + + def __init__(self, img_rows, img_cols, img_channels, channels=None, representation_length=32, + analytical_kl_div=True, max_grad_norm=0.5): + super().__init__(analytical_kl_div=analytical_kl_div, max_grad_norm=max_grad_norm) + + if channels is None: + channels = [16, 32, 32] + + layer_series = [ + (3, 1, 1), + (3, 1, 2), + (3, 1, 2), + ] + + self.representation_length = representation_length + + self.final_width = net_util.convolutional_layer_series(img_rows, layer_series) + self.final_height = net_util.convolutional_layer_series(img_cols, layer_series) + self.channels = channels + + self.encoder = nn.Sequential( + nn.Conv2d(in_channels=img_channels, out_channels=channels[0], kernel_size=(3, 3), padding=1), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[0], + net_util.convolutional_layer_series(img_rows, layer_series[:1]), + net_util.convolutional_layer_series(img_cols, layer_series[:1]), + ]), + nn.Conv2d(in_channels=channels[0], out_channels=channels[1], kernel_size=(3, 3), stride=2, padding=1), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[1], + net_util.convolutional_layer_series(img_rows, layer_series[:2]), + net_util.convolutional_layer_series(img_cols, layer_series[:2]), + ]), + nn.Conv2d(in_channels=channels[1], out_channels=channels[2], kernel_size=(3, 3), stride=2, padding=1), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[2], + net_util.convolutional_layer_series(img_rows, layer_series), + net_util.convolutional_layer_series(img_cols, layer_series), + ]), + Flatten(), + nn.Linear(self.final_width * self.final_height * channels[2], representation_length * 2) + ) + + self.decoder = 
nn.Sequential( + nn.Linear(representation_length, self.final_width * self.final_height * channels[2]), + # nn.ReLU(True), + nn.SELU(True), + Reshape(channels[2], self.final_width, self.final_height), + nn.LayerNorm([ + channels[2], + net_util.convolutional_layer_series(img_rows, layer_series), + net_util.convolutional_layer_series(img_cols, layer_series), + ]), + nn.ConvTranspose2d( + in_channels=channels[2], out_channels=channels[1], kernel_size=3, stride=2, padding=1, output_padding=1 + ), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[1], + net_util.convolutional_layer_series(img_rows, layer_series[:2]), + net_util.convolutional_layer_series(img_cols, layer_series[:2]), + ]), + nn.ConvTranspose2d( + in_channels=channels[1], out_channels=channels[0], kernel_size=3, stride=2, padding=1, output_padding=1 + ), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[0], + net_util.convolutional_layer_series(img_rows, layer_series[:1]), + net_util.convolutional_layer_series(img_cols, layer_series[:1]), + ]), + nn.ConvTranspose2d(in_channels=channels[0], out_channels=img_channels, kernel_size=3, padding=1), + nn.Sigmoid() + ) + + self.register_buffer('prior_mean', torch.tensor([[0.0]])) + self.register_buffer('prior_std', torch.tensor([[1.0]])) + + @staticmethod + def _weight_initializer(tensor): + init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('relu')) + init.constant_(tensor.bias, 0.0) + + def reset_weights(self): + for m in it.chain(self.encoder, self.decoder): + if isinstance(m, nn.Conv2d): + self._weight_initializer(m) + elif isinstance(m, nn.ConvTranspose2d): + self._weight_initializer(m) + elif isinstance(m, nn.Linear): + self._weight_initializer(m) + + def encoder_network(self, sample: torch.Tensor) -> torch.Tensor: + """ Transform input sample into an encoded representation """ + return self.encoder(sample) + + def encoder_distribution(self, encoded: torch.Tensor) -> dist.Distribution: + """ Create a pytorch distribution 
object representing the encoder distribution (approximate posterior) """ + mu = encoded[:, :self.representation_length] + std = F.softplus(encoded[:, self.representation_length:]) + + return dist.Independent(dist.Normal(mu, std), 1) + + def decoder_network(self, z: torch.Tensor) -> torch.Tensor: + """ Transform encoded value into a decoded representation """ + return self.decoder(z) + + def decoder_distribution(self, decoded: torch.Tensor) -> dist.Distribution: + """ Create a pytorch distribution object representing the decoder distribution (likelihood) """ + return dist.Independent(dist.Bernoulli(probs=decoded), 3) + + def prior_distribution(self) -> dist.Distribution: + """ Return a prior distribution object """ + return dist.Independent(dist.Normal(self.prior_mean, self.prior_std), 1) + + def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: + """ Sample from a decoder distribution - we ignore that since it's so weak in this case """ + return self.decoder_network(decoded) + + +def create(img_rows, img_cols, img_channels, channels=None, representation_length=32): + """ Vel factory function """ + if channels is None: + channels = [16, 32, 32] + + def instantiate(**_): + return CnnVAE( + img_rows, img_cols, img_channels, channels=channels, representation_length=representation_length + ) + + return ModelFactory.generic(instantiate) diff --git a/vel/model/autoencoder/mnist_cnn_vae.py b/vel/model/autoencoder/mnist_cnn_vae.py deleted file mode 100644 index 9cfa95a8..00000000 --- a/vel/model/autoencoder/mnist_cnn_vae.py +++ /dev/null @@ -1,210 +0,0 @@ -import itertools as it - -import torch -import torch.nn as nn -import torch.nn.init as init -import torch.nn.functional as F - -import vel.util.network as net_util - -from vel.api import GradientModel, ModelFactory -from vel.metric import AveragingNamedMetric -from vel.metric.loss_metric import Loss -from vel.module.layers import Flatten, Reshape - - -class MnistCnnVAE(GradientModel): - """ - A simple MNIST 
variational autoencoder, containing 3 convolutional layers. - """ - - def __init__(self, img_rows, img_cols, img_channels, channels=None, representation_length=32): - super(MnistCnnVAE, self).__init__() - - if channels is None: - channels = [16, 32, 32] - - layer_series = [ - (3, 1, 1), - (3, 1, 2), - (3, 1, 2), - ] - - self.representation_length = representation_length - - self.final_width = net_util.convolutional_layer_series(img_rows, layer_series) - self.final_height = net_util.convolutional_layer_series(img_cols, layer_series) - self.channels = channels - - self.encoder = nn.Sequential( - nn.Conv2d(in_channels=img_channels, out_channels=channels[0], kernel_size=(3, 3), padding=1), - nn.ReLU(True), - nn.Conv2d(in_channels=channels[0], out_channels=channels[1], kernel_size=(3, 3), stride=2, padding=1), - nn.ReLU(True), - nn.Conv2d(in_channels=channels[1], out_channels=channels[2], kernel_size=(3, 3), stride=2, padding=1), - Flatten(), - nn.Linear(self.final_width * self.final_height * channels[2], representation_length * 2) - ) - - self.decoder = nn.Sequential( - nn.Linear(representation_length, self.final_width * self.final_height * channels[2]), - nn.ReLU(True), - Reshape(channels[2], self.final_width, self.final_height), - nn.ConvTranspose2d( - in_channels=channels[2], out_channels=channels[1], kernel_size=3, stride=2, padding=1, output_padding=1 - ), - nn.ReLU(True), - nn.ConvTranspose2d( - in_channels=channels[1], out_channels=channels[0], kernel_size=3, stride=2, padding=1, output_padding=1 - ), - nn.ReLU(True), - nn.ConvTranspose2d(in_channels=channels[0], out_channels=img_channels, kernel_size=3, padding=1), - nn.Sigmoid() - ) - - @staticmethod - def _weight_initializer(tensor): - init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('relu')) - init.constant_(tensor.bias, 0.0) - - def reset_weights(self): - for m in it.chain(self.encoder, self.decoder): - if isinstance(m, nn.Conv2d): - self._weight_initializer(m) - elif isinstance(m, 
nn.ConvTranspose2d): - self._weight_initializer(m) - elif isinstance(m, nn.Linear): - self._weight_initializer(m) - - def encoder_distribution(self, sample): - encoding = self.encoder(sample) - mu = encoding[:, :self.representation_length] - # I encode std directly as a softplus, rather than exp(logstd) - std = F.softplus(encoding[:, self.representation_length:]) - - return mu, std - - def encode(self, sample): - mu, std = self.encoder_distribution(sample) - # Sample z - return mu + torch.randn_like(std) * std - - def decode(self, sample): - # We don't sample here, because decoder is so weak it doesn't make sense - return self.decoder(sample) - - def forward(self, sample): - mu, std = self.encoder_distribution(sample) - - # Sample z - z = mu + torch.randn_like(std) * std - decoded = self.decoder(z) - - return { - 'decoded': decoded, - 'encoding': z, - 'mu': mu, - 'std': std - } - - def calculate_gradient(self, data): - """ Calculate a gradient of loss function """ - output = self(data['x']) - - # ELBO is E_q log p(x, z) / q(z | x) - # Which can be expressed in many equivalent forms: - # (1) E_q log p(x | z) + log p(z) - log q(z | x) - # (2) E_q log p(x | z) - D_KL(p(z) || q(z | x)) - # (3) E_q log p(x) - D_KL(p(z | x) || q(z | x)Biblio) - - # Form 3 is interesting from a theoretical standpoint, but is intractable to compute directly - # While forms (1) and (2) can be computed directly. 
- # Positive aspect of form (2) is that KL divergence can be calculated analytically - # further reducing the variance of the gradient - - y_pred = output['decoded'] - - mu = output['mu'] - std = output['std'] - var = std ** 2 - - # Analytical solution of KL divergence - kl_divergence = - 0.5 * (1 + torch.log(var) - mu ** 2 - var).sum(dim=1) - kl_divergence = kl_divergence.mean() - - # Diag-gaussian likelihood - # likelihood = 0.5 * F.mse_loss(y_pred, y_true) - - # We must sum over all image axis and average only on minibatch axis - # Log prob p(x | z) in the case where the output distribution is Bernoulli(p) - likelihood = F.binary_cross_entropy(y_pred, data['y'], reduction='none').sum((1, 2, 3)).mean() - - elbo = likelihood + kl_divergence - - nll = self.nll(data['x'], num_posterior_samples=5) - - if self.training: - elbo.backward() - - return { - 'loss': elbo.item(), - 'nll': nll.mean().item(), - 'reconstruction': likelihood.item(), - 'kl_divergence': kl_divergence.item() - } - - def logmeanexp(self, inputs, dim=1): - if inputs.size(dim) == 1: - return inputs - else: - input_max = inputs.max(dim, keepdim=True)[0] - return (inputs - input_max).exp().mean(dim).log() + input_max.squeeze(dim=dim) - - @torch.no_grad() - def nll(self, data_sample, num_posterior_samples: int = 1): - """ - Upper bound on negative log-likelihood of supplied data. 
- If num samples goes to infinity, the nll of data should - approach true value - """ - assert num_posterior_samples >= 1, "Need at least one posterior sample" - - buffer = [] - - mu, std = self.encoder_distribution(data_sample) - var = std ** 2 - - kl_divergence = - 0.5 * (1 + torch.log(var) - mu ** 2 - var).sum(dim=1) - - for i in range(num_posterior_samples): - z = mu + torch.randn_like(std) * std - y_pred = self.decoder(z) - - likelihood = F.binary_cross_entropy(y_pred, data_sample, reduction='none').sum((1, 2, 3)) - elbo = likelihood + kl_divergence - - buffer.append(-elbo) - - averaged = self.logmeanexp(torch.stack(buffer, dim=-1), dim=-1) - return -averaged - - def metrics(self): - """ Set of metrics for this model """ - return [ - Loss(), - AveragingNamedMetric('reconstruction', scope="train"), - AveragingNamedMetric('kl_divergence', scope="train") - ] - - -def create(img_rows, img_cols, img_channels, channels=None, representation_length=32): - """ Vel factory function """ - if channels is None: - channels = [16, 32, 32] - - def instantiate(**_): - return MnistCnnVAE( - img_rows, img_cols, img_channels, channels=channels, representation_length=representation_length - ) - - return ModelFactory.generic(instantiate) diff --git a/vel/model/autoencoder/vae_base.py b/vel/model/autoencoder/vae_base.py index 0ff0de27..efaf62ab 100644 --- a/vel/model/autoencoder/vae_base.py +++ b/vel/model/autoencoder/vae_base.py @@ -71,6 +71,16 @@ def calculate_gradient(self, data: dict) -> dict: kl_divergence = self.kl_divergence(z, z_dist, prior).mean() reconstruction = x_dist.log_prob(data['y']).mean() + # ELBO is E_q log p(x, z) / q(z | x) + # Which can be expressed in many equivalent forms: + # (1) E_q log p(x | z) + log p(z) - log q(z | x) + # (2) E_q log p(x | z) - D_KL(q(z | x) || p(z)) + # (3) log p(x) - D_KL(q(z | x) || p(z | x)) + + # Form 3 is interesting from a theoretical standpoint, but is intractable to compute directly + # While forms (1) and (2) can be
computed directly. + # Positive aspect of form (2) is that KL divergence can be calculated analytically + # further reducing the variance of the gradient elbo = reconstruction - kl_divergence loss = -elbo From b61bbcd0a658228d72d54204e06bbdb80b8c6ce1 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Fri, 20 Sep 2019 20:57:17 -0700 Subject: [PATCH 084/162] Added omniglot dataset. --- vel/data/source/vision/mnist.py | 2 +- vel/data/source/vision/omniglot.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 vel/data/source/vision/omniglot.py diff --git a/vel/data/source/vision/mnist.py b/vel/data/source/vision/mnist.py index 16640cac..58f8ee90 100644 --- a/vel/data/source/vision/mnist.py +++ b/vel/data/source/vision/mnist.py @@ -4,7 +4,7 @@ def create(model_config): - """ Create a MNIST dataset, normalized """ + """ Create a MNIST dataset """ path = model_config.data_dir('mnist') train_dataset = datasets.MNIST(path, train=True, download=True) diff --git a/vel/data/source/vision/omniglot.py b/vel/data/source/vision/omniglot.py new file mode 100644 index 00000000..659f53b4 --- /dev/null +++ b/vel/data/source/vision/omniglot.py @@ -0,0 +1,16 @@ +from torchvision import datasets + +from vel.api import Source + + +def create(model_config): + """ Create an Omniglot dataset """ + path = model_config.data_dir('omniglot') + + train_dataset = datasets.Omniglot(path, background=True, download=True) + test_dataset = datasets.Omniglot(path, background=False, download=True) + + return Source( + train=train_dataset, + validation=test_dataset, + ) From b39347090b13bccafa0c93fba2383275a9f08385 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Fri, 20 Sep 2019 20:57:34 -0700 Subject: [PATCH 085/162] Added omniglot VAE configs. 
--- .../omniglot/omniglot_cnn_vae.yaml | 49 +++++++++++++++++ .../autoencoder/omniglot/omniglot_fc_vae.yaml | 54 +++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 examples-configs/autoencoder/omniglot/omniglot_cnn_vae.yaml create mode 100644 examples-configs/autoencoder/omniglot/omniglot_fc_vae.yaml diff --git a/examples-configs/autoencoder/omniglot/omniglot_cnn_vae.yaml b/examples-configs/autoencoder/omniglot/omniglot_cnn_vae.yaml new file mode 100644 index 00000000..dae887c8 --- /dev/null +++ b/examples-configs/autoencoder/omniglot/omniglot_cnn_vae.yaml @@ -0,0 +1,49 @@ +name: 'omniglot_cnn_vae' + + +model: + name: vel.model.autoencoder.cnn_vae + img_rows: 28 + img_cols: 28 + img_channels: 1 + channels: [64, 128, 256] + representation_length: 50 + max_grad_norm: 1.0 + analytical_kl_div: true + + +source: + name: vel.data.source.vision.omniglot + + +loader: + name: vel.data.dataset_loader + batch_size: 128 + num_workers: 4 + pin_memory: true + + transformations: + - name: vel.data.transformation.pil_resize + shape: [28, 28] + - name: vel.data.transformation.to_array + - name: vel.data.transformation.binarize_image + - name: vel.data.transformation.image_to_tensor + - name: vel.data.transformation.unsupervised + + +optimizer: + name: vel.optimizer.radam + lr: 1.0e-3 + eps: 1.0e-4 + + +commands: + augvis: + name: vel.command.augvis_command + samples: 5 + cases: 3 + + train: + name: vel.command.train_command + epochs: 3280 + diff --git a/examples-configs/autoencoder/omniglot/omniglot_fc_vae.yaml b/examples-configs/autoencoder/omniglot/omniglot_fc_vae.yaml new file mode 100644 index 00000000..1b627400 --- /dev/null +++ b/examples-configs/autoencoder/omniglot/omniglot_fc_vae.yaml @@ -0,0 +1,54 @@ +name: 'omniglot_fc_vae' + + +model: + name: vel.model.autoencoder.fc_vae + img_rows: 28 + img_cols: 28 + img_channels: 1 + layers: [200, 200] + representation_length: 50 + max_grad_norm: 1.0 + analytical_kl_div: true + + +source: + name: 
vel.data.source.vision.omniglot + + +loader: + name: vel.data.dataset_loader + batch_size: 128 + num_workers: 4 + pin_memory: true + + transformations: + - name: vel.data.transformation.pil_resize + shape: [28, 28] + - name: vel.data.transformation.to_array + - name: vel.data.transformation.binarize_image + - name: vel.data.transformation.image_to_tensor + - name: vel.data.transformation.unsupervised + + +optimizer: + name: vel.optimizer.radam + lr: 1.0e-3 + eps: 1.0e-4 + + +scheduler: + name: vel.scheduler.multi_step + gamma: 0.71968 # 10 ** (-1/7) + milestones: [ 1, 4, 13, 40, 121, 364, 1093, 3280] + + +commands: + augvis: + name: vel.command.augvis_command + samples: 5 + cases: 3 + + train: + name: vel.command.train_command + epochs: 3280 From 509e2c7cdef52962197a3a492638a03c26fb840d Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 22 Sep 2019 21:42:24 -0700 Subject: [PATCH 086/162] Renamed cnn autoencode. --- .../autoencoder/{mnist_cnn_autoencoder.py => cnn_autoencoder.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename vel/model/autoencoder/{mnist_cnn_autoencoder.py => cnn_autoencoder.py} (100%) diff --git a/vel/model/autoencoder/mnist_cnn_autoencoder.py b/vel/model/autoencoder/cnn_autoencoder.py similarity index 100% rename from vel/model/autoencoder/mnist_cnn_autoencoder.py rename to vel/model/autoencoder/cnn_autoencoder.py From d0c56e8fcb64d87f899515f814744e4ac5170a85 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 22 Sep 2019 21:47:00 -0700 Subject: [PATCH 087/162] Reorganized latent variable models.
--- .../{autoencoder => latent}/mnist/mnist_cnn_vae.yaml | 2 +- .../{autoencoder => latent}/mnist/mnist_fc_vae.yaml | 2 +- .../{autoencoder => latent}/omniglot/omniglot_cnn_vae.yaml | 2 +- .../{autoencoder => latent}/omniglot/omniglot_fc_vae.yaml | 2 +- vel/model/{autoencoder => latent}/cnn_vae.py | 0 vel/model/{autoencoder => latent}/fc_vae.py | 0 vel/model/{autoencoder => latent}/vae_base.py | 0 7 files changed, 4 insertions(+), 4 deletions(-) rename examples-configs/{autoencoder => latent}/mnist/mnist_cnn_vae.yaml (97%) rename examples-configs/{autoencoder => latent}/mnist/mnist_fc_vae.yaml (95%) rename examples-configs/{autoencoder => latent}/omniglot/omniglot_cnn_vae.yaml (95%) rename examples-configs/{autoencoder => latent}/omniglot/omniglot_fc_vae.yaml (96%) rename vel/model/{autoencoder => latent}/cnn_vae.py (100%) rename vel/model/{autoencoder => latent}/fc_vae.py (100%) rename vel/model/{autoencoder => latent}/vae_base.py (100%) diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml b/examples-configs/latent/mnist/mnist_cnn_vae.yaml similarity index 97% rename from examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml rename to examples-configs/latent/mnist/mnist_cnn_vae.yaml index 8994602b..76cc3f90 100644 --- a/examples-configs/autoencoder/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/latent/mnist/mnist_cnn_vae.yaml @@ -2,7 +2,7 @@ name: 'mnist_cnn_vae' model: - name: vel.model.autoencoder.cnn_vae + name: vel.model.latent.cnn_vae img_rows: 28 img_cols: 28 img_channels: 1 diff --git a/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml b/examples-configs/latent/mnist/mnist_fc_vae.yaml similarity index 95% rename from examples-configs/autoencoder/mnist/mnist_fc_vae.yaml rename to examples-configs/latent/mnist/mnist_fc_vae.yaml index ed0977ae..96653a6e 100644 --- a/examples-configs/autoencoder/mnist/mnist_fc_vae.yaml +++ b/examples-configs/latent/mnist/mnist_fc_vae.yaml @@ -2,7 +2,7 @@ name: 'mnist_fc_vae' model: - name: 
vel.model.autoencoder.fc_vae + name: vel.model.latent.fc_vae img_rows: 28 img_cols: 28 img_channels: 1 diff --git a/examples-configs/autoencoder/omniglot/omniglot_cnn_vae.yaml b/examples-configs/latent/omniglot/omniglot_cnn_vae.yaml similarity index 95% rename from examples-configs/autoencoder/omniglot/omniglot_cnn_vae.yaml rename to examples-configs/latent/omniglot/omniglot_cnn_vae.yaml index dae887c8..2df6f80b 100644 --- a/examples-configs/autoencoder/omniglot/omniglot_cnn_vae.yaml +++ b/examples-configs/latent/omniglot/omniglot_cnn_vae.yaml @@ -2,7 +2,7 @@ name: 'omniglot_cnn_vae' model: - name: vel.model.autoencoder.cnn_vae + name: vel.model.latent.cnn_vae img_rows: 28 img_cols: 28 img_channels: 1 diff --git a/examples-configs/autoencoder/omniglot/omniglot_fc_vae.yaml b/examples-configs/latent/omniglot/omniglot_fc_vae.yaml similarity index 96% rename from examples-configs/autoencoder/omniglot/omniglot_fc_vae.yaml rename to examples-configs/latent/omniglot/omniglot_fc_vae.yaml index 1b627400..263a72eb 100644 --- a/examples-configs/autoencoder/omniglot/omniglot_fc_vae.yaml +++ b/examples-configs/latent/omniglot/omniglot_fc_vae.yaml @@ -2,7 +2,7 @@ name: 'omniglot_fc_vae' model: - name: vel.model.autoencoder.fc_vae + name: vel.model.latent.fc_vae img_rows: 28 img_cols: 28 img_channels: 1 diff --git a/vel/model/autoencoder/cnn_vae.py b/vel/model/latent/cnn_vae.py similarity index 100% rename from vel/model/autoencoder/cnn_vae.py rename to vel/model/latent/cnn_vae.py diff --git a/vel/model/autoencoder/fc_vae.py b/vel/model/latent/fc_vae.py similarity index 100% rename from vel/model/autoencoder/fc_vae.py rename to vel/model/latent/fc_vae.py diff --git a/vel/model/autoencoder/vae_base.py b/vel/model/latent/vae_base.py similarity index 100% rename from vel/model/autoencoder/vae_base.py rename to vel/model/latent/vae_base.py From b988946389274ed0fb33d6d0d186faf0c38aacdd Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 22 Sep 2019 22:46:22 -0700 Subject: 
[PATCH 088/162] Make Reshape more flexible. --- vel/module/layers.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vel/module/layers.py b/vel/module/layers.py index 08fadb0a..9f9de68d 100644 --- a/vel/module/layers.py +++ b/vel/module/layers.py @@ -41,13 +41,18 @@ def forward(self, x): class Reshape(nn.Module): """ Flatten input vector """ - def __init__(self, *sizes): + def __init__(self, *sizes, batch_dims=1): super().__init__() self.sizes = sizes + self.batch_dims = batch_dims def forward(self, x): - return x.view(x.size(0), *self.sizes) + return x.view(x.shape[:self.batch_dims] + self.sizes) + + def extra_repr(self) -> str: + """ Extra representation of this module """ + return f"sizes={self.sizes}, batch_dims={self.batch_dims}" class OneHotEncode(nn.Module): From e51146215a6a60d16ca251c0350270db79f8df4a Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 22 Sep 2019 22:46:37 -0700 Subject: [PATCH 089/162] Clean up VAE implementation. --- vel/model/latent/vae_base.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/vel/model/latent/vae_base.py b/vel/model/latent/vae_base.py index efaf62ab..51bbb070 100644 --- a/vel/model/latent/vae_base.py +++ b/vel/model/latent/vae_base.py @@ -38,11 +38,6 @@ def prior_distribution(self) -> dist.Distribution: #################################################################################################################### # Other useful methods - def encode(self, sample: torch.Tensor) -> torch.Tensor: - """ Encode incoming data into a latent representation """ - encoded = self.encoder_network(sample) - return self.encoder_rsample(encoded) - def encoder_rsample(self, encoded: torch.Tensor) -> torch.Tensor: """ Sample with "reparametrization trick" encoder sample """ return self.encoder_distribution(encoded).rsample() @@ -51,11 +46,23 @@ def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: """ Sample from a decoder distribution """ return 
self.decoder_distribution(decoded).sample() + def encode(self, sample: torch.Tensor) -> torch.Tensor: + """ Encode incoming data into a latent representation """ + encoded = self.encoder_network(sample) + return self.encoder_rsample(encoded) + + def decode(self, z: torch.Tensor) -> torch.Tensor: + """ + Decode latent representation back into data domain. + Sample from p(x | z) + """ + decoded = self.decoder_network(z) + return self.decoder_sample(decoded) + def forward(self, sample: torch.Tensor) -> torch.Tensor: """ Simple forward pass through the module """ - encoded = self.encoder_network(sample) - z = self.encoder_rsample(encoded) - decoded = self.decoder_sample(z) + z = self.encode(sample) + decoded = self.decode(z) return decoded def calculate_gradient(self, data: dict) -> dict: From 28bc997c4153b85527a3231090ac15652a787e79 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 22 Sep 2019 22:47:47 -0700 Subject: [PATCH 090/162] IWAE implementation. --- README.md | 5 +- .../latent/mnist/mnist_fc_iwae.yaml | 47 ++++++++ vel/model/latent/cnn_vae.py | 4 +- vel/model/latent/fc_iwae.py | 106 ++++++++++++++++++ vel/model/latent/fc_vae.py | 6 +- vel/model/latent/iwae.py | 73 ++++++++++++ 6 files changed, 235 insertions(+), 6 deletions(-) create mode 100644 examples-configs/latent/mnist/mnist_fc_iwae.yaml create mode 100644 vel/model/latent/fc_iwae.py create mode 100644 vel/model/latent/iwae.py diff --git a/README.md b/README.md index f115666e..ffed0177 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,10 @@ that are ready to run and easy to modify for other similar usecases: # Implemented models - Unsupervised learning -- AutoEncoders (AE) and Variational AutoEncoders (VAE) with examples on MNIST dataset. +- A simple AutoEncoder (AE) with example on MNIST dataset. 
+- Latent variable models: + - Variational AutoEncoders (VAE) + - Importance Weighted AutoEncoder (IWAE) # Examples diff --git a/examples-configs/latent/mnist/mnist_fc_iwae.yaml b/examples-configs/latent/mnist/mnist_fc_iwae.yaml new file mode 100644 index 00000000..29979c21 --- /dev/null +++ b/examples-configs/latent/mnist/mnist_fc_iwae.yaml @@ -0,0 +1,47 @@ +name: 'mnist_fc_vae' + + +model: + name: vel.model.latent.fc_iwae + img_rows: 28 + img_cols: 28 + img_channels: 1 + layers: [200, 200] + representation_length: 50 + max_grad_norm: 1.0 + analytical_kl_div: true + k: 5 + + +source: + name: vel.data.source.vision.mnist + +loader: + name: vel.data.dataset_loader + batch_size: 128 + num_workers: 4 + pin_memory: true + + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.binarize_image + - name: vel.data.transformation.image_to_tensor + - name: vel.data.transformation.unsupervised + + +optimizer: + name: vel.optimizer.radam + lr: 1.0e-3 + eps: 1.0e-4 + + +scheduler: + name: vel.scheduler.multi_step + gamma: 0.71968 # 10 ** (-1/7) + milestones: [ 1, 4, 13, 40, 121, 364, 1093, 3280] + + +commands: + train: + name: vel.command.train_command + epochs: 3280 \ No newline at end of file diff --git a/vel/model/latent/cnn_vae.py b/vel/model/latent/cnn_vae.py index da57eacd..958877bf 100644 --- a/vel/model/latent/cnn_vae.py +++ b/vel/model/latent/cnn_vae.py @@ -10,7 +10,7 @@ from vel.api import ModelFactory from vel.module.layers import Flatten, Reshape -from vel.model.autoencoder.vae_base import VaeBase +from vel.model.latent.vae_base import VaeBase class CnnVAE(VaeBase): @@ -142,7 +142,7 @@ def prior_distribution(self) -> dist.Distribution: def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: """ Sample from a decoder distribution - we ignore that since it's so weak in this case """ - return self.decoder_network(decoded) + return decoded def create(img_rows, img_cols, img_channels, channels=None,
representation_length=32): diff --git a/vel/model/latent/fc_iwae.py b/vel/model/latent/fc_iwae.py new file mode 100644 index 00000000..ab6d8602 --- /dev/null +++ b/vel/model/latent/fc_iwae.py @@ -0,0 +1,106 @@ +import torch +import torch.distributions as dist +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init + +from vel.api import ModelFactory +from vel.module.layers import Flatten, Reshape + +from vel.model.latent.iwae import IWAE + + +class FcIwae(IWAE): + """ + A simple IWAE, containing 2 fully connected layers. + """ + + def __init__(self, img_rows, img_cols, img_channels, k=5, layers=None, representation_length=32, + analytical_kl_div=False, max_grad_norm=None): + super().__init__(k=k, analytical_kl_div=analytical_kl_div, max_grad_norm=max_grad_norm) + + if layers is None: + layers = [512, 256] + + self.representation_length = representation_length + self.layers = layers + + input_length = img_rows * img_cols * img_channels + + self.encoder = nn.Sequential( + Flatten(), + nn.Linear(in_features=input_length, out_features=self.layers[0]), + nn.Tanh(), + nn.Linear(in_features=self.layers[0], out_features=self.layers[1]), + nn.Tanh(), + nn.Linear(self.layers[1], representation_length * 2) + ) + + self.decoder = nn.Sequential( + nn.Linear(in_features=representation_length, out_features=self.layers[1]), + nn.Tanh(), + nn.Linear(in_features=self.layers[1], out_features=self.layers[0]), + nn.Tanh(), + nn.Linear(in_features=self.layers[0], out_features=input_length), + Reshape(img_channels, img_rows, img_cols), + nn.Sigmoid() + ) + + self.register_buffer('prior_mean', torch.tensor([[0.0]])) + self.register_buffer('prior_std', torch.tensor([[1.0]])) + + def encoder_network(self, sample: torch.Tensor) -> torch.Tensor: + """ Transform input sample into an encoded representation """ + return self.encoder(sample) + + def encoder_distribution(self, encoded: torch.Tensor) -> dist.Distribution: + """ Create a pytorch distribution object 
representing the encoder distribution (approximate posterior) """ + mu = encoded[:, :self.representation_length] + std = F.softplus(encoded[:, self.representation_length:]) + + return dist.Independent(dist.Normal(mu, std), 1) + + def decoder_network(self, z: torch.Tensor) -> torch.Tensor: + """ Transform encoded value into a decoded representation """ + return self.decoder(z) + + def decoder_distribution(self, decoded: torch.Tensor) -> dist.Distribution: + """ Create a pytorch distribution object representing the decoder distribution (likelihood) """ + return dist.Independent(dist.Bernoulli(probs=decoded), 3) + + def prior_distribution(self) -> dist.Distribution: + """ Return a prior distribution object """ + return dist.Independent(dist.Normal(self.prior_mean, self.prior_std), 1) + + def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: + """ Sample from a decoder distribution - we ignore that since it's so weak in this case """ + return decoded + +# @staticmethod +# def _weight_initializer(tensor): +# init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('tanh')) +# init.constant_(tensor.bias, 0.01) +# +# def reset_weights(self): +# for m in it.chain(self.encoder, self.decoder): +# if isinstance(m, nn.Conv2d): +# self._weight_initializer(m) +# elif isinstance(m, nn.ConvTranspose2d): +# self._weight_initializer(m) +# elif isinstance(m, nn.Linear): +# self._weight_initializer(m) + + +def create(img_rows, img_cols, img_channels, k=5, layers=None, representation_length=32, max_grad_norm=None, + analytical_kl_div=True): + """ Vel factory function """ + if layers is None: + layers = [512, 256] + + def instantiate(**_): + return FcIwae( + img_rows, img_cols, img_channels, k=k, layers=layers, representation_length=representation_length, + max_grad_norm=max_grad_norm, analytical_kl_div=analytical_kl_div + ) + + return ModelFactory.generic(instantiate) diff --git a/vel/model/latent/fc_vae.py b/vel/model/latent/fc_vae.py index 49e91fbd..50d7d99c 100644 --- 
a/vel/model/latent/fc_vae.py +++ b/vel/model/latent/fc_vae.py @@ -7,7 +7,7 @@ from vel.api import ModelFactory from vel.module.layers import Flatten, Reshape -from vel.model.autoencoder.vae_base import VaeBase +from vel.model.latent.vae_base import VaeBase class FcVae(VaeBase): @@ -37,7 +37,7 @@ def __init__(self, img_rows, img_cols, img_channels, layers=None, representation ) self.decoder = nn.Sequential( - nn.Linear(representation_length, self.layers[1]), + nn.Linear(in_features=representation_length, out_features=self.layers[1]), nn.Tanh(), nn.Linear(in_features=self.layers[1], out_features=self.layers[0]), nn.Tanh(), @@ -74,7 +74,7 @@ def prior_distribution(self) -> dist.Distribution: def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: """ Sample from a decoder distribution - we ignore that since it's so weak in this case """ - return self.decoder_network(decoded) + return decoded # @staticmethod # def _weight_initializer(tensor): diff --git a/vel/model/latent/iwae.py b/vel/model/latent/iwae.py new file mode 100644 index 00000000..46359c41 --- /dev/null +++ b/vel/model/latent/iwae.py @@ -0,0 +1,73 @@ +import torch.nn.utils + +from vel.model.latent.vae_base import VaeBase + + +class IWAE(VaeBase): + """ + Importance-Weighted Auto-Encoder https://arxiv.org/abs/1509.00519 + """ + + def __init__(self, k: int = 5, analytical_kl_div=True, max_grad_norm=1.0): + super().__init__(analytical_kl_div=analytical_kl_div, max_grad_norm=max_grad_norm) + + self.k = k + + def calculate_gradient(self, data: dict) -> dict: + """ Calculate model gradient for given data sample """ + encoded = self.encoder_network(data['x']) + z_dist = self.encoder_distribution(encoded) + + bs = encoded.size(0) + # Encode importance samples into batch dimension for the decoded network + z = z_dist.rsample([self.k]).reshape([bs * self.k, -1]) + + decoded = self.decoder_network(z) + decoded = decoded.reshape([self.k, bs] + list(decoded.shape[1:])) + + # Unpack to make distribution 
efficient for broadcasting + x_dist = self.decoder_distribution(decoded) + prior = self.prior_distribution() + + kl_divergence = self.kl_divergence(z, z_dist, prior) + reconstruction = x_dist.log_prob(data['y']) + + # ELBO is E_q log p(x, z) / q(z | x) + # Which can be expressed in many equivalent forms: + # (1) E_q log p(x | z) + log p(z) - log q(z | x) + # (2) E_q log p(x | z) - D_KL(q(z | x) || p(z)) + # (3) log p(x) - D_KL(q(z | x) || p(z | x)) + + # Form 3 is interesting from a theoretical standpoint, but is intractable to compute directly + # While forms (1) and (2) can be computed directly. + # Positive aspect of form (2) is that KL divergence can be calculated analytically + # further reducing the variance of the gradient + elbo = reconstruction - kl_divergence + + # Perform log-mean-exp on the axis of importance samples + # Then mean across batch + elbo = self.log_mean_exp(elbo, 0).mean() + + loss = -elbo + + if self.training: + loss.backward() + + if self.max_grad_norm is not None: + grad_norm = torch.nn.utils.clip_grad_norm_( + filter(lambda p: p.requires_grad, self.parameters()), + max_norm=self.max_grad_norm + ) + else: + grad_norm = 0.0 + else: + grad_norm = 0.0 + + with torch.no_grad(): + return { + 'loss': loss.item(), + + 'grad_norm': grad_norm, + 'reconstruction': -reconstruction.mean().item(), + 'kl_divergence': kl_divergence.mean().item() + } From 6ffbb4cb29270123ce1403ca79dcdb166f3d3ef0 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 24 Sep 2019 20:35:33 -0700 Subject: [PATCH 091/162] IWAE implementation.
--- .../latent/mnist/mnist_cnn_iwae.yaml | 48 ++++++ .../latent/mnist/mnist_fc_iwae.yaml | 5 +- vel/api/model.py | 6 +- vel/model/latent/cnn_iwae.py | 158 ++++++++++++++++++ vel/model/latent/vae_base.py | 1 + 5 files changed, 213 insertions(+), 5 deletions(-) create mode 100644 examples-configs/latent/mnist/mnist_cnn_iwae.yaml create mode 100644 vel/model/latent/cnn_iwae.py diff --git a/examples-configs/latent/mnist/mnist_cnn_iwae.yaml b/examples-configs/latent/mnist/mnist_cnn_iwae.yaml new file mode 100644 index 00000000..df3164fe --- /dev/null +++ b/examples-configs/latent/mnist/mnist_cnn_iwae.yaml @@ -0,0 +1,48 @@ +name: 'mnist_cnn_iwae' + + +model: + name: vel.model.latent.cnn_iwae + img_rows: 28 + img_cols: 28 + img_channels: 1 + channels: [64, 128, 256] + representation_length: 50 + max_grad_norm: 1.0 + analytical_kl_div: true + k: 5 # It's hard to sample many samples for this slightly larger network + + +source: + name: vel.data.source.vision.mnist + + +loader: + name: vel.data.dataset_loader + batch_size: 128 + num_workers: 4 + pin_memory: true + + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.binarize_image + - name: vel.data.transformation.image_to_tensor + - name: vel.data.transformation.unsupervised + + +optimizer: + name: vel.optimizer.radam + lr: 1.0e-3 + eps: 1.0e-4 + + +scheduler: + name: vel.scheduler.multi_step + gamma: 0.71968 # 10 ** (-1/7) + milestones: [ 1, 4, 13, 40, 121, 364, 1093, 3280] + + +commands: + train: + name: vel.command.train_command + epochs: 3280 \ No newline at end of file diff --git a/examples-configs/latent/mnist/mnist_fc_iwae.yaml b/examples-configs/latent/mnist/mnist_fc_iwae.yaml index 29979c21..e4ca4abb 100644 --- a/examples-configs/latent/mnist/mnist_fc_iwae.yaml +++ b/examples-configs/latent/mnist/mnist_fc_iwae.yaml @@ -1,4 +1,4 @@ -name: 'mnist_fc_vae' +name: 'mnist_fc_iwae' model: @@ -10,12 +10,13 @@ model: representation_length: 50 max_grad_norm: 1.0 analytical_kl_div:
true - k: 5 + k: 50 # Because it's such a small network we can try many importance samples source: name: vel.data.source.vision.mnist + loader: name: vel.data.dataset_loader batch_size: 128 diff --git a/vel/api/model.py b/vel/api/model.py index 53406a3e..9cb31443 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -15,7 +15,7 @@ def metrics(self) -> list: return [] def train(self, mode=True): - r""" + """ Sets the module in training mode. This has any effect only on certain modules. See documentations of @@ -68,8 +68,8 @@ class GradientModel(Model): def calculate_gradient(self, data: dict) -> dict: """ - Calculate gradient for given batch of supervised learning. - Returns a dictionary of metrics + Calculate gradient for given batch of training data. + Returns a dictionary of metrics. """ raise NotImplementedError diff --git a/vel/model/latent/cnn_iwae.py b/vel/model/latent/cnn_iwae.py new file mode 100644 index 00000000..c4b79ded --- /dev/null +++ b/vel/model/latent/cnn_iwae.py @@ -0,0 +1,158 @@ +import itertools as it + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F +import torch.distributions as dist + +import vel.util.network as net_util + +from vel.api import ModelFactory +from vel.module.layers import Flatten, Reshape +from vel.model.latent.iwae import IWAE + + +class CnnIWAE(IWAE): + """ + A simple IWAE, containing 3 convolutional layers + """ + + def __init__(self, img_rows, img_cols, img_channels, k=5, channels=None, representation_length=32, + analytical_kl_div=True, max_grad_norm=0.5): + super().__init__(k=k, analytical_kl_div=analytical_kl_div, max_grad_norm=max_grad_norm) + + if channels is None: + channels = [16, 32, 32] + + layer_series = [ + (3, 1, 1), + (3, 1, 2), + (3, 1, 2), + ] + + self.representation_length = representation_length + + self.final_width = net_util.convolutional_layer_series(img_rows, layer_series) + self.final_height = net_util.convolutional_layer_series(img_cols, 
layer_series) + self.channels = channels + + self.encoder = nn.Sequential( + nn.Conv2d(in_channels=img_channels, out_channels=channels[0], kernel_size=(3, 3), padding=1), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[0], + net_util.convolutional_layer_series(img_rows, layer_series[:1]), + net_util.convolutional_layer_series(img_cols, layer_series[:1]), + ]), + nn.Conv2d(in_channels=channels[0], out_channels=channels[1], kernel_size=(3, 3), stride=2, padding=1), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[1], + net_util.convolutional_layer_series(img_rows, layer_series[:2]), + net_util.convolutional_layer_series(img_cols, layer_series[:2]), + ]), + nn.Conv2d(in_channels=channels[1], out_channels=channels[2], kernel_size=(3, 3), stride=2, padding=1), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[2], + net_util.convolutional_layer_series(img_rows, layer_series), + net_util.convolutional_layer_series(img_cols, layer_series), + ]), + Flatten(), + nn.Linear(self.final_width * self.final_height * channels[2], representation_length * 2) + ) + + self.decoder = nn.Sequential( + nn.Linear(representation_length, self.final_width * self.final_height * channels[2]), + # nn.ReLU(True), + nn.SELU(True), + Reshape(channels[2], self.final_width, self.final_height), + nn.LayerNorm([ + channels[2], + net_util.convolutional_layer_series(img_rows, layer_series), + net_util.convolutional_layer_series(img_cols, layer_series), + ]), + nn.ConvTranspose2d( + in_channels=channels[2], out_channels=channels[1], kernel_size=3, stride=2, padding=1, output_padding=1 + ), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[1], + net_util.convolutional_layer_series(img_rows, layer_series[:2]), + net_util.convolutional_layer_series(img_cols, layer_series[:2]), + ]), + nn.ConvTranspose2d( + in_channels=channels[1], out_channels=channels[0], kernel_size=3, stride=2, padding=1, output_padding=1 + ), + # nn.ReLU(True), + nn.SELU(True), + 
nn.LayerNorm([ + channels[0], + net_util.convolutional_layer_series(img_rows, layer_series[:1]), + net_util.convolutional_layer_series(img_cols, layer_series[:1]), + ]), + nn.ConvTranspose2d(in_channels=channels[0], out_channels=img_channels, kernel_size=3, padding=1), + nn.Sigmoid() + ) + + self.register_buffer('prior_mean', torch.tensor([[0.0]])) + self.register_buffer('prior_std', torch.tensor([[1.0]])) + + @staticmethod + def _weight_initializer(tensor): + init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('relu')) + init.constant_(tensor.bias, 0.0) + + def reset_weights(self): + for m in it.chain(self.encoder, self.decoder): + if isinstance(m, nn.Conv2d): + self._weight_initializer(m) + elif isinstance(m, nn.ConvTranspose2d): + self._weight_initializer(m) + elif isinstance(m, nn.Linear): + self._weight_initializer(m) + + def encoder_network(self, sample: torch.Tensor) -> torch.Tensor: + """ Transform input sample into an encoded representation """ + return self.encoder(sample) + + def encoder_distribution(self, encoded: torch.Tensor) -> dist.Distribution: + """ Create a pytorch distribution object representing the encoder distribution (approximate posterior) """ + mu = encoded[:, :self.representation_length] + std = F.softplus(encoded[:, self.representation_length:]) + + return dist.Independent(dist.Normal(mu, std), 1) + + def decoder_network(self, z: torch.Tensor) -> torch.Tensor: + """ Transform encoded value into a decoded representation """ + return self.decoder(z) + + def decoder_distribution(self, decoded: torch.Tensor) -> dist.Distribution: + """ Create a pytorch distribution object representing the decoder distribution (likelihood) """ + return dist.Independent(dist.Bernoulli(probs=decoded), 3) + + def prior_distribution(self) -> dist.Distribution: + """ Return a prior distribution object """ + return dist.Independent(dist.Normal(self.prior_mean, self.prior_std), 1) + + def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: + """ 
Sample from a decoder distribution - we ignore that since it's so weak in this case """ + return decoded + + +def create(img_rows, img_cols, img_channels, k=5, channels=None, representation_length=32): + """ Vel factory function """ + if channels is None: + channels = [16, 32, 32] + + def instantiate(**_): + return CnnIWAE( + img_rows, img_cols, img_channels, k=k, channels=channels, representation_length=representation_length + ) + + return ModelFactory.generic(instantiate) diff --git a/vel/model/latent/vae_base.py b/vel/model/latent/vae_base.py index 51bbb070..03fa88f7 100644 --- a/vel/model/latent/vae_base.py +++ b/vel/model/latent/vae_base.py @@ -8,6 +8,7 @@ class VaeBase(GradientModel): """ Base module for variational auto-encoder implementations """ + def __init__(self, analytical_kl_div=True, max_grad_norm=1.0): super().__init__() From 0b9d7313dcd8159d0f565313a435713b73bb08d2 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 24 Sep 2019 21:16:25 -0700 Subject: [PATCH 092/162] Added VQ-VAE repo to bibliograpgy. --- docs/Bibliography.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Bibliography.md b/docs/Bibliography.md index 8ed79213..2490513a 100644 --- a/docs/Bibliography.md +++ b/docs/Bibliography.md @@ -157,4 +157,5 @@ in the following repositories (in alphabetical order): - https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer - https://github.com/openai/baselines - https://github.com/pytorch/pytorch +- https://github.com/ritheshkumar95/pytorch-vqvae/ - https://github.com/tensorflow/tensorflow \ No newline at end of file From 5a7c2980db697ee8bff5fcd57567caf1921ab8f6 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 24 Sep 2019 21:16:39 -0700 Subject: [PATCH 093/162] Run tag support, and cleaned up training info. 
--- README.md | 14 +-- examples-scripts/rl/atari/a2c/breakout_a2c.py | 101 ---------------- .../rl/atari/a2c/breakout_a2c_evaluate.py | 66 ---------- examples-scripts/rl/atari/ppo/qbert_ppo.py | 114 ------------------ .../rl/mujoco/ddpg/half_cheetah_ddpg.py | 110 ----------------- vel/api/info.py | 3 +- vel/api/model_config.py | 114 ++++++++++++++---- vel/command/phase_train_command.py | 1 - vel/command/rnn/generate_text.py | 3 +- vel/command/train_command.py | 1 - vel/data/source/nlp/text_url.py | 2 +- vel/internal/provider.py | 3 +- vel/launcher.py | 3 +- vel/notebook/loader.py | 10 +- vel/rl/command/enjoy.py | 3 +- vel/rl/command/evaluate_env_command.py | 2 +- vel/rl/command/record_movie_command.py | 3 +- vel/storage/streaming/stdout.py | 16 ++- 18 files changed, 122 insertions(+), 447 deletions(-) delete mode 100644 examples-scripts/rl/atari/a2c/breakout_a2c.py delete mode 100644 examples-scripts/rl/atari/a2c/breakout_a2c_evaluate.py delete mode 100644 examples-scripts/rl/atari/ppo/qbert_ppo.py delete mode 100644 examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py diff --git a/README.md b/README.md index ffed0177..5f678ffd 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,7 @@ Bring **velocity** to deep-learning research. This project hosts a collection of **highly modular** deep learning components that are tested to be working well together. -A simple yaml-based system ties these modules together declaratively using configuration files, -but everything that can be defined using config files can be coded directly in the python script as well. +A simple yaml-based system ties these modules together declaratively using configuration files. This is still an early version and a hobby project so documentation is unfortunately nonexistent. I've tried to make the @@ -33,7 +32,7 @@ into a structure that is designed to be reused rather than copied over. 
As a goal, it should be enough to write a config file that wires existing components together and defines their hyperparameters for most common applications. -If that's not the case few bits of custom glue code should do the jobatari. +If that's not the case few bits of custom glue code should do the job. This repository is still in an early stage of that journey but it will grow @@ -54,7 +53,7 @@ pip install -e . ``` from the repository root directory. -This project requires Python at least 3.6 and PyTorch 1.1. +This project requires Python at least 3.6 and PyTorch 1.2. If you want to run YAML config examples, you'll also need a **project configuration file** `.velproject.yaml`. An example is included in this repository. @@ -86,7 +85,7 @@ To use it, just rename it to `.velproject.yaml`. Several models are already implemented in the framework and have example config files that are ready to run and easy to modify for other similar usecases: -- State-of-the art results on Cifar10 dataset using residual networks +- Residual networks (resnets) trained on Cifar10 dataset replicating published performance - Cats vs dogs classification using transfer learning from a resnet34 model pretrained on ImageNet @@ -99,8 +98,8 @@ that are ready to run and easy to modify for other similar usecases: # Implemented models - Reinforcement learning - Continuous and discrete action spaces -- Basic support for LSTM policies for A2C and PPO -- Following published policy gradient reinforcement learning algorithms: +- Basic support for recurrent policies for A2C and PPO +- Following policy gradient reinforcement learning algorithms: - Advantage Actor-Critic (A2C) - Deep Deterministic Policy Gradient (DDPG) - Proximal Policy Optimization (PPO) @@ -122,6 +121,7 @@ that are ready to run and easy to modify for other similar usecases: - Latent variable models: - Variational AutoEncoders (VAE) - Importance Weighted AutoEncoder (IWAE) + - Vector-Quantised Variational AutoEncoder (VQ-VAE) # Examples 
diff --git a/examples-scripts/rl/atari/a2c/breakout_a2c.py b/examples-scripts/rl/atari/a2c/breakout_a2c.py deleted file mode 100644 index 4cb9560a..00000000 --- a/examples-scripts/rl/atari/a2c/breakout_a2c.py +++ /dev/null @@ -1,101 +0,0 @@ -import torch -import torch.optim as optim - -from vel.rl.metrics import EpisodeRewardMetric -from vel.storage.streaming.stdout import StdoutStreaming -from vel.util.random import set_seed - -from vel.rl.env.classic_atari import ClassicAtariEnv -from vel.rl.vecenv.subproc import SubprocVecEnvWrapper - -from vel.module.input.image_to_tensor import ImageToTensorFactory -from vel.rl.policy.stochastic_policy import StochasticPolicy -from vel.rl.backbone.nature_cnn import NatureCnnFactory - - -from vel.rl.reinforcer.on_policy_iteration_reinforcer import ( - OnPolicyIterationReinforcer, OnPolicyIterationReinforcerSettings -) - -from vel.rl.algo.policy_gradient.a2c import A2CPolicyGradient -from vel.rl.env_roller.step_env_roller import StepEnvRoller - -from vel.api.info import TrainingInfo, EpochInfo - - -def breakout_a2c(): - device = torch.device('cuda:0') - seed = 1001 - - # Set random seed in python std lib, numpy and pytorch - set_seed(seed) - - # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers - # These are just helper functions for that - vec_env = SubprocVecEnvWrapper( - ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 - ).instantiate(parallel_envs=16, seed=seed) - - # Again, use a helper to create a model - # But because model is owned by the reinforcer, model should not be accessed using this variable - # but from reinforcer.model property - model = StochasticPolicy( - input_block=ImageToTensorFactory(), - backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) - ).instantiate(action_space=vec_env.action_space) - - # Reinforcer - an object managing the learning process - reinforcer = OnPolicyIterationReinforcer( - device=device, - 
settings=OnPolicyIterationReinforcerSettings( - batch_size=256, - number_of_steps=5, - ), - model=model, - algo=A2CPolicyGradient( - entropy_coefficient=0.01, - value_coefficient=0.5, - max_grad_norm=0.5, - discount_factor=0.99, - ), - env_roller=StepEnvRoller( - environment=vec_env, - device=device, - ) - ) - - # Model optimizer - optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=7.0e-4, eps=1e-3) - - # Overall information store for training information - training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[StdoutStreaming()] # Print live metrics every epoch to standard output - ) - - # A bit of training initialization bookkeeping... - training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 100 batches per epoch to average metrics nicely - num_epochs = int(1.1e7 / (5 * 16) / 100) - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=100, - optimizer=optimizer - ) - - reinforcer.train_epoch(epoch_info) - - training_info.on_train_end() - - -if __name__ == '__main__': - breakout_a2c() diff --git a/examples-scripts/rl/atari/a2c/breakout_a2c_evaluate.py b/examples-scripts/rl/atari/a2c/breakout_a2c_evaluate.py deleted file mode 100644 index 3c31f6ab..00000000 --- a/examples-scripts/rl/atari/a2c/breakout_a2c_evaluate.py +++ /dev/null @@ -1,66 +0,0 @@ -import torch -import pandas as pd -import numpy as np - -from vel.modules.input.image_to_tensor import ImageToTensorFactory -from vel.openai.baselines.common.atari_wrappers import FrameStack -from vel.rl.env.classic_atari import ClassicAtariEnv -from vel.rl.models.backbone.nature_cnn import NatureCnnFactory -from vel.rl.models.stochastic_policy_model import StochasticPolicyModelFactory - - -def breakout_a2c_evaluate(checkpoint_file_path, 
takes=10): - model_checkpoint = torch.load(checkpoint_file_path) - device = torch.device('cuda:0') - - env = FrameStack( - ClassicAtariEnv('BreakoutNoFrameskip-v4').instantiate(preset='record'), k=4 - ) - - model = StochasticPolicyModelFactory( - input_block=ImageToTensorFactory(), - backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) - ).instantiate(action_space=env.action_space) - - model.load_state_dict(model_checkpoint) - model = model.to(device) - - model.eval() - - rewards = [] - lengths = [] - - for i in range(takes): - result = record_take(model, env, device) - rewards.append(result['r']) - lengths.append(result['l']) - - print(pd.DataFrame({'lengths': lengths, 'rewards': rewards}).describe()) - - -@torch.no_grad() -def record_take(model, env_instance, device): - frames = [] - - observation = env_instance.reset() - - frames.append(env_instance.render('rgb_array')) - - print("Evaluating environment...") - - while True: - observation_array = np.expand_dims(np.array(observation), axis=0) - observation_tensor = torch.from_numpy(observation_array).to(device) - actions = model.step(observation_tensor, deterministic=True)['actions'] - - observation, reward, done, epinfo = env_instance.step(actions.item()) - - frames.append(env_instance.render('rgb_array')) - - if 'episode' in epinfo: - # End of an episode - return epinfo['episode'] - - -if __name__ == '__main__': - breakout_a2c_evaluate("checkpoint_00001375.data", takes=2) diff --git a/examples-scripts/rl/atari/ppo/qbert_ppo.py b/examples-scripts/rl/atari/ppo/qbert_ppo.py deleted file mode 100644 index 98388dd1..00000000 --- a/examples-scripts/rl/atari/ppo/qbert_ppo.py +++ /dev/null @@ -1,114 +0,0 @@ -import torch -import torch.optim as optim - -from vel.rl.metrics import EpisodeRewardMetric -from vel.storage.streaming.stdout import StdoutStreaming -from vel.util.random import set_seed -from vel.api.info import TrainingInfo, EpochInfo - -from vel.modules.input.image_to_tensor import 
ImageToTensorFactory -from vel.rl.env.classic_atari import ClassicAtariEnv -from vel.rl.vecenv.subproc import SubprocVecEnvWrapper -from vel.rl.models.stochastic_policy_model import StochasticPolicyModelFactory -from vel.rl.models.backbone.nature_cnn import NatureCnnFactory - -from vel.rl.reinforcers.on_policy_iteration_reinforcer import ( - OnPolicyIterationReinforcer, OnPolicyIterationReinforcerSettings -) - -from vel.rl.algo.policy_gradient.ppo import PpoPolicyGradient -from vel.rl.env_roller.step_env_roller import StepEnvRoller -from vel.rl.commands.rl_train_command import FrameTracker - -from vel.schedules.linear import LinearSchedule - - -def qbert_ppo(): - device = torch.device('cuda:0') - seed = 1001 - - # Set random seed in python std lib, numpy and pytorch - set_seed(seed) - - # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers - # These are just helper functions for that - vec_env = SubprocVecEnvWrapper( - ClassicAtariEnv('QbertNoFrameskip-v4'), frame_history=4 - ).instantiate(parallel_envs=8, seed=seed) - - # Again, use a helper to create a model - # But because model is owned by the reinforcer, model should not be accessed using this variable - # but from reinforcer.model property - model = StochasticPolicyModelFactory( - input_block=ImageToTensorFactory(), - backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) - ).instantiate(action_space=vec_env.action_space) - - # Set schedule for gradient clipping. 
- cliprange = LinearSchedule( - initial_value=0.1, - final_value=0.0 - ) - - # Reinforcer - an object managing the learning process - reinforcer = OnPolicyIterationReinforcer( - device=device, - settings=OnPolicyIterationReinforcerSettings( - batch_size=256, - experience_replay=4, - number_of_steps=128 - ), - model=model, - algo=PpoPolicyGradient( - entropy_coefficient=0.01, - value_coefficient=0.5, - max_grad_norm=0.5, - discount_factor=0.99, - gae_lambda=0.95, - cliprange=cliprange - ), - env_roller=StepEnvRoller( - environment=vec_env, - device=device, - ) - ) - - # Model optimizer - optimizer = optim.Adam(reinforcer.model.parameters(), lr=2.5e-4, eps=1.0e-5) - - # Overall information store for training information - training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[ - StdoutStreaming(), # Print live metrics every epoch to standard output - FrameTracker(1.1e7) # We need frame tracker to track the progress of learning - ] - ) - - # A bit of training initialization bookkeeping... 
- training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 10 batches per epoch to average metrics nicely - # Rollout size is 8 environments times 128 steps - num_epochs = int(1.1e7 / (128 * 8) / 10) - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=10, - optimizer=optimizer - ) - - reinforcer.train_epoch(epoch_info) - - training_info.on_train_end() - - -if __name__ == '__main__': - qbert_ppo() diff --git a/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py b/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py deleted file mode 100644 index 49900c0e..00000000 --- a/examples-scripts/rl/mujoco/ddpg/half_cheetah_ddpg.py +++ /dev/null @@ -1,110 +0,0 @@ -import torch -import torch.optim - -from vel.api import TrainingInfo, EpochInfo -from vel.module.input.normalize_observations import NormalizeObservationsFactory -from vel.rl.buffer.circular_replay_buffer import CircularReplayBuffer -from vel.rl.env_roller.transition_replay_env_roller import TransitionReplayEnvRoller -from vel.rl.metrics import EpisodeRewardMetric -from vel.rl.module.noise.ou_noise import OuNoise -from vel.storage.streaming.stdout import StdoutStreaming -from vel.util.random import set_seed -from vel.rl.env.mujoco import MujocoEnv -from vel.rl.model.deterministic_policy_model import DeterministicPolicyModelFactory -from vel.rl.backbone.mlp import MLPFactory -from vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer import ( - BufferedOffPolicyIterationReinforcer, BufferedOffPolicyIterationReinforcerSettings -) -from vel.rl.algo.policy_gradient.ddpg import DeepDeterministicPolicyGradient -from vel.rl.vecenv.dummy import DummyVecEnvWrapper -from vel.optimizer.adam import AdamFactory - - -def half_cheetah_ddpg(): - device = torch.device('cuda:0') - seed = 1002 - - # Set random seed in python std lib, numpy and 
pytorch - set_seed(seed) - - vec_env = DummyVecEnvWrapper( - MujocoEnv('HalfCheetah-v2') - ).instantiate(parallel_envs=1, seed=seed) - - model_factory = DeterministicPolicyModelFactory( - input_block=NormalizeObservationsFactory(input_shape=17), - policy_backbone=MLPFactory(input_length=17, hidden_layers=[64, 64], activation='tanh'), - value_backbone=MLPFactory(input_length=23, hidden_layers=[64, 64], activation='tanh'), - ) - - model = model_factory.instantiate(action_space=vec_env.action_space) - - reinforcer = BufferedOffPolicyIterationReinforcer( - device=device, - environment=vec_env, - settings=BufferedOffPolicyIterationReinforcerSettings( - rollout_steps=2, - training_steps=64, - ), - model=model, - algo=DeepDeterministicPolicyGradient( - model_factory=model_factory, - discount_factor=0.99, - tau=0.01, - ), - env_roller=TransitionReplayEnvRoller( - environment=vec_env, - device=device, - action_noise=OuNoise(std_dev=0.2, environment=vec_env), - replay_buffer=CircularReplayBuffer( - buffer_capacity=1_000_000, - buffer_initial_size=2_000, - num_envs=vec_env.num_envs, - observation_space=vec_env.observation_space, - action_space=vec_env.action_space - ), - normalize_returns=True, - discount_factor=0.99 - ), - ) - - # Optimizer helper - A weird regularization settings I've copied from OpenAI code - adam_optimizer = AdamFactory( - lr=[1.0e-4, 1.0e-3, 1.0e-3], - weight_decay=[0.0, 0.0, 0.001], - eps=1.0e-4, - layer_groups=True - ).instantiate(model) - - # Overall information store for training information - training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[StdoutStreaming()] # Print live metrics every epoch to standard output - ) - - # A bit of training initialization bookkeeping... 
- training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 20 batches per epoch to average metrics nicely - num_epochs = int(1.0e6 / 2 / 1000) - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=1000, - optimizer=adam_optimizer - ) - - reinforcer.train_epoch(epoch_info) - - training_info.on_train_end() - - -if __name__ == '__main__': - half_cheetah_ddpg() diff --git a/vel/api/info.py b/vel/api/info.py index 4e5957d4..11544cff 100644 --- a/vel/api/info.py +++ b/vel/api/info.py @@ -33,13 +33,12 @@ class TrainingInfo(abc.MutableMapping): Data dict is any extra information processes may want to store """ - def __init__(self, start_epoch_idx=0, run_name: typing.Optional[str] = None, metrics=None, callbacks=None): + def __init__(self, start_epoch_idx=0, metrics=None, callbacks=None): self.data_dict = {} self.start_epoch_idx = start_epoch_idx self.metrics = metrics if metrics is not None else [] self.callbacks = callbacks if callbacks is not None else [] - self.run_name = run_name self.history = TrainingHistory() self.optimizer_initial_state = None diff --git a/vel/api/model_config.py b/vel/api/model_config.py index bd100eeb..b593e878 100644 --- a/vel/api/model_config.py +++ b/vel/api/model_config.py @@ -1,5 +1,7 @@ import datetime as dtm +import json import os.path +import pathlib import typing from vel.exception import VelInitializationException @@ -16,6 +18,7 @@ class ModelConfig: """ PROJECT_FILE_NAME = '.velproject.yaml' + META_FILE_NAME = 'meta.json' @staticmethod def find_project_directory(start_path) -> str: @@ -39,7 +42,7 @@ def from_project_directory(path) -> str: @classmethod def from_file(cls, filename: str, run_number: int = 1, continue_training: bool = False, seed: int = None, - device: str = 'cuda', params=None): + device: str = 'cuda', parameters: typing.Optional[dict] = 
None, tag: typing.Optional[str] = None): """ Create model config from file """ with open(filename, 'r') as fp: model_config_contents = Parser.parse(fp) @@ -62,12 +65,14 @@ def from_file(cls, filename: str, run_number: int = 1, continue_training: bool = continue_training=continue_training, seed=seed, device=device, - parameters=params + parameters=parameters, + tag=tag ) @classmethod def script(cls, model_name: str = 'script', configuration: typing.Optional[dict] = None, run_number: int = 1, - continue_training=False, seed: int = None, device: str = 'cuda', params=None): + continue_training=False, seed: int = None, device: str = 'cuda', + parameters: typing.Optional[dict] = None, tag: typing.Optional[str] = None): """ Create model config from supplied data """ if configuration is None: configuration = {} @@ -92,11 +97,13 @@ def script(cls, model_name: str = 'script', configuration: typing.Optional[dict] continue_training=continue_training, seed=seed, device=device, - parameters=params + parameters=parameters, + tag=tag ) def __init__(self, filename: str, configuration: dict, run_number: int, project_dir: str, - continue_training=False, seed: int = None, device: str = 'cuda', parameters=None): + continue_training=False, seed: int = None, device: str = 'cuda', + parameters: typing.Optional[dict] = None, tag: typing.Optional[str] = None): self.filename = filename self.device = device self.continue_training = continue_training @@ -121,13 +128,48 @@ def __init__(self, filename: str, configuration: dict, run_number: int, project_ self._model_name = self.provider.get("name") + if continue_training: + self._meta = self._load_meta() + + if tag is None: + self._tag = self._meta['tag'] + else: + if self._tag != self._meta['tag']: + raise VelInitializationException("Model tag mismatch") + else: + self._tag = tag + self._meta = self._create_meta() + self._write_meta() + + 
#################################################################################################################### + # INTERNAL FUNCTIONS def _prepare_environment(self) -> dict: """ Return full environment for dependency injection """ return {**self.contents, 'run_number': self.run_number} - def render_configuration(self) -> dict: - """ Return a nice and picklable run configuration """ - return self.provider.render_configuration() + def _load_meta(self) -> dict: + """ Load previously written metadata about the project """ + if not os.path.exists(self.meta_dir(self.META_FILE_NAME)): + raise VelInitializationException("Previous run does not exist") + + with open(self.meta_dir(self.META_FILE_NAME), 'rt') as fp: + return json.load(fp) + + def _write_meta(self) -> None: + """ Write metadata to a file """ + pathlib.Path(self.meta_dir()).mkdir(parents=True, exist_ok=True) + + with open(self.meta_dir(self.META_FILE_NAME), 'wt') as fp: + return json.dump(self.meta, fp) + + def _create_meta(self) -> dict: + """ Metadata for this model/config """ + return { + 'run_name': self.run_name, + 'tag': self.tag, + 'created': dtm.datetime.now().strftime("%Y/%m/%d - %H:%M:%S"), + 'config': self.render_configuration() + } #################################################################################################################### # COMMAND UTILITIES @@ -142,29 +184,29 @@ def run_command(self, command_name, varargs): #################################################################################################################### # MODEL DIRECTORIES - def checkpoint_dir(self, *args) -> str: - """ Return checkpoint directory for this model """ - return self.output_dir('checkpoints', self.run_name, *args) + def project_top_dir(self, *args) -> str: + """ Project top-level directory """ + return os.path.join(self.project_dir, *args) - def data_dir(self, *args) -> str: - """ Return data directory for given dataset """ - return self.project_data_dir(*args) + def output_dir(self, 
*args) -> str: + """ Directory where to store output """ + return os.path.join(self.project_dir, self.output_directory_name, *args) - def openai_dir(self) -> str: + def meta_dir(self, *args) -> str: """ Return directory for openai output files for this model """ - return self.output_dir('openai', self.run_name) + return self.output_dir('meta', self.run_name, *args) - def project_data_dir(self, *args) -> str: + def data_dir(self, *args) -> str: """ Directory where to store data """ return os.path.normpath(os.path.join(self.project_dir, 'data', *args)) - def output_dir(self, *args) -> str: - """ Directory where to store output """ - return os.path.join(self.project_dir, self.output_directory_name, *args) + def checkpoint_dir(self, *args) -> str: + """ Return checkpoint directory for this model """ + return self.output_dir('checkpoints', self.run_name, *args) - def project_top_dir(self, *args) -> str: - """ Project top-level directory """ - return os.path.join(self.project_dir, *args) + def openai_dir(self, *args) -> str: + """ Return directory for openai output files for this model """ + return self.output_dir('openai', self.run_name, *args) #################################################################################################################### # NAME UTILITIES @@ -178,6 +220,16 @@ def name(self) -> str: """ Return name of the model """ return self._model_name + @property + def meta(self) -> dict: + """ Return name of the model """ + return self._meta + + @property + def tag(self) -> typing.Optional[str]: + """ Tag for this model/run number """ + return self._tag + #################################################################################################################### # MISC GETTERS def torch_device(self): @@ -185,6 +237,10 @@ def torch_device(self): import torch return torch.device(self.device) + def render_configuration(self) -> dict: + """ Return a nice and picklable run configuration """ + return self.provider.render_configuration() + 
#################################################################################################################### # PROVIDER API def provide(self, name): @@ -204,7 +260,16 @@ def banner(self, command_name) -> None: print("=" * 80) print(f"Pytorch version: {torch.__version__} cuda version {torch.version.cuda} cudnn version {torch.backends.cudnn.version()}") # noqa - print("Running model {}, run {} -- command {} -- device {}".format(self._model_name, self.run_number, command_name, self.device)) # noqa + + if self.tag: + print("Running model {}, run {} ({}) -- command {} -- device {}".format( + self._model_name, self.run_number, self.tag, command_name, self.device) + ) + else: + print("Running model {}, run {} -- command {} -- device {}".format( + self._model_name, self.run_number, command_name, self.device) + ) + if device.type == 'cuda': device_idx = 0 if device.index is None else device.index print(f"CUDA Device name {torch.cuda.get_device_name(device_idx)}") @@ -237,7 +302,6 @@ def load_trained_model(self): training_info = TrainingInfo( start_epoch_idx=last_epoch_idx, - run_name=self.run_name, ) model_state, hidden_state = storage.load(training_info) diff --git a/vel/command/phase_train_command.py b/vel/command/phase_train_command.py index 18566ebf..2318d7bf 100644 --- a/vel/command/phase_train_command.py +++ b/vel/command/phase_train_command.py @@ -129,7 +129,6 @@ def resume_training(self, learner, callbacks, metrics) -> (api.TrainingInfo, dic training_info = api.TrainingInfo( start_epoch_idx=start_epoch, - run_name=self.model_config.run_name, metrics=metrics, callbacks=callbacks ) diff --git a/vel/command/rnn/generate_text.py b/vel/command/rnn/generate_text.py index 7dda3b53..99b56647 100644 --- a/vel/command/rnn/generate_text.py +++ b/vel/command/rnn/generate_text.py @@ -28,8 +28,7 @@ def run(self): start_epoch = self.storage.last_epoch_idx() training_info = TrainingInfo( - start_epoch_idx=start_epoch, - run_name=self.model_config.run_name, + 
start_epoch_idx=start_epoch ) model_state, hidden_state = self.storage.load(training_info) diff --git a/vel/command/train_command.py b/vel/command/train_command.py index 9d1a1965..5d3ca3ce 100644 --- a/vel/command/train_command.py +++ b/vel/command/train_command.py @@ -87,7 +87,6 @@ def resume_training(self, learner, callbacks, metrics) -> api.TrainingInfo: training_info = api.TrainingInfo( start_epoch_idx=start_epoch, - run_name=self.model_config.run_name, metrics=metrics, callbacks=callbacks ) diff --git a/vel/data/source/nlp/text_url.py b/vel/data/source/nlp/text_url.py index 5f3d61eb..fea44f95 100644 --- a/vel/data/source/nlp/text_url.py +++ b/vel/data/source/nlp/text_url.py @@ -80,7 +80,7 @@ def download(self) -> dict: def create(model_config, url, local_dir, train_val_split=0.8): """ Vel factory function """ if not os.path.isabs(local_dir): - local_dir = model_config.project_data_dir(local_dir) + local_dir = model_config.data_dir(local_dir) return TextUrlSource( url, diff --git a/vel/internal/provider.py b/vel/internal/provider.py index d694ad24..e1060f2d 100644 --- a/vel/internal/provider.py +++ b/vel/internal/provider.py @@ -1,5 +1,6 @@ import importlib import inspect +import typing from vel.internal.parser import Variable from vel.internal.generic_factory import GenericFactory @@ -7,7 +8,7 @@ class Provider: """ Dependency injection resolver for the configuration file """ - def __init__(self, environment, instances=None, parameters=None): + def __init__(self, environment: dict, instances: typing.Optional[dict] = None, parameters: typing.Optional[dict] = None): self.environment = environment self.parameters = parameters if parameters is not None else {} diff --git a/vel/launcher.py b/vel/launcher.py index 7dfa94eb..18a4f687 100644 --- a/vel/launcher.py +++ b/vel/launcher.py @@ -17,6 +17,7 @@ def main(): parser.add_argument('-r', '--run_number', type=int, default=0, help="A run number") parser.add_argument('-d', '--device', default='cuda', help="A device to 
run the model on") parser.add_argument('-s', '--seed', type=int, default=None, help="Random seed for the project") + parser.add_argument('-t', '--tag', type=str, default=None, help="String tag for a given run") parser.add_argument('--werr', action='store_true', default=False, help="Convert warnings to errors") parser.add_argument( '-p', '--param', type=str, metavar='NAME=VALUE', action='append', default=[], @@ -38,7 +39,7 @@ def main(): model_config = ModelConfig.from_file( args.config, args.run_number, continue_training=getattr(args, 'continue'), device=args.device, seed=args.seed, - params={k: v for (k, v) in (Parser.parse_equality(eq) for eq in args.param)} + parameters={k: v for (k, v) in (Parser.parse_equality(eq) for eq in args.param)} ) if model_config.project_dir not in sys.path: diff --git a/vel/notebook/loader.py b/vel/notebook/loader.py index d28c048d..ca0db31b 100644 --- a/vel/notebook/loader.py +++ b/vel/notebook/loader.py @@ -1,19 +1,21 @@ from vel.api import ModelConfig -def load_config(config_path, run_number=0, device='cuda:0'): +def load_config(config_path, run_number=0, device='cuda:0', continue_training=True): """ Load a ModelConfig from filename """ return ModelConfig.from_file( ModelConfig.from_project_directory(config_path), run_number=run_number, - device=device + device=device, + continue_training=continue_training ) -def script(model_name: str = 'script', run_number=0, device='cuda:0'): +def script(model_name: str = 'script', run_number=0, device='cuda:0', continue_training=True): """ Create an ad-hoc script model config """ return ModelConfig.script( model_name=model_name, run_number=run_number, - device=device + device=device, + continue_training=continue_training ) diff --git a/vel/rl/command/enjoy.py b/vel/rl/command/enjoy.py index a0f056bc..14da7fa2 100644 --- a/vel/rl/command/enjoy.py +++ b/vel/rl/command/enjoy.py @@ -29,8 +29,7 @@ def run(self): model = self.model_factory.instantiate(action_space=env.action_space).to(device) 
training_info = TrainingInfo( - start_epoch_idx=self.storage.last_epoch_idx(), - run_name=self.model_config.run_name + start_epoch_idx=self.storage.last_epoch_idx() ) self.storage.load(training_info, model) diff --git a/vel/rl/command/evaluate_env_command.py b/vel/rl/command/evaluate_env_command.py index 33f7f4dc..3bf0eec7 100644 --- a/vel/rl/command/evaluate_env_command.py +++ b/vel/rl/command/evaluate_env_command.py @@ -38,7 +38,7 @@ def run(self): action_noise = None training_info = TrainingInfo( - start_epoch_idx=self.storage.last_epoch_idx(), run_name=self.model_config.run_name + start_epoch_idx=self.storage.last_epoch_idx() ) model_state, hidden_state = self.storage.load(training_info) diff --git a/vel/rl/command/record_movie_command.py b/vel/rl/command/record_movie_command.py index 6b6f3c4c..a7a14d78 100644 --- a/vel/rl/command/record_movie_command.py +++ b/vel/rl/command/record_movie_command.py @@ -31,8 +31,7 @@ def run(self): model = self.model_factory.instantiate(action_space=env.action_space).to(device) training_info = TrainingInfo( - start_epoch_idx=self.storage.last_epoch_idx(), - run_name=self.model_config.run_name + start_epoch_idx=self.storage.last_epoch_idx() ) model_state, hidden_state = self.storage.load(training_info) diff --git a/vel/storage/streaming/stdout.py b/vel/storage/streaming/stdout.py index d83e8f9d..7ef02893 100644 --- a/vel/storage/streaming/stdout.py +++ b/vel/storage/streaming/stdout.py @@ -1,13 +1,17 @@ -from vel.api import EpochInfo, Callback +from vel.api import EpochInfo, Callback, ModelConfig class StdoutStreaming(Callback): """ Stream results to stdout """ + def __init__(self, model_config: ModelConfig): + self.model_config = model_config + def on_epoch_end(self, epoch_info: EpochInfo): - if epoch_info.training_info.run_name: - print(f"=>>>>>>>>>> EPOCH {epoch_info.global_epoch_idx} [{epoch_info.training_info.run_name}]") + if self.model_config.tag: + tag = self.model_config.tag + print(f"=>>>>>>>>>> EPOCH 
{epoch_info.global_epoch_idx} [{self.model_config.run_name} - {tag}]") else: - print(f"=>>>>>>>>>> EPOCH {epoch_info.global_epoch_idx}") + print(f"=>>>>>>>>>> EPOCH {epoch_info.global_epoch_idx} [{self.model_config.run_name}]") if any(x.dataset is None for x in epoch_info.result.keys()): self._print_metrics_line(epoch_info.result, dataset=None) @@ -37,6 +41,6 @@ def _print_metrics_line(metrics, dataset=None): print('{0: <10}'.format(dataset.capitalize()), " ".join(metrics_list)) -def create(): +def create(model_config): """ Vel factory function """ - return StdoutStreaming() + return StdoutStreaming(model_config) From 118454f4786986bd8808d3db7252130cff8326c1 Mon Sep 17 00:00:00 2001 From: Federico Galatolo Date: Thu, 26 Sep 2019 00:00:32 +0200 Subject: [PATCH 094/162] Configurable Evaluator cache (#52) * Added configurable cache to the Evaluator * Added tests for configurable cache --- vel/rl/api/evaluator.py | 34 ++++++++++++++++++------- vel/rl/test/test_evaluator_cache.py | 39 +++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 vel/rl/test/test_evaluator_cache.py diff --git a/vel/rl/api/evaluator.py b/vel/rl/api/evaluator.py index c8a98307..e0c15d1c 100644 --- a/vel/rl/api/evaluator.py +++ b/vel/rl/api/evaluator.py @@ -2,14 +2,19 @@ class EvaluatorMeta(type): """ Metaclass for Evaluator - gathers all provider methods in a class attribute """ def __new__(mcs, name, bases, attributes): providers = {} + use_cache = {} for name, attr in attributes.items(): if callable(attr): proper_name = getattr(attr, '_vel_evaluator_provides', None) - if proper_name is not None: providers[proper_name] = attr + + cache = getattr(attr, '_vel_use_cache', None) + if cache is not None: + use_cache[proper_name] = cache + attributes['_use_cache'] = use_cache attributes['_providers'] = providers return super().__new__(mcs, name, bases, attributes) @@ -88,10 +93,12 @@ class Evaluator(metaclass=EvaluatorMeta): """ @staticmethod - def 
provides(name): + def provides(name, cache=True): """ Function decorator - value provided by the evaluator """ def decorator(func): func._vel_evaluator_provides = name + func._vel_use_cache = cache + return func return decorator @@ -112,7 +119,7 @@ def is_provided(self, name): else: return False - def get(self, name): + def get(self, name, cache=True): """ Return a value from this evaluator. @@ -120,18 +127,27 @@ def get(self, name): with and without no_grad() context. It is advised in such cases to not use no_grad and stick to .detach() + + If you want to disable the cache you can pass 'cache=False' to the decorator to disable it + for the attribute or to the get() function to disable it just for that call """ - if name in self._storage: - return self._storage[name] + if name in self._use_cache and not self._use_cache[name]: + cache = False + + if name in self._storage and cache: + value = self._storage[name] elif name in self._providers: - value = self._storage[name] = self._providers[name](self) - return value + value = self._providers[name](self) elif name.startswith('rollout:'): rollout_name = name[8:] - value = self._storage[name] = self.rollout.batch_tensor(rollout_name) - return value + value = self.rollout.batch_tensor(rollout_name) else: raise RuntimeError(f"Key {name} is not provided by this evaluator") + + if cache: + self._storage[name] = value + + return value def provide(self, name, value): """ Provide given value under specified name """ diff --git a/vel/rl/test/test_evaluator_cache.py b/vel/rl/test/test_evaluator_cache.py new file mode 100644 index 00000000..1f0b3724 --- /dev/null +++ b/vel/rl/test/test_evaluator_cache.py @@ -0,0 +1,39 @@ +from vel.rl.api import Evaluator, Rollout + +calls = { + "a": 0, + "b": 0, + "c": 0, +} + +class TestEvaluator(Evaluator): + @Evaluator.provides('test:a') + def test_a(self): + calls["a"] += 1 + + @Evaluator.provides('test:b', cache=False) + def test_b(self): + calls["b"] += 1 + + @Evaluator.provides('test:c') + 
def test_c(self): + calls["c"] += 1 + + +def test_evaluator(): + e = TestEvaluator(Rollout()) + e.get("test:a") + e.get("test:a") + e.get("test:a") + + e.get("test:b") + e.get("test:b") + e.get("test:b") + + e.get("test:c") + e.get("test:c") + e.get("test:c", cache=False) + + assert calls["a"] == 1 # test:a is cached so just one call + assert calls["b"] == 3 # test:b is never cached so three calls + assert calls["c"] == 2 # test:c is cached but one call is not so two calls \ No newline at end of file From 09f14f9886e0728d93778190dcafcd35f024a6ee Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 26 Sep 2019 14:19:24 -0700 Subject: [PATCH 095/162] Added a global list command for the models. --- .velproject.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.velproject.yaml b/.velproject.yaml index 2b6bbabd..8127a339 100644 --- a/.velproject.yaml +++ b/.velproject.yaml @@ -24,3 +24,9 @@ visdom_settings: server: 'http://localhost' port: 8097 + +# List of commands that are shared among all models in this project +global_commands: + list: + name: vel.command.list_command + From 521912c7a80b5f39b173741cf0801c1172ebfa7d Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 26 Sep 2019 14:19:43 -0700 Subject: [PATCH 096/162] Canonical MNIST-VAE notebook.
--- .../autoencoders/mnist/mnist-vae.ipynb | 340 ++++++++++++++---- 1 file changed, 276 insertions(+), 64 deletions(-) diff --git a/examples-notebooks/autoencoders/mnist/mnist-vae.ipynb b/examples-notebooks/autoencoders/mnist/mnist-vae.ipynb index 4a00f5da..652fd859 100644 --- a/examples-notebooks/autoencoders/mnist/mnist-vae.ipynb +++ b/examples-notebooks/autoencoders/mnist/mnist-vae.ipynb @@ -8,6 +8,7 @@ "source": [ "import os\n", "import torch\n", + "import tqdm\n", "import numpy as np\n", "import matplotlib.pyplot as plt" ] @@ -31,24 +32,16 @@ "metadata": {}, "outputs": [], "source": [ - "config = nb.load_config('examples-configs/autoencoders/mnist/mnist_cnn_vae.yaml', run_number=2, device='cpu')" + "config = nb.load_config('examples-configs/latent/mnist/mnist_fc_vae.yaml', run_number=1, device='cuda:0')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Setting up a new session...\n" - ] - } - ], + "outputs": [], "source": [ - "model = config.load_trained_model()" + "model = config.load_trained_model().to(config.device)" ] }, { @@ -60,30 +53,27 @@ "name": "stdout", "output_type": "stream", "text": [ - "MnistCnnVAE(\n", + "FcVae(\n", " (encoder): Sequential(\n", - " (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (1): ReLU(inplace)\n", - " (2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", - " (3): ReLU(inplace)\n", - " (4): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", - " (5): Flatten()\n", - " (6): Linear(in_features=784, out_features=32, bias=True)\n", + " (0): Flatten()\n", + " (1): Linear(in_features=784, out_features=200, bias=True)\n", + " (2): Tanh()\n", + " (3): Linear(in_features=200, out_features=200, bias=True)\n", + " (4): Tanh()\n", + " (5): Linear(in_features=200, out_features=100, bias=True)\n", " )\n", " (decoder): Sequential(\n", - " (0): 
Linear(in_features=16, out_features=784, bias=True)\n", - " (1): ReLU(inplace)\n", - " (2): Reshape()\n", - " (3): ConvTranspose2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n", - " (4): ReLU(inplace)\n", - " (5): ConvTranspose2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n", - " (6): ReLU(inplace)\n", - " (7): ConvTranspose2d(8, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (8): Sigmoid()\n", + " (0): Linear(in_features=50, out_features=200, bias=True)\n", + " (1): Tanh()\n", + " (2): Linear(in_features=200, out_features=200, bias=True)\n", + " (3): Tanh()\n", + " (4): Linear(in_features=200, out_features=784, bias=True)\n", + " (5): Reshape(sizes=(1, 28, 28), batch_dims=1)\n", + " (6): Sigmoid()\n", " )\n", ")\n", "----------------------------------------------------------------------------------------------------\n", - "Number of model parameters: 45,569\n", + "Number of model parameters: 425,284\n", "----------------------------------------------------------------------------------------------------\n" ] } @@ -98,8 +88,10 @@ "metadata": {}, "outputs": [], "source": [ - "data_source = config.provide('source')\n", - "train_dataset = data_source.train_dataset" + "data_loader = config.provide('loader')\n", + "data_source = data_loader.transformed_source\n", + "train_dataset = data_source.train\n", + "validation_dataset = data_source.validation" ] }, { @@ -109,7 +101,7 @@ "outputs": [], "source": [ "def get_sample(idx):\n", - " return train_dataset[idx][0]" + " return train_dataset[idx]['x'].to(config.device)" ] }, { @@ -119,7 +111,7 @@ "outputs": [], "source": [ "def show_image(axis, sample):\n", - " axis.imshow(train_dataset.denormalize(sample)[:, :, 0], cmap='gray')" + " axis.imshow(train_dataset.denormalize_item(sample, 'x'), cmap='gray')" ] }, { @@ -129,7 +121,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA2cAAACxCAYAAABAxMXKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAS3UlEQVR4nO3dT6htV30H8O+vRifVQVLb8IixkZJJcKAQxEEGdtCSZpI4ER0FWngOFBScBDuo0Km1o1JIMbwMrCJoa5BSmwYhjsRERPOnmlQMJjwTQgbGkY2uDt4RX5J739n37n3OWWufzwcO79zzzp+19vmefffvrr3WqdZaAAAAOKw/OHQDAAAAUJwBAAB0QXEGAADQAcUZAABABxRnAAAAHVCcAQAAdGBWcVZVd1bVj6vq2aq6b6lGwa7ILCOSW0Yjs4xGZulFnfd7zqrqLUl+kuQvkjyf5HtJPtZae+oaj/GlaszSWqvzPlZmOYQ5mU3OnluZZQEvt9b++LwPllkOYK+Z3TxGbpnltOODOSNnH0jybGvtp621Xyf5SpK7Zzwf7JrMMiK5Zd+em/l4mWXfZJbVmFOc3ZTk51f9/PzmNuiVzDIiuWU0MstoZJZuXLfrF6iqi0ku7vp1YCkyy2hkltHILCOSW/ZhTnH2QpKbr/r5XZvbXqe1dn+S+xPn53JwMsuItuZWZumMzDIaxwd0Y85pjd9LcmtVvaeq3pbko0keWqZZsBMyy4jkltHILKORWbpx7pGz1tprVfXJJN9K8pYkD7TWnlysZbAwmWVEcstoZJbRyCw9OfdS+ud6MUPAzDR3WfKzklnmklkG9Hhr7fZ9vZjMsoC9ZjaRW+bbxVL6AAAALERxBgAA0AHFGQAAQAcUZwAAAB1QnAEAAHRAcQYAANABxRkAAEAHFGcAAAAdUJwBAAB0QHEGAADQAcUZAABABxRnAAAAHVCcAQAAdOC6QzcAOA6ttUn3q6rJjz3pvgAAozJyBgAA0AHFGQAAQAcUZwAAAB2YNeesqn6W5NUkv0nyWmvt9iUaBbskt4xGZhmNzDIamaUXSywI8uettZcXeB5mOmnRhLMsmDB1wYapOl+sQW53aE6Wls7hisjsCsxZGGdAMtupufvZleTzJDLLwTmtEQAAoANzi7OW5L+q6vGqurhEg2AP5JbRyCyjkVlGI7N0Ye5pjXe01l6oqj9J8nBV/U9r7dGr77AJuJDTk2vmVmbpkMwyGpllNI5p6UItNb+jqj6X5Fettc9f4z4mk+zQMcw5a60t+qTbciuz57OveWMjzHuQWa42yJyzx5dcDEFm+7PCOWd7zezmPnLLLKcdH5z7tMaq+sOqesfvrif5yyRPnPf5etFaO/Fylvse6nJIVfWmS4/WmttDmZvFUXJzSDK7LseQeZllNDJLT+ac1nhjkn/b/GK5Lsm/ttb+c5FWwe7ILaORWUYjs4xGZunGYqc1TnqxAYaAT9seJ/1189CjVVPs67TGff31d+lTxLYZIbOHtItTY8468tY7mWWbuaek78Cip4htI7P757TG+eSWuRY/rREAAIDlKM4AAAA6MHcp/aMxwimMU809dbPD0xnYg118Bqbm61qL8kx5PMuz7c9nTb9LGIPMsWu7yNjU3ydnOaYdhZEzAACADijOAAAAOqA4AwAA6IDiDAAAoAMWBBnA0t+xdpZJkiNPqGSaqVka5bv+LFSxvB7fZ2CaY/g+SfZnX99vesy/d4ycAQAAdEBxBgAA0AHFGQAAQAcUZwAAAB1QnAEAAHTAao1vcNqqRPtYNcYqivTskCsnneVz6bMBHKt9reTMcZizmvNZjLIa9L4YOQMAAOiA4gwAAKADijMAAIAObC3OquqBqnqpqp646rYbqurhqnpm8+/1u20mnI3cMhqZZTQyy2hklhFMGTm7lOTON9x2X5JHWmu3Jnlk8/OqVdW5L1Ofj0VdypHntrU26TJHj5+DffR7Ry7lyDO
7JgPlbo5LkdmDmZOvIz4GuRSZPdER7K+GsbU4a609muSVN9x8d5IHN9cfTHLPwu2CWeSW0cgso5FZRiOzjOC8c85ubK1d3lz/RZIbF2oP7JLcMhqZZTQyy2hklq7M/p6z1lqrqlPHPqvqYpKLc18HlnSt3MosPZJZRiOzjMYxLT0478jZi1V1IUk2/7502h1ba/e31m5vrd1+zteCpUzKrczSEZllNDLLaBzT0pXzFmcPJbl3c/3eJN9YpjnjM6Gya0eV2zkTvucs8rGL9hyxbjPr/VzOyrZlt5k9VivL1y6sNrNTF8g69GIyS7dn9IzXtg1QVV9O8qEk70zyYpK/S/LvSb6a5N1JnkvykdbaGydYnvRcq69UpgZq9OAcSmtt0oZbKrdry+za8jnnjx/76uMaMnvSdh4lI4dy1oOLzjw+ZWSg58weg7Xtz2faa2Y3z9V9bncxQLCLPI3wu3wXTjs+2FqcLWmEIM9lZ7lbUw90l7K2zK4tnyPs0NeQWcXZ2R1DcbaUte1n92Vt+/OZ9prZZIzcKs76dtrxwXlPawQAAGBBijMAAIAOzF5Kn9ebOrzqNCGWMve0hZEze9LrW4RnHtvv7Gwzdkm+mGIXxwKydxhGzgAAADqgOAMAAOiA4gwAAKADijMAAIAOWBBkD6ZOqDztfodedAGSPnPoe37YpVG+IwjgLKbuh+bsr/a1WNkaGTkDAADogOIMAACgA4ozAACADijOAAAAOmBBkD2YOqnxtMmTcyZVHvOESt7sWPNw0mfoWLfFUg65Tae+9i4W9DjJIV+bcVlwhhHZt+2ekTMAAIAOKM4AAAA6oDgDAADogOIMAACgA1uLs6p6oKpeqqonrrrtc1X1QlX9YHO5a7fNhOlklhHJLaORWUYjs4xgysjZpSR3nnD7P7bW3re5/MeyzTpOVXXiZY7W2psuR+BSVprZs7yfS2eJnbuUDnK7j33Q3MvU15nb76mXI93PJp1kFs7gUmR28f0vy9panLXWHk3yyh7aAouQWUYkt4xGZhmNzDKCOXPOPllVP9wMEV9/2p2q6mJVPVZVj814LViCzDKirbmVWTojs4zG8QHdqClDlFV1S5Jvttbeu/n5xiQvJ2lJ/j7JhdbaX094HuOh57D0MPLIp7e11iY1fq2ZPUsWRn6fpxrhC9qnZjZZJre7yOzU7TzylzHPycMKP5ePt9Zun3LHXjM7Al9Cvai9ZnbzuK5yO8q+dqpjyPJpxwfnGjlrrb3YWvtNa+23Sf4lyQfmNA52TWYZkdwyGpllNDJLb647z4Oq6kJr7fLmxw8neeJa92eepf8afdpj1/xXCpk9bqNme7TcnrRv2cVo2qjv5zEYLbMj8zlYxloy29u+dm0jefu0tTirqi8n+VCSd1bV80n+LsmHqup9uTIE/LMkH99hG+FMZJYRyS2jkVlGI7OMYNKcs8VerLPzc0d2rOeqn2X+zhJ6y+wK57bMMmcu1L6sIbNLz+1b28jZCj+Xk+fvLKG3/ey+rO1zcGB7zWwyRm5HHjk7hnwvOucMAACAZSnOAAAAOnCuBUE4vJGXrGYa7+fvrfC0saEsvU2P4T06hj4ynf05hzDKfmiUdu6LkTMAAIAOKM4AAAA6oDgDAADogOIMAACgAxYEGdTS3ztEf4510ReLf7AGp+VYZtfP72dgDiNnAAAAHVCcAQAAdEBxBgAA0AHFGQAAQAcsCHIgx7CwA8fLhHjWamq25RiA8zByBgAA0AHFGQAAQAcUZwAAAB3YWpxV1c1V9e2qeqqqnqyqT21uv6GqHq6qZzb/Xr/75sJ2MstoZJYRyS2jkVlGMGXk7LUkn2mt3Zbkg0k+UVW3JbkvySOttVuTPLL5GXogs4xGZhmR3DIamaV7W4uz1trl1tr3N9dfTfJ0kpuS3J3kwc3dHkxyz64aObrW2psu7M6aM1tVb7qcZmru5uTzpMee9vipbT/Gz8uaM7s2Z/kMrp3cHuf+amQyuxtzPwc+R69
3pjlnVXVLkvcn+W6SG1trlzf/9YskNy7aMliAzDIamWVEcstoZJZeTf6es6p6e5KvJfl0a+2XV/+1sLXWqurEMreqLia5OLehcFYyy2hklhGdJ7cyyyHZ19KzSSNnVfXWXAnxl1prX9/c/GJVXdj8/4UkL5302Nba/a2121trty/RYJhCZhmNzDKi8+ZWZjkU+1p6N2W1xkryxSRPt9a+cNV/PZTk3s31e5N8Y/nmwdnJLKORWUYkt4xGZhlBbZt0V1V3JPlOkh8l+e3m5s/myjm6X03y7iTPJflIa+2VLc+1qhl+h5yweKyT0FtrWzu+5szuInMnZWnu60x9zmPI8bFndm2mfjYGz/bjU0YGlsrtyJmds68cPCO92WtmN881bG6XtotjhmNw2vHB1uJsSWsLsuJs/6Yc6C6pt8wqzsZz7JldG8XZ8kbOrOKsG3vNbDJ2bpemODuf044PzrRaIwAAALuhOAMAAOiA4gwAAKADk7/nbI16mzN2WnuO9Vxc9mNfcybkmGNifw7AeRg5AwAA6IDiDAAAoAOKMwAAgA4ozgAAADqwygVBDrnQxxwmirPNLr4wes5rAyfzeVmXUY8rYB/Osr/zWdrOyBkAAEAHFGcAAAAdUJwBAAB0QHEGAADQgVUuCDLH3EmNJoGzbzIH+zN1UZ7TJr37vI7JggdwNj4H52fkDAAAoAOKMwAAgA4ozgAAADqwtTirqpur6ttV9VRVPVlVn9rc/rmqeqGqfrC53LX75sJ2MstoZJbRyCwjkltGUNsm7FXVhSQXWmvfr6p3JHk8yT1JPpLkV621z09+sSqzA5mltbZ1VrbM0hOZZUCPt9Zuv9YdZJbObM1sIrc9mLNQyNoWVDrt+GDrao2ttctJLm+uv1pVTye5adnmwXJkltHILKORWUYkt4zgTHPOquqWJO9P8t3NTZ+sqh9W1QNVdf3CbYPZZJbRyCyjkVlGJLf0anJxVlVvT/K1JJ9urf0yyT8n+bMk78uVv0L8wymPu1hVj1XVYwu0FyaTWUYjs4xGZhmR3NKzrXPOkqSq3prkm0m+1Vr7wgn/f0uSb7bW3rvleZyfyyxT5u8kMks/ZJYBTZ2/I7P0YlJmE7k9NHPOfu+044MpqzVWki8mefrqEG8mVf7Oh5M8MbeRsASZZTQyy2hklhHJLSOYslrjHUm+k+RHSX67ufmzST6WK8O/LcnPknx8M9HyWs/lrwzMMnHlO5mlGzLLgKas1iiz9GTqaK/cdmjqaNqxjJxNOq1xKYLMXFNPEVuKzDKXzDKgyaeILUFmWcBeM5vI7ZIUZ693ptUaAQAA2A3FGQAAQAcUZwAAAB247tANAAAAjtPa5pLNZeQMAACgA4ozAACADijOAAAAOqA4AwAA6MC+FwR5Oclzm+vv3Py8BmvqS9Jvf/70AK8ps2PotT8yu5w19SXpuz/7zu1aM5usqz899+WQ+9qet8t5rKk/Pffl1MzW1G/lXlpVPbbvb3PflTX1JVlff5aypu2ypr4k6+vPUta0XdbUl2R9/VnK2rbLmvqzpr4saW3bZU39GbUvTmsEAADogOIMAACgA4cszu4/4GsvbU19SdbXn6WsabusqS/J+vqzlDVtlzX1JVlff5aytu2ypv6sqS9LWtt2WVN/huzLweacAQAA8HtOawQAAOjA3ouzqrqzqn5cVc9W1X37fv25quqBqnqpqp646rYbqurhqnpm8+/1h2zjVFV1c1V9u6qeqqonq+pTm9uH7M+uyGw/ZHYame2HzE43cm7XlNlEbqcaObPJunK7pszutTirqrck+ackf5XktiQfq6rb9tmGBVxKcucbbrsvySOttVuTPLL5eQSvJflMa+22JB9M8onN+zFqfxYns92R2S1ktjsyO8EKcnsp68lsIrdbrSCzybpyu5rM7nvk7ANJnm2t/bS19uskX0ly957bMEtr7dEkr7zh5ruTPLi5/mCSe/baqHNqrV1urX1/c/3VJE8nuSmD9md
HZLYjMjuJzHZEZicbOrdrymwitxMNndlkXbldU2b3XZzdlOTnV/38/Oa20d3YWru8uf6LJDcesjHnUVW3JHl/ku9mBf1ZkMx2SmZPJbOdktlrWmNuV/Eey+2p1pjZZAXv8eiZtSDIwtqV5S+HWgKzqt6e5GtJPt1a++XV/zdifzibEd9jmT1uI77HMnvcRn2P5fa4jfgeryGz+y7OXkhy81U/v2tz2+herKoLSbL596UDt2eyqnprroT4S621r29uHrY/OyCznZHZrWS2MzI7yRpzO/R7LLdbrTGzycDv8Voyu+/i7HtJbq2q91TV25J8NMlDe27DLjyU5N7N9XuTfOOAbZmsqirJF5M83Vr7wlX/NWR/dkRmOyKzk8hsR2R2sjXmdtj3WG4nWWNmk0Hf41VltrW210uSu5L8JMn/Jvnbfb/+Au3/cpLLSf4vV84v/pskf5QrK8A8k+S/k9xw6HZO7MsduTK8+8MkP9hc7hq1PzvcTjLbyUVmJ28nme3kIrNn2lbD5nZNmd30R26nbadhM7tp/2pyu6bM1qZDAAAAHJAFQQAAADqgOAMAAOiA4gwAAKADijMAAIAOKM4AAAA6oDgDAADogOIMAACgA4ozAACADvw/hU+ueKcDcG8AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] @@ -145,7 +137,7 @@ "fig, axes = plt.subplots(1, 5)\n", "\n", "for index in range(5):\n", - " show_image(axes[index], get_sample(index))" + " show_image(axes[index], get_sample(index).cpu())" ] }, { @@ -154,19 +146,24 @@ "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "tensor([[ 0.7010, 0.7096, 0.2029, -0.8527, -0.1471, 0.1670, -0.0375, 1.2047,\n", - " -1.9497, -0.1735, 2.7477, 0.9634, -1.8239, -1.0749, 0.8230, 0.0965]])" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[ 0.5881, -1.6104, 0.4107, 1.0962, 1.7997, 1.0594, 0.0516, 1.5038,\n", + " 2.1698, 0.0210, -0.3525, 0.4182, 1.5991, 0.0826, 1.5369, 0.0597,\n", + " 2.2284, 0.1273, -1.6312, -0.1385, 1.0510, 0.0295, -0.2376, -0.0673,\n", + " 0.5040, 0.7474, -0.1831, 0.7763, 0.1418, 1.6395, -2.4175, 0.7683,\n", + " 0.1352, -2.3757, -0.4010, -2.0714, 0.0345, -0.5530, 1.6734, -1.6953,\n", + " 2.1401, 1.4186, 0.7702, 0.0874, -0.8720, -1.3320, -1.0852, -0.1516,\n", + " -1.0706, 1.0755]], device='cuda:0')\n", + "torch.Size([1, 50])\n" + ] } ], "source": [ - " model.encode(get_sample(0)[None])" + "x = model.encode(get_sample(0)[None])\n", + "print(x)\n", + "print(x.shape)" ] }, { @@ -176,7 +173,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -193,9 +190,9 @@ "\n", "for index in range(5):\n", " sample = get_sample(index)\n", - " decoded = model(sample[None])['decoded'][0].detach()\n", - " show_image(axes[0, index], sample)\n", - " show_image(axes[1, index], decoded)" + " decoded = model(sample[None])[0].detach()\n", + " show_image(axes[0, index], sample.cpu())\n", + " show_image(axes[1, index], decoded.cpu())" ] }, { @@ -205,9 +202,9 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ - "
" + "
" ] }, "metadata": { @@ -217,13 +214,41 @@ } ], "source": [ - "samples = torch.randn(5, model.representation_length)\n", + "samples = torch.randn(5, 5, model.representation_length)\n", "\n", - "fig, axes = plt.subplots(1, 5)\n", + "fig, axes = plt.subplots(5, 5)\n", "\n", - "for index in range(5):\n", - " decoded = model.decoder(samples[index][None])[0].detach()\n", - " show_image(axes[index], decoded)" + "for i in range(5):\n", + " for j in range(5):\n", + " decoded = model.decode(samples[i, j][None].to(config.device))[0].detach()\n", + " show_image(axes[i, j], decoded.cpu())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 10/10 [00:01<00:00, 7.01it/s]\n" + ] + } + ], + "source": [ + "bs = 1_024\n", + "\n", + "results = []\n", + "\n", + "for i in tqdm.trange(validation_dataset.num_batches(bs)):\n", + " nll = model.nll(validation_dataset.get_batch(i, bs)['x'].to(config.device))\n", + "\n", + " results.append(nll.cpu().numpy())\n", + "\n", + "\n", + "full_results = np.concatenate(results)" ] }, { @@ -234,8 +259,7 @@ { "data": { "text/plain": [ - "tensor([[ 0.8606, 0.9047, 0.1575, -0.7448, -0.3117, 0.0745, -0.3145, 1.4116,\n", - " -1.5365, -0.6043, 2.6963, 0.4136, -1.0794, -0.8664, 0.7766, -0.4429]])" + "90.79018" ] }, "execution_count": 14, @@ -244,17 +268,205 @@ } ], "source": [ - " model.encode(get_sample(0)[None])" + "np.mean(full_results)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 10/10 [00:01<00:00, 6.69it/s]\n" + ] + } + ], + "source": [ + "bs = 1_024\n", + "\n", + "results = []\n", + "\n", + "for i in tqdm.trange(validation_dataset.num_batches(bs)):\n", + " nll = model.nll(validation_dataset.get_batch(i, bs)['x'].to(config.device), num_posterior_samples=5)\n", + "\n", + " 
results.append(nll.cpu().numpy())\n", + "\n", + "\n", + "full_results = np.concatenate(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "88.15819" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.mean(full_results)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 10/10 [00:01<00:00, 5.54it/s]\n" + ] + } + ], + "source": [ + "bs = 1_024\n", + "\n", + "results = []\n", + "\n", + "for i in tqdm.trange(validation_dataset.num_batches(bs)):\n", + " nll = model.nll(validation_dataset.get_batch(i, bs)['x'].to(config.device), num_posterior_samples=50)\n", + "\n", + " results.append(nll.cpu().numpy())\n", + "\n", + "\n", + "full_results = np.concatenate(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "87.12366" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.mean(full_results)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 10/10 [00:04<00:00, 2.24it/s]\n" + ] + } + ], + "source": [ + "bs = 1_024\n", + "\n", + "results = []\n", + "\n", + "for i in tqdm.trange(validation_dataset.num_batches(bs)):\n", + " nll = model.nll(validation_dataset.get_batch(i, bs)['x'].to(config.device), num_posterior_samples=500)\n", + "\n", + " results.append(nll.cpu().numpy())\n", + "\n", + "\n", + "full_results = np.concatenate(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "86.84696" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "np.mean(full_results)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 10/10 [00:12<00:00, 1.28s/it]\n" + ] + } + ], + "source": [ + "bs = 1_024\n", + "\n", + "results = []\n", + "\n", + "for i in tqdm.trange(validation_dataset.num_batches(bs)):\n", + " nll = model.nll(validation_dataset.get_batch(i, bs)['x'].to(config.device), num_posterior_samples=2000)\n", + "\n", + " results.append(nll.cpu().numpy())\n", + "\n", + "\n", + "full_results = np.concatenate(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "86.82053" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.mean(full_results)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA2cAAABpCAYAAAC+qVuIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO2de9xmU/n/3zulo+RQyGGQkRBhnKeInEYZlFHE0EEhftIBSUhR5ItEMQjlnEgo52NFEXLO5BQpRiqlc/v3xzyfta9t9jPzzHOv+773PfN5v15e9lzPae/PvfZae6/rs65VlGWJMcYYY4wxxpj+8pJ+n4AxxhhjjDHGGL+cGWOMMcYYY0wr8MuZMcYYY4wxxrQAv5wZY4wxxhhjTAvwy5kxxhhjjDHGtAC/nBljjDHGGGNMC+jo5awois2LoniwKIqpRVHsn+uk5masaX6saXewrvmxpvmxpvmxpvmxpvmxpvmxpr2hGO0+Z0VRzAP8GtgEeAL4BfCBsizvy3d6cxfWND/WtDtY1/xY0/xY0/xY0/xY0/xY0/xY097RSeZsLWBqWZYPl2X5L+BcYGKe05prsab5sabdwbrmx5rmx5rmx5rmx5rmx5rmx5r2iJd28LOLA78N/34CWHtmP1AUxejSdHMH04DdsaY5sab5mVaW5euZzfvfms4Ua5ofa5qfUWkK1nVmlGVZYE1z47E/P9Y0P+pTZ6CTl7MRURTFbsBu3f47cwCPjfQbremIsab5sab5sab5sab5GbGmYF27gTUdMb7/82NN8zOspp28nD0JLBn+vcRQrEZZlicDJ4PfoEeANc2PNe0Os9TVms421jQ/1jQ/7lPzY03zY03zY017RCdrzn4BjC2KYpmiKOYF3g9ckue05lqsaX6saXewrvmxpvmxpvmxpvmxpvmxpvmxpj1i1Jmzsiz/UxTFJ4ArgHmA08qyvDfbmc2FWNP8WNPuYF3zY03zY03zY03zY03zY03zY017x6hL6Y/qjzm9OTNuL8ty3Oz+kDWdKdY0P9Y0P9Y0P9Y0P6PSFKzrzBgqCDLbWNOZ4vs/P9Y0P8Nq2tEm1MYYY4wxxhhj8uCXM2OMMcYYY4xpAV0vpT+oFEUxw3G0gPbSDjroSL+XvKSaC9Dxf//73xT73//+19sTG2Ck30tfWt3C0vnf//53ilnTkSNN55lnnhm+9p///Ccd+94fOWqTsT+VftbRGGOMmRFnzowxxhhjjDGmBcy1mbM4k7vYYoul46OPPhqA8ePHp5iyDxdffHGKHXDAAQC88MILXT3PQSJqOmbMmHR8zDHHADBuXLXu8e9//zsAJ510Uoodd9xxQD1LMbcTs43LLLNMOj7yyCMBWG211VLsmWeeAeCQQw5JsR//+MeAsxSR2E6XWGKJdHzggQcCsMYaa6TY3XffDcAXv/jFFHv00Ue7fIaDR9T0ta99bTreZpttAFh55ZVT7JprrgHg2muvTbF//vOf3T7FgSZmyBdffHEA5ptvvhR77LHpe5k+//zzvT2xASe223nnnReo97n/+Mc/APefoyXqK6ylMbPGmTNjjDHGGGOMaQF+OTPGGGOMMcaYFjDX2hqjlfHUU09NxxtssAFQWRygKlqx5ZZbptiUKVMAuPfeav+9uTVdL+vCUkstlWLf/OY30/E73vEOAF7+8penmIpW7LDDDil29tlnA/DUU09172QHBGkarYzf+MY30rFst694xStS7PWvfz0AH//4x1PspptuAuCvf/1r9052QJBdabnllksxWW4B3v72twN1Td/0pjcB8OSTT6aYLI6x8MrcijRddtllU0yWW4ANN9wQgFe+8pUptu222wKw1157pdjll18OzL19aESaRsvtQQcdlI4nTJgAwKtf/eoUu/POOwHYZZddUsz22zrqU1/3utelWNRrxx13BGD++edPsfPPPx+AL3/5yynmpQwzEp+X1lxzzXS8/fbbA7DQQgulmJY
y3HzzzSnmwlUzEu21GtsBVl99daBuH7/uuusAePrpp3t0doNJtNnG51G1z5e97GUp9vvf/x6orM29xpkzY4wxxhhjjGkBc13mTDMQF1xwQYqtssoq6VizFf/6179STJmzuCh7k002AeDXv/51isWfmZtQxuyiiy5KsTe/+c3pWLo1FfqIM2rS9KyzzkqxWGp/TifO6qy00koAnHfeeSm29NJLp2NpGjMNmvVZYYUVUkxFWG644YYUm5uyE3H2cd111wXgtNNOS7Ell1wyHUvT+Dm86lWvAqq2CXD66acD8Jvf/Cb/CQ8ATf3g17/+9RSLGZ/4vWLRRRcF4MMf/nCK/fSnPwXgueeey3uyA0LMPCizePjhh6fYG9/4xnQcZ3eFshX77rtvin32s58F+jfz2wZixnbrrbcG6sV94v0fPwOx9957A/V2eeyxxwJzb+GqqOmmm24K1DUdO3ZsOlZ2Io45yvxGh4cylHPT2BSJWRyN2fH+j4W/ov7i2WefBWDSpEkpduONN2Y/z0Eijj16djr00ENTLI7pykLG9ie3zFZbbZVi9913X1fOtQlnzowxxhhjjDGmBfjlzBhjjDHGGGNaQNHLNHJRFH3JWS+88MLpWAsno+0uavCnP/0JqPaNgWpPrgUXXDDFVLQiLmqXxXGUmt5eluW4WX9bnX5puvzyy6dj7VskuxLUF/iqGIW0jUSLztSpUwHYeeedU+yJJ57o5DQHQlPZ6FSMBuDCCy8E6ovTo8Xzb3/7G1DfH0pp/Kj97bffDtQLrzR9DrPBQGg6zzzzANUif6gKqsRCCtGaJE2jfrI6RZ0vu+wyAD72sY+lWIf7dA2EptJCdjmA/fbbD6jsn1AvlCJNmxZi62tQ2SKPOOKIFOvQ0jwQmmqvMu2vCVWbjYVpmjSNth19Nn/+859TTGPT9773vRTrcLwflabQG12b9i6N+2hutNFGQN1CFtuYxvk4JknjadOmpdjEiRMBuO2227Kcd1mWM24GNgJ6oWm0hWv5x/HHH59ib3vb24B6W41tTO1W/XH8nb/73e9STEXDHn/88Vyn3tr7P9pnZQs9+OCDU0zPptG+GNu22mz8bMTDDz+cjtdee22g4/E+0jpNpUvc81F7a8bxecUVVwTqY3/UT2N+1Fnt+J577kmx9dZbD6j6igwMq6kzZ8YYY4wxxhjTAubogiBamH7VVVelmMpoxzfk559/Ph3/8pe/BOBHP/pRimnWRwtZoSqxvccee6TYZz7zGWDOKwwStdLC1KiPMpNxxixqquyNyj1DNbOpmQioZjc+8pGPpJgWGs9ppXbjrI1mYs8888wU0wxPvO4//vGP6Viztir3CpV+MSu81lprAdVsElQFLea0xddxxnufffYB4LDDDksxzZjHbFkskX/LLbcA9dn0ddZZB6gXZNCWGmussUaKqaDFnEacaVQhhFh+XJmFmDmMi6a1lUMsX/6ud70LqBcD+uhHPwrUtzWZU7fUWGSRRdKxCv5oGweo+oa4/YXaJsD1118PwJgxY1JMxS6iu+PTn/40AJdcckmKdZjhbSXSS4UUAM4991ygXkRJxEyCtAT4yU9+AtQLKr33ve8F6u4bjflxnJrTxidld/bcc88U+8IXvgDUS7jruuPYpHse4I477gBg5ZVXTjE9R8X7QH1q3IJnTiA+O6kY3RlnnJFi2mokZtM0PkVN9QwF8MADDwBV0TConqPiOKWvq13PKUTHwBZbbAHUtxnSs2V8xtKY/pe//CXFpCNUGce3vOUtKabnqbhNzBve8Aag7qzrFs6cGWOMMcYYY0wL8MuZMcYYY4wxxrSAWdoai6I4DXg38HRZlisPxRYEzgOWBh4FJpVl2dcNapQ+jnuWaSG09uGCKmX8zDPPpFhc2KsU8N13351iskPF3600/eabb55iJ554IgAPPvjgaC5hnqIorqJFmiotrEWrUKXko31G++jEfZ/ivlqyM8bFvsssswxQpY6hShlHC56sVKN
d1Nq2tiqLbLTEHHnkkUB9AbDS77FtXnrppelYafi4MHXjjTcGKm0BFlhgAQAmT56cYtpHbrT227Zpqvsz7rWjYgjR6vj0008DdR0vvvjidPyHP/wBaF40HPc6UZGWXXfdNcV+9rOfAaO3irZNU1mXvv3tb6fYu9/9bqBu1dE9P2XKlBSLlmdZ86ItTPYeFWiAyuIU956JNt/R0AZNo1ayIf7whz9MMfV/sd3Iwvi1r30txdS+oLpvoy1Me0utv/76KaaiTYsvvniKxYIBo6ENmkK9wISsTbG9qN+LfZzu+6OOOirForVJFr1oDVt11VWBqvAFVPslRitaJ3vJtUXTWNRHGsluDFVfGu226h/07APNRbyivVTPUdEuJlvvt771rRTr0Hbf1+cp3ffx2fPyyy8H6tY5fV98vjnuuOOAenvW2BR/Jtpv9awbreL6OxltjX3VVPd8fD485ZRTgLrVVmi8h+o58pxzzmn8usZ5FVEBOPvss4F6ASH1K22xNZ4ObP6i2P7ANWVZjgWuGfq36YzFsKbdwG01P9Y0P9Y0P9Y0P9Y0P9Y0P36eyo817SGzzJyVZXljURRLvyg8Edhw6PgM4Hpgv4znNduoQEdc/KzS7nFWS6X0Y5GQWNJZpYhjZk0zOCqVD9Wsb5wRXn311Wf4vtmY/Xkd07WEPmoaZ3q1yPQ73/lOiilr8Nxz1YSJZs3jjHks/azFmHF2SAVD4gyEirXEmUvNuMViIrNJ39tqXJg6adIkoMqWQTVjGTOLBxxwAFAvBBALVcTPSWiGR4uroZrp0cw6VMUZ4szRbNJ3TeOiYJVzj9taaJY3LqTefffdAXjooYdSLC7k18xcLAt99dVXA1VWEiqdYzEbxTqYQe+7pjFze8IJJwD1tiQuuuiidPzJT34SqLel2Oep7cf+QltvxCyP/rYKWwB897vfBToqttB3TeP4oOIfceZcmR1tJQBw+OGHA/UMRdRU934s7nHzzTcDVQEgqDI7se12mjmjBZpCXUMVkYhFZ6Td/vtXz48qNjNcURTpGjM/d911F1AvaPGa17wGqI9THeraV0113cqMA+y0005AvZ/V+P2hD30oxZSNjEWWmtpq1FTHMZumLHDMRnZYvKavz1MaS2I5dz2jRlT0aLvttkuxW2+9FahrGmnSVM+w0dGkvqepPPwo6aumahvbb799immsjs9GKkilZy2oxvzhtmeRRrHAmmjaWqcXjLZa4yJlWaqU1u+BRYb7xqIodgN2G+XfmZt4qTXtCiNqq9Z0trCm+bGm+bGm+fHYnx9rmh8/T+XHmvaQjkvpl2VZzmyTubIsTwZOhv5tmDxoWNPuMDNdrenosKb5sab5sab58TiVH2uaH2uaH2vafUb7cvaHoigWK8vyqaIoFgNG7Y/qhJjC1WLLuPhZqV4VQIDKrhPtR7J+QWUziQswlV6Otj1ZRWRlhGpx9yjTyP9pg6baGw7g/PPPB+o6y5504IEHpphsTtHKGIsqKMUfi1dI/1icQYuv49/Tnl2yl8Bsp+b73lZj0RO1P1ljoNprS1YSqPbbi2n4mFKXbS+2tUceeQSobBFQWW/iz6rwSge2xr5pqusdP358iml/wWhHVEGfaBeRzk1WRqhbeITsENIWYL755qv9Hyprage2xr5rGm1N2267LVDX5IorrgDqRQJUuKbJyhTjsTCDbN/PPvtsimlvGv0fqs+mA1tj3zSVnVP7ZAG89a1vBerXc9pppwFwyCGHpJja0HD9nH537G9VmCVaIdUmY+EFfTYd2Jv62p+qTcTCMbLax2s/9NBDgXqhmpEWQIp2skcffRSoW+ykXRynOrQ19lVTtScV6gF44YUXgPpeW3vvvTdQPWvB8NY7Ia1iW9WzVfw89PVYwKlDW2Mrnqfic4uKz/zud79LsX333ReoL4UZaX8X+1n1GfHz0GeY0dbYV03VXmKhJI0hsXDaBRdcAFRLlGDk1x3HHxHbblwC1W1GW0r/EkDl3yY
DP8hzOnM1f8KadgO31fxY0/xY0/xY0/xY0/xY0/z4eSo/1rSHjKSU/jlMX6y6cFEUTwAHA18Bzi+K4sPAY8Ck4X9DfjS7opKjUJUPj5kGZX6OOOKIFNObb3yTjoUq9HYeZ8A1kxFLxWtGTbOhUC34jLPxszHr+xSwSb80VUEDZXagyrDE2RiVzL3wwgtTTDNccYZGszaROBOmzyFmeVQkRDOhUM36jnL2Z2H62FaVyYmL/XVtcdbwsMMOA+pFT9RuYjGRiPSIn41mkWIRDJWAj5opO3rvvfem2KBoqsIRX/7yl1NMGYI4c65CAHGBrzSL2aB4r+rzipqrHcffo1LQ8d5WFi3ONM8GrdB0zz33TDH1sVqwDnDQQQcB9Xu7qX3Ge1WaxnLE0j/2sfqZeF/oHOLM5WzQV02VqY7bBaj/i7O8Rx99NFC/j6XpcNlI6RdLn6sYRtN4o+xm/N3DLYyfBX3VFKrzj+3p/vvvB+DKK69MMZUhj3o06RpRXxAL4yg7FvXS74z9TYf0VVPpEdulSunHMuxyI0RNZ5WJlaax1LnaarzX5ciZVSZuNujr85SuQ24DgF/84hdAfYxoeh6dFdI0ZsTVZmOb1DNqh9mySF811T0oHaHZYaT2OTvXrb50woQJKSadY6Zz2rRps3vao2Yk1Ro/MMyXNh4mbkbHf8uytKZ5mVaW5bO4rebEmubHmubHmubHmnYBa9oV/DyVH2vaQ0ZrazTGGGOMMcYYk5GOqzX2A+0vNHHixBRTel0WB6gsI9G22JTqjOl1paOb7CGxoIUKBAy3v8egscYaawCwwQYbzPC1e+65Jx3LLhI1U/o3LuaNVqSmxaoi2sWUMtaeJ1Clq5v29Wo72hsnFo1RG7njjjtS7NprrwXqFjGl2WPhkNgmZS2LOsu+FO23sc0KfQ4ZFwr3DNmXl19++RRTG/nZz36WYlpgHYuE6DgufI8WR7VT2Wuh0jQuLpbmMSYtB1HThRZaCKgXU1K70Z5kUGkRC6HoOO75FC1natNxIbX+Xuwv9BnGIjXRcjpoyNYYr1H2mFhQQe1FmkBlfY79YET6Rp3Vx8T2rv4iFl4ZxH40onsq7o95/fXXA3DjjTemmHSI+8zJTheLVcV2qb422sXWWWcdoF5QSWNfHM8yFFrpGzrnWPRI93+04DW1O2kW+9Foe5YtNO61pyJfkV7axXpJHEtka45tpOnZSW0tahrHfrXP3XarKtWrbUcdm/bsmhNosjDGfk1jTlMs6hzty5tvvjkAm2222Qy/O9ooB6EgiDHGGGOMMcaYjAxM5izOXO2zzz4zxDSLrbLaUC2IbMpuxdmL4Y5fTPyaZu7jm7hmfTso/dxT4rlrJ/uoqWbKVZYYqtnfOHvRtJg/aq7vbdI2/h7NFMcZo6ZS3W0mnvv73/9+oJ4BUDuNxWyaZg010xM1jQVVmjJn+hziomDNIGdcaN1zon7vfOc7Z4hpdvfcc89NMc38xiyPZhdjCeyo6TPPPAPUPw/dDzFLps8m3ivKdA5K5iyep/qyeL4qdx2zkZoxHzt2bIopk6liSC/+3VOnTgXqs4+aoW8quhSLAemzi7OVTf13G9F1xGtU3xmzEUsvvTRQ108Z97itSdRUM+KxoI/6naiLNNVnBFWmI87oNxUXaiu6ziZnhgpYQXWPr7322ikmXWOfEO9/aRLHJJXWbso4Lrnkkun4t7/9LVDXUH1UPNc2tltdW3RuKJMbM+Ian2ORm7e85S1Afdxryi7EDKacC1ELZTFiNkNjX1OmKX5GbdRURE3VJ8T7Wn1pzCxK0zjGxevV1k+xvetZLmbJpWU8h6ZCQIOW9Y33osakRRddNMWk35prrpli2s5IzwAvPh4zZgxQdyo1OWmattvpFs6cGWOMMcYYY0wL8MuZMcYYY4wxxrSAgbE1xgXTSltGu8B5550HwM0335xiTQswOyGmPGWXiBZGLZ4fFFtjTOuuttpqQN2WoX1j4kJrpXpnR9OZfW+
03KggQbQCaR+LQUm5R8vMuuuuC9Tbw09/+lOg3k5lUYzfJ1vecDa5Jquovnf8+PEppjYbCy088cQTM/xsm4lWF9kaoz3jrrvuAurWOdlho6YqDPTkk0+mWNRAdpwma9IKK6yQYrIzNn0eg0Jc1L/hhhsCdfuLCqo8/PDDKSZ7RyywJCvoQw89lGLR7qnCDdEWqn4n7tMlu4gsO1DZn2LbbbPO0SauAkvReiTdopVObS0W8ZGmsehNLGLz4IMPAtV9DNV+m7FN6nyivWellVYCqnsmnkNbbY2xXS611FJA1bdC9WwQxw21Ve2fBXDfffcB9Xs5fj7qK2a1DEIFRbbZZpsUk50s9i0inkNb+tw4rmiMiMXApG/UXu0pPjdo7Ir2x2jbk6bRote0157svbpvoBr743Oe+pbYB7WxT1B/Fu9htRfZa6HSMto+1b5iXxiL2Ei/+NmozcaCQCoSFAu6xX74xeca+6W2tNOI2mxsf1tvvTUA2267bYqpr4zPYtIl/myTpvG6pUf8DHUc96Rt2odT59rJu4AzZ8YYY4wxxhjTAvxyZowxxhhjjDEtYGBsjdH+pkpMserVWWedBXQnNauU50EHHZRisuBFO4oqm7UxJdxErBCmajex0t9FF100QyyXZVO2quOPPz7FZHOSpQrggQceAAZTU7XZuNeY9jSLVZWa7ES63uH2JWrSQ/aePffcM8VkWYj7Asl6MyiayvIC8La3vQ2oV7H8+c9/DtQtRbI4xfaq6412kEjT12VBGTduXIrp67KfQfV5DoqlOVqPNt10U6De1nQPRruirjHaOHS9qkQIdQuTNI1VrmQJj1UzpWn8vqa23+Y2G61JH/zgB4G6HV8WUVWwhKqicKwIJk1vvfXWFIu2RvWdr33ta1NM1t+ombSMfdJaa60F1PvYJqtTm4jXucsuuwB1q+ZTTz0F1Pd10n6nMSZdVbkNYP3110/HqkYYrWH623FJg45lqYJq7Dr99NNTTJ9zG/eWi7bm97znPUBVXTh+XVZQqPbmvPDCC1NM9632iYLKeg6Vfk3VnaMNWHufffWrX00xjZVXXXVViul5K94vbbE1xr5LlsI99tgjxXSNstxD1SdcccUVKab2tcUWW6RYtOLq78T95PSMEZ8ldthhB6CyMgP8+Mc/BupWRz1Hx2eStmgax2JZZ1VZHCqNopVe/cGvfvWrFFP/GcfxaKvX9TZpGvf//PznPw9Uz8ZQLfuJ9nu9h3RSqdWZM2OMMcYYY4xpAa3PnGmWJc7GaGY2zvhphjfXzGqc/f34xz8OwK677ppimqE48MADUyxm8tqMNJ0wYUKKaRYrZsm06DFXNiDOVEyZMgWoMiFQFWT49Kc/nWJxhqzNSNOJEyemmK43XoOyPDPLlo0EzSjFDIhmc+Iidy1G32+//VIsfsZtRprGhfda0BtnuJRhiBlKtdnYdpv2c4mz2poxf8c73pFi3/zmN4F621XG7Itf/GKKDcq9r3aj2XKo9muKBRVuueUWoF7MQLOBTbPgUec4g6xZxx133DHFlP2Ifaz2VYuZB2V3ZvUZ9htpGvtT9Wtx5lTFgKLbQvo2FaGJsdhOlf3aZJNNUkxFsuJnqP40zpLrXmn63W3bo0+6xkIVm222GVBvO9///vcBuOmmm1JMGbPopFF2MfbHUQftaRazlNIhZuCU8W2aFVf2Lf5M7Bv6nZHQZxzH3Z133hmoF0u4/PLLAbj00ktTTFne2M8qWxwLecTxRRm4+DPaEy4WelKhhVhY5O1vfztQz4oo8xP3oowuin4gTZdddtkU+8QnPgHUXR+6/3/0ox+lmJxBUR/dy8q+QX0fL/WH6jOh6lNihldFgnTPQOUEicWz9Fn/5Cc/SbGY3esn8VlGjqCoi67juuuuSzH1d7FdbLnllkB9b8LYx+keffzxx1NMfXN0RCj7GZ9R1e/HDK+0jC6J2BeNBGfOjDHGGGOMMaYF+OXMGGOMMcYYY1pA622NYsyYMel
YloaYCm7aa2CkNO35ERemTp48eYafOe6444AqzQ7tsIKMBF1v3L+haa8L2WJGQ7Q26e+cdtppKSZbRfx7BxxwAFAVVoHB0VQWHFkJoLKKxmvUottZXZc+o7ggNloWtHg72mplfYh2X1lyo41hUDRVG5KNCyrrYVy8rP2emixD8d6WltFOE4sDyPoZLdTaC0WLjKHS9Prrr0+xQSkEoja50UYbpZiuMS5olqWwyYoRLWXSMlp63ve+96XjSZMmAXVbjj4H7dcFlUU0LoyXLaXt7VUabLXVVikmi1i0FKqgQpNVOxZokFV5u+22S7FoQ1WBj9g3yIYUrUmyK1199dUpJktuvFea9kxsA+rvoiVWNqfYn+k+jO1X1xKLiay33noAfOpTn0qxOAbqc4yFfrTXViygoucO2fOgKuwQ99/ScZv2j5Omu+22W4rJehfbjizy8RrVx0Wbl9roRz7ykRSLmusevvvuu1Pssssum+G89DwQ+wk908VnO7X5Nmkqy3ss/qHngGi1/fa3vw3UxxIhSy1U/We0isbxRcW9ogVc+xbG36O9v1ZdddUUU18ff7fuG7V16L+tUeP87rvvnmJabhDb6ZlnngnU732NcbEQip4h4t5m8XrVJm+44YYU030e+whZ8qMtWDbLsWPHppj68BNPPDHFojV6JDhzZowxxhhjjDEtYJaZs6IolgTOBBYBSuDksiyPK4piQeA8YGngUWBSWZbPDfd7RotmSuJC21iC9cWxkS5qjr8jziJ85zvfAeoLObXI+uijj06xww47DMi7wLefmipLEWekFIuaNpUF1kx6LBsdF0x+9KMfBeqZH83M6GtQLezOOYPbK02lVZy5ki5xYaq+L2YfXvz9UO1uv/HGG6fY5z73uXSs0roxQ6lS8poxg2p2aBA1VTYhLuKVRrHwgWLxnpYu0hGq2a699torxWIpbX1vnKVUAZeddtopxTRzOYiazj///EC9f5NWMaOjWCwrrs9DC9YBPvCBDwD1zE5Tifw4E3vqqacC9f5U2YqMmr4BeqOr7nktFoeqLcZMigob6DOAKlOgBetQZSPiTGy8z9X21TYBDj74YKBePlrOh4xjVM80haqPizP/aoNxpl0DML8AAAytSURBVFzaxIzOKqusAtSzj8qIx4IfcbxTMQCVy4ZqkX/MIEvPqGtTf95UlGg4eqWpZvnXXXfdFJOmMUumcT4+I+hnYoEmbWkQi4nEAiinnHIKAMcee2yKqZ+JmV+V5499uIqDxGJMKoIR+//h6JWm0kBbkgz9baDKqEbiM4KK+sSfVT8StbjkkkvSsZ494/Yl6jebiv/E8fO9730vUO//VfiiTZqqncZnGd2rsaCSiNejomzasgWqNnTbbbelWNwaS/d+070addbWErFomEr7R53V/8QscjcyZ/8BPlWW5YrAOsCeRVGsCOwPXFOW5VjgmqF/m9HzCqxpbqxpfqxpfqxpft7gcSo71rQLWNOu4D41P9a0h8zy5awsy6fKsvzl0PHzwP3A4sBE4IyhbzsD2Lr5N5gRMi/WNDfWND/WND/WND9/x+NUbqxpd7Cm+XGfmh9r2kNmqyBIURRLA6sBtwKLlGWplY2/Z7rtMTtKM0ZbjCwEyyyzTIrJvhB37tZC0gUWWCDFZGOKNqWY/pSNJ1p8tIA4LsDswn4lfwWW7YWmSoFHq4YsBnE3dKVrL7jgghSThSQuiJSlKdpDo8VEn1dMD8sOFRd3dmFBes81bSpAEfdu2XfffYH6XieyjsR9i5TWjxaxWDRAfy8WT9HC+bhXRxfouabxutWWYoEg2RPiAl9ZwqJ9RzrH3xetNVoAfOSRR6bYMcccA3R9b7ieaarrjdY62e20Bw7A4YcfDtQXr8tmFot/yKocdYwL+FVIQfc7VIVAuryo/1X0aJySBrHPk40m2mbVlqI+suNES7jaZxxj4udwwgknAHWrWI+Kp/RMU6jsX9Fap6Id0Qaqhf/xvtY4Fm1FaqPRdhc
LesnOqAJDMHI9O3we6JmmsiTHMUljuor3AKyzzjpAfSmCNI1FFdSWb7zxxhQ79NBD0/HM9vWMNO1VJktwtIzqMxyBVbRnfaqeH6OtTecX+73x48cD9edRaRotjLJC6j6H+vPCzOyHUWfZFaO1+v777wfqzxX62yMoANczTWVBjnZFtZHYTlXkJ1qVpWkcs1WM5ZxzzkmxkY7p8d6WNTHaTLV/3YYbbphi+vyj/Xp2GfHLWVEUrwEuBPYpy/IvL1rbVRZF0diLFUWxG7Bb09dMjVpvY02zYE3zY03zY03z89uRjlPWdMSMWFOwriPFmnYF96n5saY9pBjJzFBRFC8DLgWuKMvy/4ZiDwIblmX5VFEUiwHXl2X55ln8nlFP66l0OMBJJ50E1GdwXnjhhdr/oZoRiKWzmxbuxlkbzfTEQhWPPPII0PVZyduB+eihpp/97GfTsbIPcfGtMmtxNkaaNhVliTMMsZz72WefDdRn1DSr02V6pqkG1zjbteuuuwJVZgKat3xQm4xtU20ttue4uFgFAFQuG5pnH7tAzzSVHjEbvtlmmwH19qf22ZS1jBkdfd+0adNSLM6AfelLXwKqwirQsxL5PdNUzoDrrrsuxZTxjkUnmjIB0jL2g8o2xtLmRx11VDq+9tprgeaS/F3m9rIsx83uODUaTTULHV0Ayy23HFC/p5vaaZOLQe0v9iVxxnek23F0gVFpCqPTVSWsr7zyyhRTxidqKF2jHuoL45Yb+j2xEM2jjz46w+/pNdOfcXujqTISKkEOVcY3Xr/GqRiTmygWnVEbvfnmm1NsJIUlekBX+9TY/nbYYQeg3q5UXCpu96TMVLzX5SZSMTSAs846C6hneHPf601jZRzrhvl7PdNU2xLoOQeqsSs+W+o5MjrdtG3DySefnGJqs7nu8abCeDFzr7E0PpMN83x2e1mW45q+MMs1Z8X0szgVuF8vZkNcAmgDsMnAD2b1u8wssab5sab5sab5sabdwbrmx5rmx5rmx5rmx5r2iJFUa1wf2AnYqCiKO4f+mwB8BdikKIqHgHcN/duMnvmxprmxpvmxpvmxpvlZ0eNUdqxpF7CmXcF9an6saQ8Zka0x2x/rwIIXF7ArBbzBBhukWJNdUcR0rVLusXjAJz/5yXR85513An2xOAyb3pwZnWgaF1r/4AfTJ0BiUY9oc3oxTQtPp0yZkmJa/A5Ver5HFrFIzzWNxRJkx1txxRVTrElT3YPRDqKFu7Ftat8S6ItNTPRc01h8RvYu2cagWdMmC+O5554LVMUuoG576kP7FD3XNPad3/rWt4D64mvZRqMmWkDdZGGMBQF6ZK+dFT3TVLYgWW4BvvKV6c8ssXCNvi9anVQwRVYmqMY37f0GfbEwNjEqTWF0uuq+3nrrqiCcCiotscQSKaa+MNqRtT9ZtERPnToVaE37TJRlOaNHagSMRlNZr7bffvsUUyGpWNBGWsoiBlVRihhTobaWtM9Iz+5/FU2ZPHlyimnfsjg26Znz9ttvTzH1pbFwRNPSh5bQM01VIGn33XdPMT2bRluj7Pl6bofqPo9LQnrRPpusjpHhrKKjtjUaY4wxxhhjjOk+A5M5i2gBdlwsuNVWWwH1ghYqexmLJmjH+sceeyzF+rUQ+EX0fPY8opmKffbZJ8WkaVzoeNdddwH1BcU33HADUC/F2pKZtJ5rGmdPpOkuu+ySYiqXH/XRjKSyl1CVc25J24z0VVPN7sZS2ppRiyWDL7vsMgDuvffeFFPmpyVtM9KKdjpuXHUKyqqrDwW44447gHoxH8/yVsTiM3J6LLXUUvF3A/VsrrJjUcc+ZnBnRU8zZyLqqkILsRy5MpGxNLZiLdYy0cvMmYgZHT0zxcJpTcUrmgqvtJie3/+xSJU0jTpL05i5HYT2GeiZpuor47NnfLYXuudj/zkg7VM4c2aMMcYYY4wxbcYvZ8YYY4wxxhjTAgbS1jiH0ldb4xyKNc2PNc2PNc2PNc1PX2yNczr9sDX
OBbTi/m/a03CA6aumTQU35mRNnTkzxhhjjDHGmBYwfK10Y4wxxhhjzGwzB2R2WsPcpqUzZ8YYY4wxxhjTAvxyZowxxhhjjDEtwC9nxhhjjDHGGNMC/HJmjDHGGGOMMS3AL2fGGGOMMcYY0wL8cmaMMcYYY4wxLcAvZ8YYY4wxxhjTAnq9z9k04G9D/58TWJh81zJmlD9nTYfHmk7HmuanLZo+lvlc+ok1zU8bNIU56/63pt2hDbpa0+GxptPpiaZFrzd2K4ritrIsx/X0j3aJtlxLW84jB225lracRw7aci1tOY8ctOla2nQundCm62jTuXRCm66jTefSCW26jjadS6e05Vrach45aMu1tOU8ctCra7Gt0RhjjDHGGGNagF/OjDHGGGOMMaYF9OPl7OQ+/M1u0ZZract55KAt19KW88hBW66lLeeRgzZdS5vOpRPadB1tOpdOaNN1tOlcOqFN19Gmc+mUtlxLW84jB225lracRw56ci09X3NmjDHGGGOMMWZGbGs0xhhjjDHGmBbQ05ezoig2L4riwaIophZFsX8v/3YnFEWxZFEU1xVFcV9RFPcWRfH/huILFkVxVVEUDw39f4E+nJs1zX9u1jT/uQ2kptBeXa1pV87LmuY/L2ua/7ysaXfObSB1tab56bumZVn25D9gHuA3wLLAvMBdwIq9+vsdnvtiwOpDx/MBvwZWBI4E9h+K7w98tcfnZU2tqTWdC3W1ptbUmlpTa2pdremcqWkvM2drAVPLsny4LMt/AecCE3v490dNWZZPlWX5y6Hj54H7gcWZfv5nDH3bGcDWPT41a5ofa5qfgdUUWqurNc2PNc2PNc2PNe0OA6urNc1PvzXt5cvZ4sBvw7+fGIoNFEVRLA2sBtwKLFKW5VNDX/o9sEiPT8ea5sea5meO0BRapas1zY81zY81zY817Q5zhK7WND/90NQFQWaDoiheA1wI7FOW5V/i18rpOU6XvpxNrGl+rGl3sK75sab5sab5sab5sab5sab56ZemvXw5exJYMvx7iaHYQFAUxcuY/gGdVZbl94fCfyiKYrGhry8GPN3j07Km+bGm+RloTaGVulrT/FjT/FjT/FjT7jDQulrT/PRT016+nP0CGFsUxTJFUcwLvB+4pId/f9QURVEApwL3l2X5f+FLlwCTh44nAz/o8alZ0/xY0/wMrKbQWl2taX6saX6saX6saXcYWF2taX76rmmuyiIj+Q+YwPSKJ78BDuzl3+7wvMczPXX5K+DOof8mAAsB1wAPAVcDC/bh3KypNbWmc6Gu1tSaWlNrak2tqzWd8zQthk7CGGOMMcYYY0wfcUEQY4wxxhhjjGkBfjkzxhhjjDHGmBbglzNjjDHGGGOMaQF+OTPGGGOMMcaYFuCXM2OMMcYYY4xpAX45M8YYY4wxxpgW4JczY4wxxhhjjGkBfjkzxhhjjDHGmBbw/wGFlXpFYVxRsgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] @@ -282,15 +494,15 @@ " \n", " combined = model.decoder(encoding1 * alpha + encoding2 * beta)[0]\n", " \n", - " show_image(axes[i], combined)" + " show_image(axes[i], combined.cpu())" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "vel", "language": "python", - "name": "python3" + "name": "vel" }, "language_info": { "codemirror_mode": { @@ -302,7 +514,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.3" } }, "nbformat": 4, From 3c9a2ffe45d2e407666f12ecb16e5242de80dd08 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 26 Sep 2019 14:22:24 -0700 Subject: [PATCH 097/162] Significant refactoring of optimizers. --- .../cats_vs_dogs_resnet34.yaml | 1 - .../classification/mnist/mnist_cnn_01.yaml | 3 +- .../latent/mnist/mnist_cnn_iwae.yaml | 1 - .../latent/mnist/mnist_cnn_vae.yaml | 1 - .../latent/mnist/mnist_fc_iwae.yaml | 1 - .../latent/mnist/mnist_fc_vae.yaml | 1 - .../classification/imdb_sentiment_gru.yaml | 15 ++- .../mnist/mnist-autoencoder.ipynb | 59 +++++---- vel/api/__init__.py | 6 +- vel/api/info.py | 5 - vel/api/model.py | 60 +++++++-- vel/api/model_config.py | 71 +++++++---- vel/api/optimizer.py | 107 +++++++++++++++- vel/api/source.py | 55 ++++++++ vel/command/list_command.py | 33 +++++ vel/command/phase_train_command.py | 51 ++++---- vel/command/train_command.py | 61 +++++---- vel/data/bucket_loader.py | 68 ++++++++++ vel/data/source/nlp/imdb.py | 30 +++-- vel/data/source/nlp/text_url.py | 8 +- vel/internal/provider.py | 9 +- vel/launcher.py | 6 +- vel/metric/__init__.py | 4 +- vel/metric/base/__init__.py | 4 +- vel/metric/base/averaging_metric.py | 13 ++ vel/model/imagenet/resnet34.py | 6 +- vel/model/latent/cnn_iwae.py | 4 +- vel/model/latent/cnn_vae.py | 4 +- vel/model/latent/fc_iwae.py | 9 +- vel/model/latent/fc_vae.py | 8 +- vel/model/latent/iwae.py | 16 +-- vel/model/latent/vae_base.py | 16 +-- 
.../multilayer_rnn_sequence_classification.py | 9 +- vel/module/input/embedding.py | 8 +- vel/notebook/loader.py | 8 +- vel/optimizer/adadelta.py | 44 +++++-- vel/optimizer/adam.py | 59 ++++----- vel/optimizer/radam.py | 64 ++++------ vel/optimizer/ranger.py | 119 ++++++++++-------- vel/optimizer/rmsprop.py | 42 +++++-- vel/optimizer/rmsprop_tf.py | 42 +++++-- vel/optimizer/sgd.py | 52 +++++--- vel/rl/algo/distributional_dqn.py | 2 +- vel/rl/api/algo_base.py | 20 --- vel/rl/command/rl_train_command.py | 2 +- vel/scheduler/ladder.py | 4 +- vel/storage/streaming/stdout.py | 4 +- vel/train/phase/cycle.py | 17 +-- vel/train/phase/generic.py | 6 +- vel/train/train_phase.py | 6 +- vel/train/trainer.py | 43 ++----- vel/util/dataloader.py | 21 ++++ vel/util/module_util.py | 16 +++ 53 files changed, 858 insertions(+), 466 deletions(-) create mode 100644 vel/command/list_command.py create mode 100644 vel/data/bucket_loader.py create mode 100644 vel/util/dataloader.py diff --git a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml index 1b2ab425..a53623fb 100644 --- a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml +++ b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml @@ -60,7 +60,6 @@ optimizer: lr: 0.01 weight_decay: 0.0 momentum: 0.9 - layer_groups: on commands: diff --git a/examples-configs/classification/mnist/mnist_cnn_01.yaml b/examples-configs/classification/mnist/mnist_cnn_01.yaml index aaa96cef..d11b5742 100644 --- a/examples-configs/classification/mnist/mnist_cnn_01.yaml +++ b/examples-configs/classification/mnist/mnist_cnn_01.yaml @@ -16,7 +16,7 @@ source: loader: name: vel.data.dataset_loader batch_size: 128 - num_workers: 4 +# num_workers: 4 transformations: - name: vel.data.transformation.image_to_tensor @@ -24,6 +24,7 @@ loader: optimizer: name: vel.optimizer.adadelta + max_grad_norm: 1.0 commands: 
diff --git a/examples-configs/latent/mnist/mnist_cnn_iwae.yaml b/examples-configs/latent/mnist/mnist_cnn_iwae.yaml index df3164fe..90cb5da7 100644 --- a/examples-configs/latent/mnist/mnist_cnn_iwae.yaml +++ b/examples-configs/latent/mnist/mnist_cnn_iwae.yaml @@ -8,7 +8,6 @@ model: img_channels: 1 channels: [64, 128, 256] representation_length: 50 - max_grad_norm: 1.0 analytical_kl_div: true k: 5 # It's hard to sample many samples for this slightly larger network diff --git a/examples-configs/latent/mnist/mnist_cnn_vae.yaml b/examples-configs/latent/mnist/mnist_cnn_vae.yaml index 76cc3f90..118ad430 100644 --- a/examples-configs/latent/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/latent/mnist/mnist_cnn_vae.yaml @@ -8,7 +8,6 @@ model: img_channels: 1 channels: [64, 128, 256] representation_length: 50 - max_grad_norm: 1.0 analytical_kl_div: true diff --git a/examples-configs/latent/mnist/mnist_fc_iwae.yaml b/examples-configs/latent/mnist/mnist_fc_iwae.yaml index e4ca4abb..215906dd 100644 --- a/examples-configs/latent/mnist/mnist_fc_iwae.yaml +++ b/examples-configs/latent/mnist/mnist_fc_iwae.yaml @@ -8,7 +8,6 @@ model: img_channels: 1 layers: [200, 200] representation_length: 50 - max_grad_norm: 1.0 analytical_kl_div: true k: 50 # Because it's such a small network we can try many importance samples diff --git a/examples-configs/latent/mnist/mnist_fc_vae.yaml b/examples-configs/latent/mnist/mnist_fc_vae.yaml index 96653a6e..1fa51447 100644 --- a/examples-configs/latent/mnist/mnist_fc_vae.yaml +++ b/examples-configs/latent/mnist/mnist_fc_vae.yaml @@ -8,7 +8,6 @@ model: img_channels: 1 layers: [200, 200] representation_length: 50 - max_grad_norm: 1.0 analytical_kl_div: true diff --git a/examples-configs/nlp/classification/imdb_sentiment_gru.yaml b/examples-configs/nlp/classification/imdb_sentiment_gru.yaml index 1c486db6..3e85dac8 100644 --- a/examples-configs/nlp/classification/imdb_sentiment_gru.yaml +++ b/examples-configs/nlp/classification/imdb_sentiment_gru.yaml @@ 
-2,16 +2,21 @@ name: 'imdb_sentiment_gru' source: - name: vel.sources.nlp.imdb + name: vel.data.source.nlp.imdb vectors: "glove.6B.100d" # precomputed 100-dimensional embeddings + + +loader: + name: vel.data.bucket_loader batch_size: 32 + model: - name: vel.models.rnn.multilayer_rnn_sequence_classification + name: vel.model.rnn.multilayer_rnn_sequence_classification input_block: - name: vel.modules.input.embedding + name: vel.module.input.embedding alphabet_size: 25_002 # Size of the alphabet output_dim: 100 # Embedding dimension @@ -31,7 +36,7 @@ model: optimizer: - name: vel.optimizers.adam + name: vel.optimizer.adam lr: [1.0e-4, 1.0e-3, 1.0e-2, 1.0e-2] weight_decay: [0.0, 0.0001, 0.001, 0.001] epsilon: 1.0e-5 @@ -41,6 +46,6 @@ optimizer: commands: train: - name: vel.commands.train_command + name: vel.command.train_command max_grad_norm: 5.0 epochs: 10 diff --git a/examples-notebooks/autoencoders/mnist/mnist-autoencoder.ipynb b/examples-notebooks/autoencoders/mnist/mnist-autoencoder.ipynb index ad7a6b1e..ea642bf8 100644 --- a/examples-notebooks/autoencoders/mnist/mnist-autoencoder.ipynb +++ b/examples-notebooks/autoencoders/mnist/mnist-autoencoder.ipynb @@ -31,22 +31,14 @@ "metadata": {}, "outputs": [], "source": [ - "config = nb.load_config('examples-configs/autoencoders/mnist/mnist_cnn_autoencoder.yaml', run_number=4, device='cpu')" + "config = nb.load_config('examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml', run_number=1, device='cpu')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Setting up a new session...\n" - ] - } - ], + "outputs": [], "source": [ "model = config.load_trained_model()" ] @@ -63,22 +55,23 @@ "MnistCnnAutoencoder(\n", " (encoder): Sequential(\n", " (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (1): ReLU(inplace)\n", + " (1): ReLU(inplace=True)\n", " (2): Conv2d(8, 16, kernel_size=(3, 3), 
stride=(2, 2), padding=(1, 1))\n", - " (3): ReLU(inplace)\n", + " (3): ReLU(inplace=True)\n", " (4): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", " (5): Flatten()\n", " (6): Linear(in_features=784, out_features=16, bias=True)\n", " )\n", " (decoder): Sequential(\n", " (0): Linear(in_features=16, out_features=784, bias=True)\n", - " (1): ReLU(inplace)\n", - " (2): Reshape()\n", + " (1): ReLU(inplace=True)\n", + " (2): Reshape(sizes=(16, 7, 7), batch_dims=1)\n", " (3): ConvTranspose2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n", - " (4): ReLU(inplace)\n", + " (4): ReLU(inplace=True)\n", " (5): ConvTranspose2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n", - " (6): ReLU(inplace)\n", + " (6): ReLU(inplace=True)\n", " (7): ConvTranspose2d(8, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (8): Sigmoid()\n", " )\n", ")\n", "----------------------------------------------------------------------------------------------------\n", @@ -97,8 +90,10 @@ "metadata": {}, "outputs": [], "source": [ - "data_source = config.provide('source')\n", - "train_dataset = data_source.train_dataset" + "data_loader = config.provide('loader')\n", + "data_source = data_loader.transformed_source\n", + "train_dataset = data_source.train\n", + "validation_dataset = data_source.validation" ] }, { @@ -108,7 +103,7 @@ "outputs": [], "source": [ "def get_sample(idx):\n", - " return train_dataset[idx][0]" + " return train_dataset[idx]['x'].to(config.device)" ] }, { @@ -118,7 +113,7 @@ "outputs": [], "source": [ "def show_image(axis, sample):\n", - " axis.imshow(train_dataset.denormalize(sample)[:, :, 0], cmap='gray')" + " axis.imshow(train_dataset.denormalize_item(sample, 'x'), cmap='gray')" ] }, { @@ -128,7 +123,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -154,7 +149,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -178,12 +173,12 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -206,17 +201,19 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "tensor([[-0.9777, -1.3779, -0.6812, 2.4773, 2.3612, 0.1038, 0.8307, -2.4117,\n", - " -1.0913, 1.0372, -2.3588, -0.2581, -1.2573, 0.8061, -1.3952, 2.1415]])" + "tensor([[-7.0867e-03, 9.9738e+00, -6.5140e+00, 4.5889e+00, -4.8899e+00,\n", + " 3.6933e-01, -2.5673e+00, 1.4296e+01, 8.0464e-01, 1.1216e+01,\n", + " 6.2219e+00, 3.5970e+00, -1.4565e+00, -1.0004e+01, -4.6819e+00,\n", + " 1.0152e-01]])" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -227,12 +224,12 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -280,7 +277,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/vel/api/__init__.py b/vel/api/__init__.py index 89d2026f..358a356f 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -1,13 +1,13 @@ from .callback import Callback from .info import BatchInfo, EpochInfo, TrainingInfo from .model import ( - Model, GradientModel, LossFunctionModel, BackboneModel, LinearBackboneModel + Model, OptimizedModel, GradientModel, LossFunctionModel, BackboneModel, LinearBackboneModel ) from .model_config import ModelConfig from .model_factory import ModelFactory -from .optimizer import OptimizerFactory +from .optimizer import OptimizerFactory, VelOptimizer, VelOptimizerProxy from .schedule import Schedule from .scheduler import SchedulerFactory -from .source import Source +from .source import Source, LanguageSource from .storage import Storage from .transformation import Transformation, ScopedTransformation diff --git a/vel/api/info.py b/vel/api/info.py index 11544cff..1f0a765b 100644 --- a/vel/api/info.py +++ b/vel/api/info.py @@ -41,16 +41,11 @@ def __init__(self, start_epoch_idx=0, metrics=None, callbacks=None): self.callbacks = callbacks if callbacks is not None else [] self.history = TrainingHistory() - self.optimizer_initial_state = None - def restore(self, hidden_state): """ Restore any state from checkpoint - currently not implemented but possible to do so in the future """ for callback in self.callbacks: callback.load_state_dict(self, hidden_state) - if 'optimizer' in hidden_state: - self.optimizer_initial_state = hidden_state['optimizer'] - def initialize(self): """ Runs for the first time a training process is started from scratch. 
Is guaranteed to be run only once diff --git a/vel/api/model.py b/vel/api/model.py index 9cb31443..9032e098 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -3,6 +3,9 @@ import vel.util.module_util as mu +from vel.api.optimizer import VelOptimizer, OptimizerFactory +from vel.api.scheduler import SchedulerFactory +from vel.api.callback import Callback from vel.metric.loss_metric import Loss from vel.util.summary import summary @@ -45,10 +48,6 @@ def summary(self, input_size=None): else: summary(self, input_size) - def get_layer_groups(self): - """ Return layers grouped for optimization purposes """ - return [self] - def reset_weights(self): """ Call proper initializers for the weights """ pass @@ -63,13 +62,60 @@ def zero_state(self, batch_size): return None -class GradientModel(Model): - """ Model for a supervised learning problem """ +class OptimizedModel(Model): + """ Model that is being optimized by an 'optimizer' """ + + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: + """ Create optimizer for the purpose of optimizing this model """ + parameters = filter(lambda p: p.requires_grad, self.parameters()) + return optimizer_factory.instantiate(parameters) + + def optimize(self, data: dict, optimizer: VelOptimizer) -> dict: + """ + Perform one step of optimization of the model + :returns a dictionary of metrics + """ + raise NotImplementedError + + def validate(self, data: dict) -> dict: + """ + Perform one step of model inference without optimization + :returns a dictionary of metrics + """ + raise NotImplementedError + + +class GradientModel(OptimizedModel): + """ Model that calculates a single gradient and optimizes it """ + + def optimize(self, data: dict, optimizer: VelOptimizer) -> dict: + """ + Perform one step of optimization of the model + :returns a dictionary of metrics + """ + optimizer.zero_grad() + + metrics = self.calculate_gradient(data) + + opt_metrics = optimizer.step() + + for key, value in 
opt_metrics.items(): + metrics[key] = value + + return metrics + + @torch.no_grad() + def validate(self, data: dict) -> dict: + """ + Perform one step of model inference without optimization + :returns a dictionary of metrics + """ + return self.calculate_gradient(data) def calculate_gradient(self, data: dict) -> dict: """ Calculate gradient for given batch of training data. - Returns a dictionary of metrics. + :returns a dictionary of metrics """ raise NotImplementedError diff --git a/vel/api/model_config.py b/vel/api/model_config.py index b593e878..62bcbdbc 100644 --- a/vel/api/model_config.py +++ b/vel/api/model_config.py @@ -41,7 +41,7 @@ def from_project_directory(path) -> str: return os.path.join(ModelConfig.find_project_directory('.'), path) @classmethod - def from_file(cls, filename: str, run_number: int = 1, continue_training: bool = False, seed: int = None, + def from_file(cls, filename: str, run_number: int = 1, resume_training: bool = False, seed: int = None, device: str = 'cuda', parameters: typing.Optional[dict] = None, tag: typing.Optional[str] = None): """ Create model config from file """ with open(filename, 'r') as fp: @@ -62,7 +62,7 @@ def from_file(cls, filename: str, run_number: int = 1, continue_training: bool = configuration=aggregate_dictionary, run_number=run_number, project_dir=project_config_path, - continue_training=continue_training, + resume_training=resume_training, seed=seed, device=device, parameters=parameters, @@ -71,7 +71,7 @@ def from_file(cls, filename: str, run_number: int = 1, continue_training: bool = @classmethod def script(cls, model_name: str = 'script', configuration: typing.Optional[dict] = None, run_number: int = 1, - continue_training=False, seed: int = None, device: str = 'cuda', + resume_training=False, seed: int = None, device: str = 'cuda', parameters: typing.Optional[dict] = None, tag: typing.Optional[str] = None): """ Create model config from supplied data """ if configuration is None: @@ -94,7 +94,7 @@ def 
script(cls, model_name: str = 'script', configuration: typing.Optional[dict] configuration=aggregate_dictionary, run_number=run_number, project_dir=project_config_path, - continue_training=continue_training, + resume_training=resume_training, seed=seed, device=device, parameters=parameters, @@ -102,23 +102,29 @@ def script(cls, model_name: str = 'script', configuration: typing.Optional[dict] ) def __init__(self, filename: str, configuration: dict, run_number: int, project_dir: str, - continue_training=False, seed: int = None, device: str = 'cuda', + resume_training=False, seed: int = None, device: str = 'cuda', parameters: typing.Optional[dict] = None, tag: typing.Optional[str] = None): self.filename = filename self.device = device - self.continue_training = continue_training + self.resume_training = resume_training self.run_number = run_number self.seed = seed if seed is not None else (dtm.date.today().year + self.run_number) self.contents = configuration self.project_dir = os.path.normpath(project_dir) - self.command_descriptors = self.contents.get('commands', []) + self.command_descriptors = { + **self.contents.get('global_commands', {}), + **self.contents.get('commands', {}) + } # This one is special and needs to get removed if 'commands' in self.contents: del self.contents['commands'] + if 'global_commands' in self.contents: + del self.contents['global_commands'] + self.provider = Provider(self._prepare_environment(), {'model_config': self}, parameters=parameters) if self.provider.has_name('output_directory'): @@ -128,18 +134,19 @@ def __init__(self, filename: str, configuration: dict, run_number: int, project_ self._model_name = self.provider.get("name") - if continue_training: + if self.meta_exists(): self._meta = self._load_meta() - if tag is None: - self._tag = self._meta['tag'] - else: - if self._tag != self._meta['tag']: + if resume_training: + if (tag is not None) and (tag != self._meta['tag']): raise VelInitializationException("Model tag mismatch") + 
else: + self._tag = self._meta['tag'] + else: + self._tag = tag else: self._tag = tag - self._meta = self._create_meta() - self._write_meta() + self._meta = None #################################################################################################################### # INTERNAL FUNCTIONS @@ -149,19 +156,12 @@ def _prepare_environment(self) -> dict: def _load_meta(self) -> dict: """ Load previously written metadata about the project """ - if not os.path.exists(self.meta_dir(self.META_FILE_NAME)): + if not self.meta_exists(): raise VelInitializationException("Previous run does not exist") with open(self.meta_dir(self.META_FILE_NAME), 'rt') as fp: return json.load(fp) - def _write_meta(self) -> None: - """ Write metadata to a file """ - pathlib.Path(self.meta_dir()).mkdir(parents=True, exist_ok=True) - - with open(self.meta_dir(self.META_FILE_NAME), 'wt') as fp: - return json.dump(self.meta, fp) - def _create_meta(self) -> dict: """ Metadata for this model/config """ return { @@ -171,6 +171,26 @@ def _create_meta(self) -> dict: 'config': self.render_configuration() } + #################################################################################################################### + # Metadata handling + def meta_exists(self): + """ If metadata file exists for this config """ + return os.path.exists(self.meta_dir(self.META_FILE_NAME)) + + def enforce_meta(self): + """ Make sure metadata exists for this config """ + if self._meta is None: + raise VelInitializationException("Given model has not been initialized") + + def write_meta(self) -> None: + """ Write metadata to a file """ + self._meta = self._create_meta() + + pathlib.Path(self.meta_dir()).mkdir(parents=True, exist_ok=True) + + with open(self.meta_dir(self.META_FILE_NAME), 'wt') as fp: + return json.dump(self.meta, fp) + #################################################################################################################### # COMMAND UTILITIES def get_command(self, command_name): @@ 
-223,6 +243,7 @@ def name(self) -> str: @property def meta(self) -> dict: """ Return name of the model """ + self.enforce_meta() return self._meta @property @@ -239,7 +260,7 @@ def torch_device(self): def render_configuration(self) -> dict: """ Return a nice and picklable run configuration """ - return self.provider.render_configuration() + return self.provider.render_environment() #################################################################################################################### # PROVIDER API @@ -300,9 +321,7 @@ def load_trained_model(self): if last_epoch_idx == 0: raise VelInitializationException("No trained model available") - training_info = TrainingInfo( - start_epoch_idx=last_epoch_idx, - ) + training_info = TrainingInfo(start_epoch_idx=last_epoch_idx) model_state, hidden_state = storage.load(training_info) diff --git a/vel/api/optimizer.py b/vel/api/optimizer.py index 0f677f19..82f56825 100644 --- a/vel/api/optimizer.py +++ b/vel/api/optimizer.py @@ -1,10 +1,111 @@ -from vel.api import Model +import typing +import itertools as it -from torch.optim import Optimizer +from torch.nn.utils import clip_grad_norm_ +from torch.optim.optimizer import Optimizer +from vel.metric import DefaultAveragingNamedMetric +from vel.api.callback import Callback +from vel.api.scheduler import SchedulerFactory + + +class VelOptimizer: + """ Vel optimizer interface """ + + def get_lr(self) -> float: + """ Return current learning rate of the optimizer """ + raise NotImplementedError + + def set_lr(self, lr: float): + """ Set current learning rate of the optimizer """ + raise NotImplementedError + + def state_dict(self) -> dict: + raise NotImplementedError + + def load_state_dict(self, state_dict: dict) -> None: + raise NotImplementedError + + def zero_grad(self) -> None: + raise NotImplementedError + + def step(self, closure=None) -> dict: + raise NotImplementedError + + def add_param_group(self, param_group: dict) -> None: + raise NotImplementedError + + def 
metrics(self) -> list: + """ Set of metrics for this model """ + return [] + + def create_scheduler(self, scheduler_factory: SchedulerFactory, last_epoch: int = -1) -> [Callback]: + """ Create a scheduler instance for this optimizer """ + raise NotImplementedError + + +class VelOptimizerProxy(VelOptimizer): + """ Proxy PyTorch optimizer into a Vel optimizer """ + def __init__(self, optimizer: Optimizer, max_grad_norm: typing.Optional[float] = None): + self.optimizer = optimizer + self.max_grad_norm = max_grad_norm + + def get_lr(self) -> float: + """ Return current learning rate of the optimizer """ + return self.optimizer.param_groups[-1]['lr'] + + def set_lr(self, lr: float): + """ Set current learning rate of the optimizer """ + if isinstance(lr, list): + for group_lr, param_group in zip(lr, self.optimizer.param_groups): + param_group['lr'] = group_lr + else: + for param_group in self.optimizer.param_groups: + param_group['lr'] = lr + + def state_dict(self) -> dict: + return self.optimizer.state_dict() + + def load_state_dict(self, state_dict: dict) -> None: + self.optimizer.load_state_dict(state_dict) + + def zero_grad(self) -> None: + self.optimizer.zero_grad() + + def step(self, closure=None) -> dict: + # TODO(jerry): potentially allow max_grad_norm being a list? 
+ if self.max_grad_norm is not None: + grad_norm = clip_grad_norm_( + parameters=it.chain.from_iterable(pg['params'] for pg in self.optimizer.param_groups), + max_norm=self.max_grad_norm + ) + self.optimizer.step(closure) + return {'grad_norm': grad_norm} + else: + self.optimizer.step(closure) + return {} + + def add_param_group(self, param_group: dict) -> None: + self.optimizer.add_param_group(param_group) + + def metrics(self) -> list: + """ Set of metrics for this model """ + if self.max_grad_norm is not None: + return [ + DefaultAveragingNamedMetric('grad_norm', scope="opt", defaut_value=0.0) + ] + else: + return [] + + def create_scheduler(self, scheduler_factory: SchedulerFactory, last_epoch: int = -1) -> [Callback]: + """ Create a scheduler instance for this optimizer """ + return [scheduler_factory.instantiate(self.optimizer, last_epoch=last_epoch)] class OptimizerFactory: """ Base class for optimizer factories """ - def instantiate(self, model: Model) -> Optimizer: + def instantiate(self, parameters) -> VelOptimizer: + raise NotImplementedError + + def instantiate_parameter_groups(self, parameters) -> VelOptimizer: raise NotImplementedError diff --git a/vel/api/source.py b/vel/api/source.py index fb02e7e6..892f65a7 100644 --- a/vel/api/source.py +++ b/vel/api/source.py @@ -17,3 +17,58 @@ def __init__(self, train: data.Dataset, validation: data.Dataset, self.test = test self.metadata = {} if metadata is None else metadata + + +class LanguageSource(Source): + """ Special source for language datasets that handles dictionaries/encodings """ + + def __init__(self, train: data.Dataset, validation: data.Dataset, + fields: dict, mapping: dict, + test: typing.Optional[data.Dataset] = None, metadata: typing.Optional[dict] = None): + super().__init__(train, validation, test, metadata) + + self.fields = fields + self.mapping = mapping + + +# class SupervisedTextData(Source): +# """ An NLP torchtext data source """ +# def __init__(self, train_source, val_source, 
train_iterator, val_iterator, data_field, target_field): +# super().__init__() +# +# self.train_source = train_source +# self.val_source = val_source +# self.train_iterator = train_iterator +# self.val_iterator = val_iterator +# self.data_field = data_field +# self.target_field = target_field +# +# @property +# def train_loader(self): +# """ PyTorch loader of training data """ +# return self.train_iterator +# +# @property +# def val_loader(self): +# """ PyTorch loader of validation data """ +# return self.val_iterator +# +# @property +# def train_dataset(self): +# """ Return the training dataset """ +# return self.train_source +# +# @property +# def val_dataset(self): +# """ Return the validation dataset """ +# return self.val_source +# +# @property +# def train_iterations_per_epoch(self): +# """ Return number of iterations per epoch """ +# return len(self.train_iterator) +# +# @property +# def val_iterations_per_epoch(self): +# """ Return number of iterations per epoch - validation """ +# return len(self.val_iterator) diff --git a/vel/command/list_command.py b/vel/command/list_command.py new file mode 100644 index 00000000..11e37df8 --- /dev/null +++ b/vel/command/list_command.py @@ -0,0 +1,33 @@ +import os +import os.path +import glob +import json + +from vel.api import ModelConfig + + +class ListCommand: + """ List trained models for given config and their basic metadata """ + + def __init__(self, model_config: ModelConfig): + self.model_config = model_config + + def run(self): + meta_dir = self.model_config.output_dir('meta', self.model_config.name) + meta_paths = os.path.join(meta_dir, '*', 'meta.json') + + for path in sorted(glob.glob(meta_paths)): + with open(path, 'rt') as fp: + meta = json.load(fp) + + print("-" * 80) + print("Run name: {}".format(meta['run_name'])) + print("Tag: {}".format(meta['tag'])) + print("Created: {}".format(meta['created'])) + + +def create(model_config): + """ Vel factory function """ + return ListCommand( + 
model_config=model_config, + ) diff --git a/vel/command/phase_train_command.py b/vel/command/phase_train_command.py index 2318d7bf..0fb91197 100644 --- a/vel/command/phase_train_command.py +++ b/vel/command/phase_train_command.py @@ -56,27 +56,21 @@ def _select_phase_right_bound(self, epoch_number): def run(self): """ Run the command with supplied configuration """ device = self.model_config.torch_device() - learner = train.Trainer(device, self.model_factory.instantiate()) - - # All callbacks useful for learning - callbacks = self.gather_callbacks() - - # Metrics to track through this training - metrics = learner.metrics() + [SamplesPerSec()] + trainer = train.Trainer(device, self.model_factory.instantiate()) # Check if training was already started and potentially continue where we left off - training_info, hidden_state = self.resume_training(learner, callbacks, metrics) + training_info, hidden_state = self.start_training(trainer) # Prepare current training phase current_phase_idx = self._select_phase_left_bound(training_info.start_epoch_idx) current_phase = self.phases[current_phase_idx] local_idx = training_info.start_epoch_idx - self.ladder[current_phase_idx] - current_phase.set_up_phase(training_info, learner.model, self.loader) + current_phase.set_up_phase(training_info, trainer.model, self.loader) print(current_phase.banner()) if training_info.start_epoch_idx > 0: - current_phase.restore(training_info, local_idx, learner.model, hidden_state) + current_phase.restore(training_info, local_idx, trainer.model, hidden_state) training_info.on_train_begin() @@ -86,46 +80,46 @@ def run(self): # Phase preparations while current_phase_idx != iteration_phase_idx: - current_phase.tear_down_phase(training_info, learner.model) + current_phase.tear_down_phase(training_info, trainer.model) current_phase_idx += 1 current_phase = self.phases[current_phase_idx] - current_phase.set_up_phase(training_info, learner.model, self.loader) + current_phase.set_up_phase(training_info, 
trainer.model, self.loader) print(current_phase.banner()) # Create epoch info epoch_info = current_phase.epoch_info(training_info, global_epoch_idx, local_idx) # Execute learning - current_phase.execute_epoch(epoch_info, learner) + current_phase.execute_epoch(epoch_info, trainer) # Epoch checkpoint - self.storage.checkpoint(epoch_info, learner.model) + self.storage.checkpoint(epoch_info, trainer.model) # Tear down the last phase if current_phase is not None: - current_phase.tear_down_phase(training_info, learner.model) + current_phase.tear_down_phase(training_info, trainer.model) training_info.on_train_end() return training_info - def gather_callbacks(self) -> list: - """ Gather all the callbacks to be used in this training run """ + def start_training(self, trainer) -> (api.TrainingInfo, dict): + """ Possibly resume training from a saved state from the storage """ + if self.model_config.resume_training: + start_epoch = self.storage.last_epoch_idx() + else: + start_epoch = 0 + + # Initial set of callbacks, always useful callbacks = [TimeTracker(), SampleTracker()] callbacks.extend(self.callbacks) callbacks.extend(self.storage.streaming_callbacks()) - return callbacks - - def resume_training(self, learner, callbacks, metrics) -> (api.TrainingInfo, dict): - """ Possibly resume training from a saved state from the storage """ - if self.model_config.continue_training: - start_epoch = self.storage.last_epoch_idx() - else: - start_epoch = 0 + # Metrics to track through this training + metrics = trainer.metrics() + [SamplesPerSec()] training_info = api.TrainingInfo( start_epoch_idx=start_epoch, @@ -134,13 +128,16 @@ def resume_training(self, learner, callbacks, metrics) -> (api.TrainingInfo, dic ) if start_epoch == 0: + self.model_config.write_meta() self.storage.reset(self.model_config.render_configuration()) training_info.initialize() - learner.initialize_training(training_info) + trainer.initialize_training(training_info) hidden_state = None else: model_state, 
hidden_state = self.storage.load(training_info) - learner.initialize_training(training_info, model_state, hidden_state) + training_info.restore(hidden_state) + + trainer.initialize_training(training_info, model_state, hidden_state) return training_info, hidden_state diff --git a/vel/command/train_command.py b/vel/command/train_command.py index 5d3ca3ce..a504f9c7 100644 --- a/vel/command/train_command.py +++ b/vel/command/train_command.py @@ -14,8 +14,8 @@ class SimpleTrainCommand: def __init__(self, epochs: int, model_config: api.ModelConfig, model_factory: api.ModelFactory, optimizer_factory: api.OptimizerFactory, scheduler_factory: typing.Optional[api.SchedulerFactory], - loader: data.DatasetLoader, storage: api.Storage, callbacks: typing.Optional[typing.List[api.Callback]], - max_grad_norm: typing.Optional[float]): + loader: data.DatasetLoader, storage: api.Storage, + callbacks: typing.Optional[typing.List[api.Callback]]): self.epochs = epochs self.model_config = model_config self.model_factory = model_factory @@ -26,29 +26,19 @@ def __init__(self, epochs: int, model_config: api.ModelConfig, model_factory: ap self.loader = loader self.storage = storage self.callbacks = callbacks if callbacks is not None else [] - self.max_grad_norm = max_grad_norm def run(self): """ Run the command with supplied configuration """ device = self.model_config.torch_device() - learner = train.Trainer(device, self.model_factory.instantiate(), self.max_grad_norm) - optimizer = self.optimizer_factory.instantiate(learner.model) - - # All callbacks used for learning - callbacks = self.gather_callbacks(optimizer) - - # Metrics to track through this training - metrics = learner.metrics() + [SamplesPerSec()] + trainer = train.Trainer(device, self.model_factory.instantiate()) + optimizer = trainer.model.create_optimizer(self.optimizer_factory) # Check if training was already started and potentially continue where we left off - training_info = self.resume_training(learner, callbacks, metrics) 
+ training_info = self.start_training(trainer, optimizer) training_info.on_train_begin() - if training_info.optimizer_initial_state: - optimizer.load_state_dict(training_info.optimizer_initial_state) - for global_epoch_idx in range(training_info.start_epoch_idx + 1, self.epochs + 1): epoch_info = api.EpochInfo( training_info=training_info, @@ -58,32 +48,34 @@ def run(self): ) # Execute learning - learner.run_epoch(epoch_info, self.loader) + trainer.run_epoch(epoch_info, self.loader) - self.storage.checkpoint(epoch_info, learner.model) + self.storage.checkpoint(epoch_info, trainer.model) training_info.on_train_end() return training_info - def gather_callbacks(self, optimizer) -> list: - """ Gather all the callbacks to be used in this training run """ + def start_training(self, trainer: train.Trainer, optimizer: api.VelOptimizer) -> api.TrainingInfo: + """ Possibly resume training from a saved state from the storage """ + if self.model_config.resume_training: + start_epoch = self.storage.last_epoch_idx() + else: + start_epoch = 0 + + # Initial set of callbacks, always useful callbacks = [TimeTracker(), SampleTracker()] if self.scheduler_factory is not None: - callbacks.append(self.scheduler_factory.instantiate(optimizer)) + callbacks.extend( + optimizer.create_scheduler(scheduler_factory=self.scheduler_factory, last_epoch=start_epoch-1) + ) callbacks.extend(self.callbacks) callbacks.extend(self.storage.streaming_callbacks()) - return callbacks - - def resume_training(self, learner, callbacks, metrics) -> api.TrainingInfo: - """ Possibly resume training from a saved state from the storage """ - if self.model_config.continue_training: - start_epoch = self.storage.last_epoch_idx() - else: - start_epoch = 0 + # Metrics to track through this training + metrics = trainer.metrics() + optimizer.metrics() + [SamplesPerSec()] training_info = api.TrainingInfo( start_epoch_idx=start_epoch, @@ -92,17 +84,23 @@ def resume_training(self, learner, callbacks, metrics) -> 
api.TrainingInfo: ) if start_epoch == 0: + self.model_config.write_meta() self.storage.reset(self.model_config.render_configuration()) training_info.initialize() - learner.initialize_training(training_info) + trainer.initialize_training(training_info) else: model_state, hidden_state = self.storage.load(training_info) - learner.initialize_training(training_info, model_state, hidden_state) + + training_info.restore(hidden_state) + trainer.initialize_training(training_info, model_state, hidden_state) + + if 'optimizer' in hidden_state: + optimizer.load_state_dict(hidden_state['optimizer']) return training_info -def create(model_config, epochs, optimizer, model, loader, storage, scheduler=None, callbacks=None, max_grad_norm=None): +def create(model_config, epochs, optimizer, model, loader, storage, scheduler=None, callbacks=None): """ Vel factory function """ return SimpleTrainCommand( epochs=epochs, @@ -113,5 +111,4 @@ def create(model_config, epochs, optimizer, model, loader, storage, scheduler=No loader=loader, storage=storage, callbacks=callbacks, - max_grad_norm=max_grad_norm ) diff --git a/vel/data/bucket_loader.py b/vel/data/bucket_loader.py new file mode 100644 index 00000000..46f70a4b --- /dev/null +++ b/vel/data/bucket_loader.py @@ -0,0 +1,68 @@ +import torchtext.data as data + +from vel.util.dataloader import IteratorDictWrapper +from vel.api import LanguageSource, ModelConfig + + +class BucketLoader: + """ Loads sequence data from a source and batches together examples of similar length """ + + def __init__(self, model_config: ModelConfig, source: LanguageSource, batch_size: int): + self.source = source + self.batch_size = batch_size + + if self.source.test is None: + self.train_loader, self.val_loader = data.BucketIterator.splits( + (self.source.train, self.source.validation), + batch_size=batch_size, + device=model_config.torch_device(), + shuffle=True + ) + self.test_loader = None + else: + self.train_loader, self.val_loader, self.test_loader = 
data.BucketIterator.splits( + (self.source.train, self.source.validation, self.source.test), + batch_size=batch_size, + device=model_config.torch_device(), + shuffle=True + ) + + self.train_loader = IteratorDictWrapper(self.train_loader, self.source.mapping) + self.val_loader = IteratorDictWrapper(self.val_loader, self.source.mapping) + + if self.test_loader: + self.test_loader = IteratorDictWrapper(self.test_loader, self.source.mapping) + + self._loaders = { + 'train': self.train_loader, + 'val': self.val_loader, + 'test': self.test_loader + } + + self._loader_sizes = { + 'train': len(self.train_loader), + 'val': len(self.val_loader), + 'test': 0 if self.test_loader is None else len(self.test_loader) + } + + def __getitem__(self, item): + return self._loaders[item] + + @property + def loader(self): + """ Get a dict of loaders """ + return self._loaders + + @property + def size(self): + """ Get a dict of sizes of each loader """ + return self._loader_sizes + + +def create(model_config: ModelConfig, source: LanguageSource, batch_size: int): + """ Vel factory function """ + return BucketLoader( + model_config=model_config, + source=source, + batch_size=batch_size, + ) diff --git a/vel/data/source/nlp/imdb.py b/vel/data/source/nlp/imdb.py index ff351b0a..71168756 100644 --- a/vel/data/source/nlp/imdb.py +++ b/vel/data/source/nlp/imdb.py @@ -7,7 +7,7 @@ import torchtext.datasets as ds -from vel.api import SupervisedTextData +from vel.api import LanguageSource class IMDBCached(ds.IMDB): @@ -45,7 +45,7 @@ def __init__(self, path, text_field, label_field, **kwargs): data.Dataset.__init__(self, examples, fields, **kwargs) -def create(model_config, batch_size, data_dir='imdb', vectors=None): +def create(model_config, data_dir='imdb', vectors=None): """ Create an IMDB dataset """ path = model_config.data_dir(data_dir) @@ -61,13 +61,23 @@ def create(model_config, batch_size, data_dir='imdb', vectors=None): text_field.build_vocab(train_source, max_size=25_000, vectors=vectors) 
label_field.build_vocab(train_source) - train_iterator, test_iterator = data.BucketIterator.splits( - (train_source, test_source), - batch_size=batch_size, - device=model_config.torch_device(), - shuffle=True + return LanguageSource( + train_source, + test_source, + fields=train_source.fields, + mapping={ + 'x': 'text', + 'y': 'label' + } ) - return SupervisedTextData( - train_source, test_source, train_iterator, test_iterator, text_field, label_field - ) + # train_iterator, test_iterator = data.BucketIterator.splits( + # (train_source, test_source), + # batch_size=batch_size, + # device=model_config.torch_device(), + # shuffle=True + # ) + + # return SupervisedTextData( + # train_source, test_source, train_iterator, test_iterator, text_field, label_field + # ) diff --git a/vel/data/source/nlp/text_url.py b/vel/data/source/nlp/text_url.py index fea44f95..cadafecd 100644 --- a/vel/data/source/nlp/text_url.py +++ b/vel/data/source/nlp/text_url.py @@ -36,6 +36,12 @@ def __init__(self, url, absolute_data_path, train_val_split=0.8): } ) + def encode_character(self, char): + return self.data_dict['character_to_index'][char] + + def decode_character(self, index): + return self.data_dict['index_to_character'][index] + def download(self) -> dict: """ Make sure data file is downloaded and stored properly """ if not os.path.exists(self.data_path): @@ -86,4 +92,4 @@ def create(model_config, url, local_dir, train_val_split=0.8): url, absolute_data_path=local_dir, train_val_split=train_val_split, -) + ) diff --git a/vel/internal/provider.py b/vel/internal/provider.py index e1060f2d..79921125 100644 --- a/vel/internal/provider.py +++ b/vel/internal/provider.py @@ -77,11 +77,12 @@ def instantiate_from_data(self, object_data): else: return object_data - def render_configuration(self, configuration=None): - """ Render variables in configuration object but don't instantiate anything """ - if configuration is None: - configuration = self.environment + def render_environment(self): + 
""" Render variables in current environment """ + return self.render_configuration(self.environment) + def render_configuration(self, configuration): + """ Render variables in configuration object but don't instantiate anything """ if isinstance(configuration, dict): return {k: self.render_configuration(v) for k, v in configuration.items()} elif isinstance(configuration, list): diff --git a/vel/launcher.py b/vel/launcher.py index 18a4f687..8b537415 100644 --- a/vel/launcher.py +++ b/vel/launcher.py @@ -24,7 +24,7 @@ def main(): help="Configuration parameters" ) parser.add_argument( - '--continue', action='store_true', default=False, help="Continue previously started learning process" + '--resume', action='store_true', default=False, help="Resume previously started learning process" ) parser.add_argument( '--profile', type=str, default=None, help="Profiler output" @@ -38,8 +38,8 @@ def main(): warnings.filterwarnings('error', module='torch\\..*') model_config = ModelConfig.from_file( - args.config, args.run_number, continue_training=getattr(args, 'continue'), device=args.device, seed=args.seed, - parameters={k: v for (k, v) in (Parser.parse_equality(eq) for eq in args.param)} + args.config, args.run_number, resume_training=args.resume, device=args.device, seed=args.seed, + parameters={k: v for (k, v) in (Parser.parse_equality(eq) for eq in args.param)}, tag=args.tag ) if model_config.project_dir not in sys.path: diff --git a/vel/metric/__init__.py b/vel/metric/__init__.py index be14e6c4..29d9f310 100644 --- a/vel/metric/__init__.py +++ b/vel/metric/__init__.py @@ -1,3 +1,5 @@ from .base.base_metric import BaseMetric, MetricKey # noqa -from .base.averaging_metric import AveragingMetric, AveragingNamedMetric, AveragingSupervisedMetric # noqa +from .base.averaging_metric import ( + AveragingMetric, AveragingNamedMetric, AveragingSupervisedMetric, DefaultAveragingNamedMetric # noqa +) from .base.value_metric import ValueMetric # noqa diff --git 
a/vel/metric/base/__init__.py b/vel/metric/base/__init__.py index 20f27c6d..a50c4f49 100644 --- a/vel/metric/base/__init__.py +++ b/vel/metric/base/__init__.py @@ -1,4 +1,4 @@ -from .averaging_metric import AveragingSupervisedMetric, AveragingNamedMetric, AveragingMetric +from .averaging_metric import AveragingSupervisedMetric, AveragingNamedMetric, AveragingMetric, DefaultAveragingNamedMetric from .base_metric import BaseMetric, MetricKey from .summing_metric import SummingMetric, SummingNamedMetric -from .value_metric import ValueMetric \ No newline at end of file +from .value_metric import ValueMetric diff --git a/vel/metric/base/averaging_metric.py b/vel/metric/base/averaging_metric.py index 2355b7fd..ae535fa7 100644 --- a/vel/metric/base/averaging_metric.py +++ b/vel/metric/base/averaging_metric.py @@ -36,6 +36,19 @@ def _value_function(self, batch_info): return batch_info[self.name] +class DefaultAveragingNamedMetric(AveragingNamedMetric): + """ AveragingNamedMetric that has a default value in case a metric is not found in the batch """ + def __init__(self, name, scope="general", defaut_value=0.0): + super().__init__(name, scope=scope) + self.default_value = defaut_value + + def _value_function(self, batch_info): + if self.name not in batch_info: + return self.default_value + else: + return batch_info[self.name] + + class AveragingSupervisedMetric(BaseMetric): """ Base class for metrics that simply calculate the average over the epoch """ def __init__(self, name, scope="general"): diff --git a/vel/model/imagenet/resnet34.py b/vel/model/imagenet/resnet34.py index a4a78f86..18bdd667 100644 --- a/vel/model/imagenet/resnet34.py +++ b/vel/model/imagenet/resnet34.py @@ -5,7 +5,7 @@ import vel.module.layers as layers import vel.util.module_util as mu -from vel.api import LossFunctionModel, ModelFactory +from vel.api import LossFunctionModel, ModelFactory, OptimizerFactory, VelOptimizer # Because of concat pooling it's 2x 512 @@ -84,6 +84,10 @@ def 
get_layer_groups(self): g3 = list(self.model[self.group_cut_layers[1]:]) return [g1, g2, g3] + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: + parameters = mu.to_parameter_groups(self.get_layer_groups()) + return optimizer_factory.instantiate_parameter_groups(parameters) + def forward(self, x): """ Calculate model value """ return self.model(x) diff --git a/vel/model/latent/cnn_iwae.py b/vel/model/latent/cnn_iwae.py index c4b79ded..a6ee1d7f 100644 --- a/vel/model/latent/cnn_iwae.py +++ b/vel/model/latent/cnn_iwae.py @@ -19,8 +19,8 @@ class CnnIWAE(IWAE): """ def __init__(self, img_rows, img_cols, img_channels, k=5, channels=None, representation_length=32, - analytical_kl_div=True, max_grad_norm=0.5): - super().__init__(k=k, analytical_kl_div=analytical_kl_div, max_grad_norm=max_grad_norm) + analytical_kl_div=True): + super().__init__(k=k, analytical_kl_div=analytical_kl_div) if channels is None: channels = [16, 32, 32] diff --git a/vel/model/latent/cnn_vae.py b/vel/model/latent/cnn_vae.py index 958877bf..491cdb70 100644 --- a/vel/model/latent/cnn_vae.py +++ b/vel/model/latent/cnn_vae.py @@ -19,8 +19,8 @@ class CnnVAE(VaeBase): """ def __init__(self, img_rows, img_cols, img_channels, channels=None, representation_length=32, - analytical_kl_div=True, max_grad_norm=0.5): - super().__init__(analytical_kl_div=analytical_kl_div, max_grad_norm=max_grad_norm) + analytical_kl_div=True): + super().__init__(analytical_kl_div=analytical_kl_div) if channels is None: channels = [16, 32, 32] diff --git a/vel/model/latent/fc_iwae.py b/vel/model/latent/fc_iwae.py index ab6d8602..7e7a44da 100644 --- a/vel/model/latent/fc_iwae.py +++ b/vel/model/latent/fc_iwae.py @@ -16,8 +16,8 @@ class FcIwae(IWAE): """ def __init__(self, img_rows, img_cols, img_channels, k=5, layers=None, representation_length=32, - analytical_kl_div=False, max_grad_norm=None): - super().__init__(k=k, analytical_kl_div=analytical_kl_div, max_grad_norm=max_grad_norm) + 
analytical_kl_div=False): + super().__init__(k=k, analytical_kl_div=analytical_kl_div) if layers is None: layers = [512, 256] @@ -91,8 +91,7 @@ def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: # self._weight_initializer(m) -def create(img_rows, img_cols, img_channels, k=5, layers=None, representation_length=32, max_grad_norm=None, - analytical_kl_div=True): +def create(img_rows, img_cols, img_channels, k=5, layers=None, representation_length=32, analytical_kl_div=True): """ Vel factory function """ if layers is None: layers = [512, 256] @@ -100,7 +99,7 @@ def create(img_rows, img_cols, img_channels, k=5, layers=None, representation_le def instantiate(**_): return FcIwae( img_rows, img_cols, img_channels, k=k, layers=layers, representation_length=representation_length, - max_grad_norm=max_grad_norm, analytical_kl_div=analytical_kl_div + analytical_kl_div=analytical_kl_div ) return ModelFactory.generic(instantiate) diff --git a/vel/model/latent/fc_vae.py b/vel/model/latent/fc_vae.py index 50d7d99c..fbad9e29 100644 --- a/vel/model/latent/fc_vae.py +++ b/vel/model/latent/fc_vae.py @@ -16,8 +16,8 @@ class FcVae(VaeBase): """ def __init__(self, img_rows, img_cols, img_channels, layers=None, representation_length=32, - analytical_kl_div=False, max_grad_norm=None): - super().__init__(analytical_kl_div=analytical_kl_div, max_grad_norm=max_grad_norm) + analytical_kl_div=False): + super().__init__(analytical_kl_div=analytical_kl_div) if layers is None: layers = [512, 256] @@ -91,7 +91,7 @@ def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: # self._weight_initializer(m) -def create(img_rows, img_cols, img_channels, layers=None, representation_length=32, max_grad_norm=None, +def create(img_rows, img_cols, img_channels, layers=None, representation_length=32, analytical_kl_div=True): """ Vel factory function """ if layers is None: @@ -100,7 +100,7 @@ def create(img_rows, img_cols, img_channels, layers=None, representation_length= def 
instantiate(**_): return FcVae( img_rows, img_cols, img_channels, layers=layers, representation_length=representation_length, - max_grad_norm=max_grad_norm, analytical_kl_div=analytical_kl_div + analytical_kl_div=analytical_kl_div ) return ModelFactory.generic(instantiate) diff --git a/vel/model/latent/iwae.py b/vel/model/latent/iwae.py index 46359c41..faa24a68 100644 --- a/vel/model/latent/iwae.py +++ b/vel/model/latent/iwae.py @@ -8,8 +8,8 @@ class IWAE(VaeBase): Importance-Weighted Auto-Encoder https://arxiv.org/abs/1509.00519 """ - def __init__(self, k: int = 5, analytical_kl_div=True, max_grad_norm=1.0): - super().__init__(analytical_kl_div=analytical_kl_div, max_grad_norm=max_grad_norm) + def __init__(self, k: int = 5, analytical_kl_div=True): + super().__init__(analytical_kl_div=analytical_kl_div) self.k = k @@ -53,21 +53,9 @@ def calculate_gradient(self, data: dict) -> dict: if self.training: loss.backward() - if self.max_grad_norm is not None: - grad_norm = torch.nn.utils.clip_grad_norm_( - filter(lambda p: p.requires_grad, self.parameters()), - max_norm=self.max_grad_norm - ) - else: - grad_norm = 0.0 - else: - grad_norm = 0.0 - with torch.no_grad(): return { 'loss': loss.item(), - - 'grad_norm': grad_norm, 'reconstruction': -reconstruction.mean().item(), 'kl_divergence': kl_divergence.mean().item() } diff --git a/vel/model/latent/vae_base.py b/vel/model/latent/vae_base.py index 03fa88f7..75562b42 100644 --- a/vel/model/latent/vae_base.py +++ b/vel/model/latent/vae_base.py @@ -9,11 +9,10 @@ class VaeBase(GradientModel): """ Base module for variational auto-encoder implementations """ - def __init__(self, analytical_kl_div=True, max_grad_norm=1.0): + def __init__(self, analytical_kl_div=True): super().__init__() self.analytical_kl_div = analytical_kl_div - self.max_grad_norm = max_grad_norm #################################################################################################################### # Interface methods @@ -96,20 +95,8 @@ def 
calculate_gradient(self, data: dict) -> dict: if self.training: loss.backward() - if self.max_grad_norm is not None: - grad_norm = torch.nn.utils.clip_grad_norm_( - filter(lambda p: p.requires_grad, self.parameters()), - max_norm=self.max_grad_norm - ) - else: - grad_norm = 0.0 - else: - grad_norm = 0.0 - return { 'loss': loss.item(), - - 'grad_norm': grad_norm, 'reconstruction': -reconstruction.item(), 'kl_divergence': kl_divergence.item() } @@ -131,7 +118,6 @@ def metrics(self): Loss(), AveragingNamedMetric('reconstruction', scope="train"), AveragingNamedMetric('kl_divergence', scope="train"), - AveragingNamedMetric('grad_norm', scope="train") ] @torch.no_grad() diff --git a/vel/model/rnn/multilayer_rnn_sequence_classification.py b/vel/model/rnn/multilayer_rnn_sequence_classification.py index db5da9f0..4c724c76 100644 --- a/vel/model/rnn/multilayer_rnn_sequence_classification.py +++ b/vel/model/rnn/multilayer_rnn_sequence_classification.py @@ -4,7 +4,9 @@ import torch.nn.functional as F import torch.nn as nn -from vel.api import LossFunctionModel, ModelFactory, LinearBackboneModel +import vel.util.module_util as mu + +from vel.api import LossFunctionModel, ModelFactory, LinearBackboneModel, OptimizerFactory, VelOptimizer from vel.metric.accuracy import Accuracy from vel.metric.loss_metric import Loss from vel.module.rnn_layer import RnnLayer @@ -129,6 +131,11 @@ def get_layer_groups(self): self.output_layer ] + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: + """ Create optimizer for the purpose of optimizing this model """ + parameters = mu.to_parameter_groups(self.get_layer_groups()) + return optimizer_factory.instantiate_parameter_groups(parameters) + @property def state_dim(self) -> int: """ Dimension of model state """ diff --git a/vel/module/input/embedding.py b/vel/module/input/embedding.py index 37d3387b..d055e257 100644 --- a/vel/module/input/embedding.py +++ b/vel/module/input/embedding.py @@ -1,13 +1,13 @@ import 
torch.nn as nn -from vel.api import LinearBackboneModel, SupervisedTextData, ModelFactory +from vel.api import LinearBackboneModel, ModelFactory, LanguageSource class EmbeddingInput(LinearBackboneModel): """ Learnable Embedding input layer """ def __init__(self, alphabet_size: int, output_dim: int, pretrained: bool = False, frozen: bool = False, - source: SupervisedTextData = None): + source: LanguageSource = None): super().__init__() self._output_dim = output_dim @@ -20,7 +20,7 @@ def __init__(self, alphabet_size: int, output_dim: int, pretrained: bool = False def reset_weights(self): if self._pretrained: - self.layer.weight.data.copy_(self._source.data_field.vocab.vectors) + self.layer.weight.data.copy_(self._source.fields['text'].vocab.vectors) if self._frozen: self.layer.weight.requires_grad = False @@ -35,7 +35,7 @@ def forward(self, input_data): def create(alphabet_size: int, output_dim: int, pretrained: bool = False, frozen: bool = False, - source: SupervisedTextData = None): + source: LanguageSource = None): """ Vel factory function """ def instantiate(**_): return EmbeddingInput(alphabet_size, output_dim, pretrained=pretrained, frozen=frozen, source=source) diff --git a/vel/notebook/loader.py b/vel/notebook/loader.py index ca0db31b..5efbd88c 100644 --- a/vel/notebook/loader.py +++ b/vel/notebook/loader.py @@ -1,21 +1,21 @@ from vel.api import ModelConfig -def load_config(config_path, run_number=0, device='cuda:0', continue_training=True): +def load_config(config_path, run_number=0, device='cuda:0', resume_training=True): """ Load a ModelConfig from filename """ return ModelConfig.from_file( ModelConfig.from_project_directory(config_path), run_number=run_number, device=device, - continue_training=continue_training + resume_training=resume_training ) -def script(model_name: str = 'script', run_number=0, device='cuda:0', continue_training=True): +def script(model_name: str = 'script', run_number=0, device='cuda:0', resume_training=True): """ Create an ad-hoc 
script model config """ return ModelConfig.script( model_name=model_name, run_number=run_number, device=device, - continue_training=continue_training + resume_training=resume_training ) diff --git a/vel/optimizer/adadelta.py b/vel/optimizer/adadelta.py index 7bdc5529..e5e01f0e 100644 --- a/vel/optimizer/adadelta.py +++ b/vel/optimizer/adadelta.py @@ -1,24 +1,50 @@ -import torch.optim +import typing -from vel.api import OptimizerFactory, Model +from torch.optim.adadelta import Adadelta + +import vel.util.module_util as mu + +from vel.api import OptimizerFactory, VelOptimizerProxy, VelOptimizer class AdadeltaFactory(OptimizerFactory): """ Adadelta optimizer factory """ - def __init__(self, lr=1.0, rho=0.9, eps=1e-6, weight_decay=0): + def __init__(self, lr: float = 1.0, rho: float = 0.9, eps: float = 1e-6, weight_decay: float = 0.0, + max_grad_norm: typing.Optional[float] = None): self.lr = lr self.rho = rho self.eps = eps self.weight_decay = weight_decay + self.max_grad_norm = max_grad_norm - def instantiate(self, model: Model) -> torch.optim.Adadelta: - return torch.optim.Adadelta( - filter(lambda p: p.requires_grad, model.parameters()), + def instantiate(self, parameters) -> VelOptimizer: + return VelOptimizerProxy(Adadelta( + parameters, lr=self.lr, rho=self.rho, eps=self.eps, weight_decay=self.weight_decay - ) + ), self.max_grad_norm) + + def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: + settings_dict = { + 'lr': self.lr, + 'rho': self.rho, + 'eps': self.eps, + 'weight_decay': self.weight_decay + } + + out_parameters = out_parameters.copy() + out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) + + return VelOptimizerProxy(Adadelta(out_parameters, **out_settings_dict), self.max_grad_norm) -def create(): +def create(lr: float = 1.0, rho: float = 0.9, eps: float = 1e-6, weight_decay: float = 0.0, + max_grad_norm: typing.Optional[float] = None): """ Vel factory function """ - return AdadeltaFactory() + return 
AdadeltaFactory( + lr=lr, + rho=rho, + eps=eps, + weight_decay=weight_decay, + max_grad_norm=max_grad_norm + ) diff --git a/vel/optimizer/adam.py b/vel/optimizer/adam.py index 5bda0c4f..46ad3f06 100644 --- a/vel/optimizer/adam.py +++ b/vel/optimizer/adam.py @@ -1,55 +1,44 @@ -import collections -import torch.optim +import typing + +from torch.optim.adam import Adam import vel.util.module_util as mu -from vel.api import OptimizerFactory, Model +from vel.api import OptimizerFactory, VelOptimizer, VelOptimizerProxy class AdamFactory(OptimizerFactory): """ Adam optimizer factory """ - def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False, layer_groups=False): + def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False, + max_grad_norm: typing.Optional[float] = None): self.lr = lr self.betas = betas self.eps = eps self.weight_decay = weight_decay self.amsgrad = amsgrad - self.layer_groups = layer_groups - - def instantiate(self, model: Model) -> torch.optim.Adam: - if self.layer_groups: - parameters = mu.to_parameter_groups(model.get_layer_groups()) - - if isinstance(self.lr, collections.Sequence): - for idx, lr in enumerate(self.lr): - parameters[idx]['lr'] = lr - - default_lr = self.lr[0] - else: - default_lr = float(self.lr) + self.max_grad_norm = max_grad_norm - if isinstance(self.weight_decay, collections.Sequence): - for idx, weight_decay in enumerate(self.weight_decay): - parameters[idx]['weight_decay'] = weight_decay + def instantiate(self, parameters) -> VelOptimizer: + return VelOptimizerProxy(Adam( + parameters, + lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay, amsgrad=self.amsgrad + ), self.max_grad_norm) - default_weight_decay = self.weight_decay[0] - else: - default_weight_decay = self.weight_decay + def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: + settings_dict = { + 'lr': self.lr, + 'eps': self.eps, + 'weight_decay': self.weight_decay, + 
'amsgrad': self.amsgrad + } - return torch.optim.Adam( - parameters, - lr=default_lr, betas=self.betas, eps=self.eps, weight_decay=default_weight_decay, amsgrad=self.amsgrad - ) - else: - parameters = filter(lambda p: p.requires_grad, model.parameters()) + out_parameters = out_parameters.copy() + out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) - return torch.optim.Adam( - parameters, - lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay, amsgrad=self.amsgrad - ) + return VelOptimizerProxy(Adam(out_parameters, betas=self.betas, **out_settings_dict), self.max_grad_norm) -def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, layer_groups=False): +def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, max_grad_norm=None): """ Vel factory function """ - return AdamFactory(lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, layer_groups=layer_groups) + return AdamFactory(lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, max_grad_norm=max_grad_norm) diff --git a/vel/optimizer/radam.py b/vel/optimizer/radam.py index 607b11e9..7abc4959 100644 --- a/vel/optimizer/radam.py +++ b/vel/optimizer/radam.py @@ -1,16 +1,19 @@ """ RAdam implementation from: https://github.com/LiyuanLucasLiu/RAdam/blob/master/cifar_imagenet/utils/radam.py """ -import math import collections -import torch.optim +import math +import torch +import typing + +from torch.optim.optimizer import Optimizer import vel.util.module_util as mu -from vel.api import OptimizerFactory, Model +from vel.api import OptimizerFactory, VelOptimizer, VelOptimizerProxy -class RAdam(torch.optim.Optimizer): +class RAdam(Optimizer): def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) self.buffer = [[None, None, None] for ind in range(10)] @@ -86,48 +89,35 @@ def step(self, closure=None): class RAdamFactory(OptimizerFactory): - """ 
RAdam optimizer factory """ + """ Adam optimizer factory """ - def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, layer_groups=False): + def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, + max_grad_norm: typing.Optional[float] = None): self.lr = lr self.betas = betas self.eps = eps self.weight_decay = weight_decay - self.layer_groups = layer_groups - - def instantiate(self, model: Model) -> RAdam: - if self.layer_groups: - parameters = mu.to_parameter_groups(model.get_layer_groups()) - - if isinstance(self.lr, collections.Sequence): - for idx, lr in enumerate(self.lr): - parameters[idx]['lr'] = lr - - default_lr = self.lr[0] - else: - default_lr = float(self.lr) + self.max_grad_norm = max_grad_norm - if isinstance(self.weight_decay, collections.Sequence): - for idx, weight_decay in enumerate(self.weight_decay): - parameters[idx]['weight_decay'] = weight_decay + def instantiate(self, parameters) -> VelOptimizer: + return VelOptimizerProxy(RAdam( + parameters, + lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay + ), self.max_grad_norm) - default_weight_decay = self.weight_decay[0] - else: - default_weight_decay = self.weight_decay + def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: + settings_dict = { + 'lr': self.lr, + 'eps': self.eps, + 'weight_decay': self.weight_decay + } - return RAdam( - parameters, - lr=default_lr, betas=self.betas, eps=self.eps, weight_decay=default_weight_decay, - ) - else: - parameters = filter(lambda p: p.requires_grad, model.parameters()) + out_parameters = out_parameters.copy() + out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) - return RAdam( - parameters, - lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay, - ) + return VelOptimizerProxy(RAdam(out_parameters, betas=self.betas, **out_settings_dict), self.max_grad_norm) -def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, 
layer_groups=False): +def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, max_grad_norm=None): """ Vel factory function """ - return RAdamFactory(lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, layer_groups=layer_groups) + return RAdamFactory(lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, max_grad_norm=max_grad_norm) diff --git a/vel/optimizer/ranger.py b/vel/optimizer/ranger.py index 3ab7f896..9f688ead 100644 --- a/vel/optimizer/ranger.py +++ b/vel/optimizer/ranger.py @@ -1,31 +1,15 @@ -#Ranger deep learning optimizer - RAdam + Lookahead combined. -#https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer - -#Ranger has now been used to capture 12 records on the FastAI leaderboard. - -#This version = 9.3.19 - -#Credits: -#RAdam --> https://github.com/LiyuanLucasLiu/RAdam -#Lookahead --> rewritten by lessw2020, but big thanks to Github @LonePatient and @RWightman for ideas from their code. -#Lookahead paper --> MZhang,G Hinton https://arxiv.org/abs/1907.08610 - -#summary of changes: -#full code integration with all updates at param level instead of group, moves slow weights into state dict (from generic weights), -#supports group learning rates (thanks @SHolderbach), fixes sporadic load from saved model issues. -#changes 8/31/19 - fix references to *self*.N_sma_threshold; - #changed eps to 1e-5 as better default than 1e-8. +# Ranger deep learning optimizer - RAdam + Lookahead combined. 
+# https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer import math import torch -import collections +import typing from torch.optim.optimizer import Optimizer - import vel.util.module_util as mu -from vel.api import OptimizerFactory, Model +from vel.api import OptimizerFactory, VelOptimizer, VelOptimizerProxy class Ranger(Optimizer): @@ -170,49 +154,78 @@ def step(self, closure=None): return loss +# class RangerFactory(OptimizerFactory): +# """ RAdam optimizer factory """ +# +# def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, layer_groups=False): +# self.lr = lr +# self.betas = betas +# self.eps = eps +# self.weight_decay = weight_decay +# self.layer_groups = layer_groups +# +# def instantiate(self, model: Model) -> Ranger: +# if self.layer_groups: +# parameters = mu.to_parameter_groups(model.get_layer_groups()) +# +# if isinstance(self.lr, collections.Sequence): +# for idx, lr in enumerate(self.lr): +# parameters[idx]['lr'] = lr +# +# default_lr = self.lr[0] +# else: +# default_lr = float(self.lr) +# +# if isinstance(self.weight_decay, collections.Sequence): +# for idx, weight_decay in enumerate(self.weight_decay): +# parameters[idx]['weight_decay'] = weight_decay +# +# default_weight_decay = self.weight_decay[0] +# else: +# default_weight_decay = self.weight_decay +# +# return Ranger( +# parameters, +# lr=default_lr, betas=self.betas, eps=self.eps, weight_decay=default_weight_decay, +# ) +# else: +# parameters = filter(lambda p: p.requires_grad, model.parameters()) +# +# return Ranger( +# parameters, +# lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay, +# ) + class RangerFactory(OptimizerFactory): - """ RAdam optimizer factory """ + """ Adam optimizer factory """ - def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, layer_groups=False): + def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, + max_grad_norm: typing.Optional[float] = None): self.lr = lr self.betas 
= betas self.eps = eps self.weight_decay = weight_decay - self.layer_groups = layer_groups - - def instantiate(self, model: Model) -> Ranger: - if self.layer_groups: - parameters = mu.to_parameter_groups(model.get_layer_groups()) - - if isinstance(self.lr, collections.Sequence): - for idx, lr in enumerate(self.lr): - parameters[idx]['lr'] = lr - - default_lr = self.lr[0] - else: - default_lr = float(self.lr) + self.max_grad_norm = max_grad_norm - if isinstance(self.weight_decay, collections.Sequence): - for idx, weight_decay in enumerate(self.weight_decay): - parameters[idx]['weight_decay'] = weight_decay + def instantiate(self, parameters) -> VelOptimizer: + return VelOptimizerProxy(Ranger( + parameters, + lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay + ), self.max_grad_norm) - default_weight_decay = self.weight_decay[0] - else: - default_weight_decay = self.weight_decay + def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: + settings_dict = { + 'lr': self.lr, + 'eps': self.eps, + 'weight_decay': self.weight_decay + } - return Ranger( - parameters, - lr=default_lr, betas=self.betas, eps=self.eps, weight_decay=default_weight_decay, - ) - else: - parameters = filter(lambda p: p.requires_grad, model.parameters()) + out_parameters = out_parameters.copy() + out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) - return Ranger( - parameters, - lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay, - ) + return VelOptimizerProxy(Ranger(out_parameters, betas=self.betas, **out_settings_dict), self.max_grad_norm) -def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, layer_groups=False): +def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, max_grad_norm=None): """ Vel factory function """ - return RangerFactory(lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, layer_groups=layer_groups) + return RangerFactory(lr=lr, betas=betas, 
weight_decay=weight_decay, eps=epsilon, max_grad_norm=max_grad_norm) diff --git a/vel/optimizer/rmsprop.py b/vel/optimizer/rmsprop.py index fd80f327..eacf02ac 100644 --- a/vel/optimizer/rmsprop.py +++ b/vel/optimizer/rmsprop.py @@ -1,27 +1,51 @@ -import torch.optim +import typing -from vel.api import OptimizerFactory, Model +from torch.optim.rmsprop import RMSprop + +import vel.util.module_util as mu + +from vel.api import OptimizerFactory, VelOptimizerProxy, VelOptimizer class RMSpropFactory(OptimizerFactory): """ RMSprop optimizer factory """ - def __init__(self, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False): + def __init__(self, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False, + max_grad_norm: typing.Optional[float] = None): self.lr = lr self.alpha = alpha self.eps = eps self.weight_decay = weight_decay self.momentum = momentum self.centered = centered + self.max_grad_norm = max_grad_norm - def instantiate(self, model: Model) -> torch.optim.RMSprop: - return torch.optim.RMSprop( - filter(lambda p: p.requires_grad, model.parameters()), + def instantiate(self, parameters) -> VelOptimizer: + return VelOptimizerProxy(RMSprop( + parameters, lr=self.lr, alpha=self.alpha, eps=self.eps, weight_decay=self.weight_decay, momentum=self.momentum, centered=self.centered - ) + ), self.max_grad_norm) + + def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: + settings_dict = { + 'lr': self.lr, + 'alpha': self.alpha, + 'eps': self.eps, + 'weight_decay': self.weight_decay, + 'momentum': self.momentum, + 'centered': self.centered + } + + out_parameters = out_parameters.copy() + out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) + + return VelOptimizerProxy(RMSprop(out_parameters, **out_settings_dict), self.max_grad_norm) -def create(lr, alpha, momentum=0, weight_decay=0, epsilon=1e-8): +def create(lr, alpha, momentum=0, weight_decay=0, epsilon=1e-8, max_grad_norm=None): """ Vel 
factory function """ - return RMSpropFactory(lr=lr, alpha=alpha, momentum=momentum, weight_decay=weight_decay, eps=float(epsilon)) + return RMSpropFactory( + lr=lr, alpha=alpha, momentum=momentum, weight_decay=weight_decay, eps=float(epsilon), + max_grad_norm=max_grad_norm + ) diff --git a/vel/optimizer/rmsprop_tf.py b/vel/optimizer/rmsprop_tf.py index 5b0197da..4e80a401 100644 --- a/vel/optimizer/rmsprop_tf.py +++ b/vel/optimizer/rmsprop_tf.py @@ -1,9 +1,11 @@ -import torch.optim +from torch.optim.optimizer import Optimizer -from vel.api import OptimizerFactory, Model +import vel.util.module_util as mu +from vel.api import OptimizerFactory, VelOptimizer, VelOptimizerProxy -class RMSpropTF(torch.optim.Optimizer): + +class RMSpropTF(Optimizer): """Implements RMSprop algorithm. A TensorFlow version with epsilon under the square root Proposed by G. Hinton in his @@ -113,22 +115,42 @@ def step(self, closure=None): class RMSpropTFFactory(OptimizerFactory): """ RMSprop optimizer factory - A Tensorflow version with epsilon under square root """ - def __init__(self, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False): + def __init__(self, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False, + max_grad_norm: typing.Optional[float] = None): self.lr = lr self.alpha = alpha self.eps = eps self.weight_decay = weight_decay self.momentum = momentum self.centered = centered + self.max_grad_norm = max_grad_norm - def instantiate(self, model: Model) -> RMSpropTF: - return RMSpropTF( - filter(lambda p: p.requires_grad, model.parameters()), + def instantiate(self, parameters) -> VelOptimizer: + return VelOptimizerProxy(RMSpropTF( + parameters, lr=self.lr, alpha=self.alpha, eps=self.eps, weight_decay=self.weight_decay, momentum=self.momentum, centered=self.centered - ) + ), self.max_grad_norm) + + def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: + settings_dict = { + 'lr': self.lr, + 'alpha': self.alpha, + 'eps': 
self.eps, + 'weight_decay': self.weight_decay, + 'momentum': self.momentum, + 'centered': self.centered + } + + out_parameters = out_parameters.copy() + out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) + + return VelOptimizerProxy(RMSpropTF(out_parameters, **out_settings_dict), self.max_grad_norm) -def create(lr, alpha, momentum=0, weight_decay=0, epsilon=1e-8): +def create(lr, alpha, momentum=0, weight_decay=0, epsilon=1e-8, max_grad_norm=None): """ Vel factory function """ - return RMSpropTFFactory(lr=lr, alpha=alpha, momentum=momentum, weight_decay=weight_decay, eps=float(epsilon)) + return RMSpropTFFactory( + lr=lr, alpha=alpha, momentum=momentum, weight_decay=weight_decay, eps=float(epsilon), + max_grad_norm=max_grad_norm + ) diff --git a/vel/optimizer/sgd.py b/vel/optimizer/sgd.py index 128ccd1e..383053d5 100644 --- a/vel/optimizer/sgd.py +++ b/vel/optimizer/sgd.py @@ -1,34 +1,52 @@ -import torch.optim +import typing + +from torch.optim.sgd import SGD import vel.util.module_util as mu -from vel.api import OptimizerFactory, Model +from vel.api import OptimizerFactory, VelOptimizer, VelOptimizerProxy class SgdFactory(OptimizerFactory): """ SGD optimizer factory """ - def __init__(self, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, layer_groups: bool = False): + def __init__(self, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, + max_grad_norm: typing.Optional[float] = None): self.lr = lr self.momentum = momentum self.dampening = dampening self.weight_decay = weight_decay self.nesterov = nesterov - self.layer_groups = layer_groups - - def instantiate(self, model: Model) -> torch.optim.SGD: - if self.layer_groups: - parameters = mu.to_parameter_groups(model.get_layer_groups()) - else: - parameters = filter(lambda p: p.requires_grad, model.parameters()) - - return torch.optim.SGD( - parameters, - lr=self.lr, momentum=self.momentum, dampening=self.dampening, weight_decay=self.weight_decay, - 
nesterov=self.nesterov + self.max_grad_norm = max_grad_norm + + def instantiate(self, parameters) -> VelOptimizer: + return VelOptimizerProxy( + SGD( + parameters, + lr=self.lr, momentum=self.momentum, dampening=self.dampening, weight_decay=self.weight_decay, + nesterov=self.nesterov + ), self.max_grad_norm ) + def instantiate_parameter_groups(self, parameters) -> VelOptimizer: + settings_dict = { + 'lr': self.lr, + 'momentum': self.momentum, + 'dampening': self.dampening, + 'weight_decay': self.weight_decay, + 'nesterov': self.nesterov + } + + parameters = parameters.copy() + out_settings_dict = mu.optimizer_parameter_helper(parameters, settings_dict) + + return VelOptimizerProxy(SGD(parameters, **out_settings_dict), self.max_grad_norm) + -def create(lr, weight_decay=0, momentum=0, layer_groups=False): +def create(lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, + max_grad_norm: typing.Optional[float] = None): """ Vel factory function """ - return SgdFactory(lr=lr, weight_decay=weight_decay, momentum=momentum, layer_groups=layer_groups) + return SgdFactory( + lr=lr, momentum=momentum, dampening=dampening, + weight_decay=weight_decay, nesterov=nesterov, max_grad_norm=max_grad_norm + ) diff --git a/vel/rl/algo/distributional_dqn.py b/vel/rl/algo/distributional_dqn.py index 95ca440e..adbee949 100644 --- a/vel/rl/algo/distributional_dqn.py +++ b/vel/rl/algo/distributional_dqn.py @@ -10,7 +10,7 @@ class DistributionalDeepQLearning(OptimizerAlgoBase): """ Deep Q-Learning algorithm """ def __init__(self, model_factory: ModelFactory, discount_factor: float, double_dqn: bool, - target_update_frequency: int, max_grad_norm: float): + target_update_frequency: int): super().__init__(max_grad_norm) self.model_factory = model_factory diff --git a/vel/rl/api/algo_base.py b/vel/rl/api/algo_base.py index 305e2624..b25374b1 100644 --- a/vel/rl/api/algo_base.py +++ b/vel/rl/api/algo_base.py @@ -1,18 +1,3 @@ -import torch.nn.utils - - -def clip_gradients(batch_result, 
model, max_grad_norm): - """ Clip gradients to a given maximum length """ - if max_grad_norm is not None: - grad_norm = torch.nn.utils.clip_grad_norm_( - filter(lambda p: p.requires_grad, model.parameters()), - max_norm=max_grad_norm - ) - else: - grad_norm = 0.0 - - batch_result['grad_norm'] = grad_norm - class AlgoBase: """ Base class for algo reinforcement calculations """ @@ -37,9 +22,6 @@ def metrics(self) -> list: class OptimizerAlgoBase(AlgoBase): """ RL algo that does a simple optimizer update """ - def __init__(self, max_grad_norm): - self.max_grad_norm = max_grad_norm - def calculate_gradient(self, batch_info, device, model, rollout): """ Calculate loss of the supplied rollout """ raise NotImplementedError @@ -54,8 +36,6 @@ def optimize(self, batch_info, device, model, rollout): batch_result = self.calculate_gradient(batch_info=batch_info, device=device, model=model, rollout=rollout) - clip_gradients(batch_result, model, self.max_grad_norm) - batch_info.optimizer.step(closure=None) self.post_optimization_step(batch_info, device, model, rollout) diff --git a/vel/rl/command/rl_train_command.py b/vel/rl/command/rl_train_command.py index f0363cbb..539cf5d3 100644 --- a/vel/rl/command/rl_train_command.py +++ b/vel/rl/command/rl_train_command.py @@ -117,7 +117,7 @@ def gather_callbacks(self, optimizer) -> list: def resume_training(self, reinforcer, callbacks, metrics) -> TrainingInfo: """ Possibly resume training from a saved state from the storage """ - if self.model_config.continue_training: + if self.model_config.resume_training: start_epoch = self.storage.last_epoch_idx() else: start_epoch = 0 diff --git a/vel/scheduler/ladder.py b/vel/scheduler/ladder.py index 0699c266..780ed013 100644 --- a/vel/scheduler/ladder.py +++ b/vel/scheduler/ladder.py @@ -2,7 +2,7 @@ import numpy as np -from vel.api import Callback, SchedulerFactory +from vel.api import Callback, SchedulerFactory, EpochInfo class LadderScheduler(Callback): @@ -16,7 +16,7 @@ def lambda_fn(self, 
epoch_idx): idx = np.minimum(np.searchsorted(self.schedule_limits, epoch_idx), len(self.schedule_limits) - 1) return self.schedule_numbers[idx] - def on_epoch_begin(self, epoch_info): + def on_epoch_end(self, epoch_info: EpochInfo) -> None: self.scheduler.step(epoch=epoch_info.global_epoch_idx) diff --git a/vel/storage/streaming/stdout.py b/vel/storage/streaming/stdout.py index 7ef02893..7dc18f8e 100644 --- a/vel/storage/streaming/stdout.py +++ b/vel/storage/streaming/stdout.py @@ -29,12 +29,12 @@ def _print_metrics_line(metrics, dataset=None): dataset = 'Metrics:' metrics_list = [ - "{}/{} {:.06f}".format(k.scope, k.name, metrics[k]) + "{}/{} {:.04f}".format(k.scope, k.name, metrics[k]) for k in sorted([k for k in metrics.keys() if k.dataset is None]) ] else: metrics_list = [ - "{}/{} {:.06f}".format(k.scope, k.name, metrics[k]) + "{}/{} {:.04f}".format(k.scope, k.name, metrics[k]) for k in sorted([k for k in metrics.keys() if k.dataset == dataset]) ] diff --git a/vel/train/phase/cycle.py b/vel/train/phase/cycle.py index 1f3358c0..63fae205 100644 --- a/vel/train/phase/cycle.py +++ b/vel/train/phase/cycle.py @@ -3,7 +3,7 @@ import vel.util.interpolate as interp -from vel.api import BatchInfo, EpochInfo, TrainingInfo, Callback +from vel.api import BatchInfo, EpochInfo, TrainingInfo, Callback, OptimizedModel from vel.train import TrainPhase @@ -77,16 +77,7 @@ def on_batch_begin(self, batch_info: BatchInfo, dataset: typing.Optional[str] = else: lr = interp.interpolate_single(self.max_lr, self.min_lr, interpolation_number, how=self.interpolate) - self.set_lr(lr) - - def set_lr(self, lr): - """ Set a learning rate for the optimizer """ - if isinstance(lr, list): - for group_lr, param_group in zip(lr, self.optimizer.param_groups): - param_group['lr'] = group_lr - else: - for param_group in self.optimizer.param_groups: - param_group['lr'] = lr + self.optimizer.set_lr(lr) class CyclePhase(TrainPhase): @@ -123,10 +114,10 @@ def __init__(self, optimizer_factory, max_lr, 
min_lr, cycles, cycle_len=1, cycle def number_of_epochs(self) -> int: return self.epochs - def set_up_phase(self, training_info, model, loader): + def set_up_phase(self, training_info, model: OptimizedModel, loader): """ Prepare the phase for learning """ # To parameter groups handles properly filtering parameters that don't require gradient - self._optimizer_instance = self.optimizer_factory.instantiate(model) + self._optimizer_instance = model.create_optimizer(self.optimizer_factory) self._loader = loader self.special_callback = CycleCallback( diff --git a/vel/train/phase/generic.py b/vel/train/phase/generic.py index db790fed..00c86660 100644 --- a/vel/train/phase/generic.py +++ b/vel/train/phase/generic.py @@ -1,4 +1,4 @@ -from vel.api import TrainingInfo, EpochInfo +from vel.api import TrainingInfo, EpochInfo, OptimizedModel from vel.data import DatasetLoader from vel.train import TrainPhase @@ -18,9 +18,9 @@ def __init__(self, lr, epochs, optimizer_factory): def number_of_epochs(self) -> int: return self.epochs - def set_up_phase(self, training_info, model, loader: DatasetLoader): + def set_up_phase(self, training_info: TrainingInfo, model: OptimizedModel, loader: DatasetLoader): """ Prepare the phase for learning """ - self._optimizer_instance = self.optimizer_factory.instantiate(model) + self._optimizer_instance = model.create_optimizer(self.optimizer_factory) self._loader = loader def epoch_info(self, training_info: TrainingInfo, global_idx: int, local_idx: int) -> EpochInfo: diff --git a/vel/train/train_phase.py b/vel/train/train_phase.py index daeb5733..13733d12 100644 --- a/vel/train/train_phase.py +++ b/vel/train/train_phase.py @@ -1,6 +1,4 @@ -from torch.optim import Optimizer - -from vel.api import TrainingInfo, EpochInfo, Model +from vel.api import TrainingInfo, EpochInfo, Model, VelOptimizer from vel.data import DatasetLoader from .trainer import Trainer @@ -14,7 +12,7 @@ def number_of_epochs(self) -> int: """ How many epochs does this phase take 
""" raise NotImplementedError - def set_up_phase(self, training_info: TrainingInfo, model: Model, loader: DatasetLoader) -> Optimizer: + def set_up_phase(self, training_info: TrainingInfo, model: Model, loader: DatasetLoader) -> VelOptimizer: """ Prepare the phase for learning, returns phase optimizer """ pass diff --git a/vel/train/trainer.py b/vel/train/trainer.py index 9bddd3d5..6222067c 100644 --- a/vel/train/trainer.py +++ b/vel/train/trainer.py @@ -2,9 +2,8 @@ import torch import torch.nn import tqdm -import typing -from vel.api import GradientModel, TrainingInfo, EpochInfo, BatchInfo +from vel.api import OptimizedModel, TrainingInfo, EpochInfo, BatchInfo from vel.data import DatasetLoader from vel.util.tensor_util import to_device @@ -13,19 +12,14 @@ class Trainer: """ Manages training process of a single model """ - def __init__(self, device: torch.device, model: GradientModel, max_grad_norm: typing.Optional[float] = None): + def __init__(self, device: torch.device, model: OptimizedModel): self.device = device - self.model = model.to(device) - self.max_grad_norm = max_grad_norm + self.model: OptimizedModel = model.to(device) def metrics(self): """ Return metrics for given learner/model """ return self.model.metrics() - def summary(self): - """ Print summary for given learner/model """ - return self.model.summary() - def train(self): """ Set model in the training mode """ return self.model.train() @@ -49,7 +43,7 @@ def run_epoch(self, epoch_info: EpochInfo, loader: DatasetLoader): """ Run full epoch of learning """ epoch_info.on_epoch_begin() - lr = epoch_info.optimizer.param_groups[-1]['lr'] + lr = epoch_info.optimizer.get_lr() print("|-------- Epoch {:06} Lr={:.6f} ----------|".format(epoch_info.global_epoch_idx, lr)) self.train_epoch(epoch_info, loader) @@ -74,7 +68,9 @@ def train_epoch(self, epoch_info, loader: DatasetLoader, interactive=True): batch_info['datapoint'] = datapoint batch_info.on_batch_begin('train') - self.train_batch(batch_info, 
datapoint) + datapoint = to_device(datapoint, self.device) # Move a data batch into the right device + metrics = self.model.optimize(datapoint, batch_info.optimizer) + batch_info.update(metrics) batch_info.on_batch_end('train') iterator.set_postfix(loss=epoch_info.result_accumulator.intermediate_value('loss')) @@ -94,26 +90,9 @@ def validation_epoch(self, epoch_info, loader: DatasetLoader, interactive=True): batch_info['datapoint'] = datapoint batch_info.on_batch_begin('val') - self.feed_batch(batch_info, datapoint) - batch_info.on_batch_end('val') - - def feed_batch(self, batch_info, data): - """ Run single batch of data """ - data = to_device(data, self.device) # Move a data batch into the right device - - metrics = self.model.calculate_gradient(data) - batch_info.update(metrics) + datapoint = to_device(datapoint, self.device) # Move a data batch into the right device + metrics = self.model.validate(datapoint) + batch_info.update(metrics) - def train_batch(self, batch_info, data): - """ Train single batch of data """ - batch_info.optimizer.zero_grad() - self.feed_batch(batch_info, data) - - if self.max_grad_norm is not None: - batch_info['grad_norm'] = torch.nn.utils.clip_grad_norm_( - filter(lambda p: p.requires_grad, self.model.parameters()), - max_norm=self.max_grad_norm - ) - - batch_info.optimizer.step() + batch_info.on_batch_end('val') diff --git a/vel/util/dataloader.py b/vel/util/dataloader.py new file mode 100644 index 00000000..b6b03fe5 --- /dev/null +++ b/vel/util/dataloader.py @@ -0,0 +1,21 @@ +class IteratorDictWrapper: + """ Transform dataset loader into a dictionary """ + + def __init__(self, iterator, field_mapping): + self.iterator = iterator + self.field_mapping = field_mapping + + def __iter__(self): + return map(self.map_values, iter(self.iterator)) + + def __len__(self): + return len(self.iterator) + + def map_values(self, item): + """ Map iterator values into a dictionary """ + return { + name: getattr(item, argument) for name, argument in 
self.field_mapping.items() + } + + + diff --git a/vel/util/module_util.py b/vel/util/module_util.py index 1c501336..ae415425 100644 --- a/vel/util/module_util.py +++ b/vel/util/module_util.py @@ -81,3 +81,19 @@ def chain_params(p): def to_parameter_groups(layer_groups): """ Convert from list of layer groups into list of parameter settings for an optimizer """ return [{'params': chain_params(x)} for x in layer_groups] + + +def optimizer_parameter_helper(parameters, parameter_dict): + """ Helper function for creating layer group optimizer instances """ + out_dict = parameter_dict.copy() + + for parameter, value in parameter_dict.items(): + if isinstance(value, collections.Sequence): + for idx, this_value in enumerate(value): + parameters[idx][parameter] = this_value + + out_dict[parameter] = value[0] + + return out_dict + + From 7e70b3d75a9c0ce2b8f23aaaa0d910cd4a4dd9b7 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 26 Sep 2019 22:20:40 -0700 Subject: [PATCH 098/162] Bring back RL algos. 
--- examples-configs/rl/atari/atari_a2c.yaml | 31 +- examples-configs/rl/atari/atari_ppo.yaml | 43 +- examples-configs/rl/atari/atari_trpo.yaml | 50 +-- .../atari/{ => purgatory}/atari_a2c_lstm.yaml | 0 .../{ => purgatory}/atari_a2c_tf_rmsprop.yaml | 0 .../rl/atari/{ => purgatory}/atari_acer.yaml | 0 .../atari_acer_trust_region.yaml | 0 .../atari/{ => purgatory}/atari_ppo_gru.yaml | 0 .../rl/mujoco/a2c/reacher_a2c.yaml | 67 --- examples-configs/rl/mujoco/mujoco_a2c.yaml | 65 +++ examples-configs/rl/mujoco/mujoco_ppo.yaml | 77 ++++ examples-configs/rl/mujoco/mujoco_trpo.yaml | 74 ++++ .../rl/mujoco/ppo/half_cheetah_ppo.yaml | 77 ---- .../rl/mujoco/ppo/hopper_ppo.yaml | 77 ---- .../rl/mujoco/ppo/reacher_ppo.yaml | 77 ---- .../rl/mujoco/ppo/walker_ppo.yaml | 77 ---- .../rl/mujoco/trpo/half_cheetah_trpo.yaml | 81 ---- .../rl/mujoco/trpo/hopper_trpo.yaml | 82 ---- .../rl/mujoco/trpo/reacher_trpo.yaml | 81 ---- .../multilayer_rnn_sequence_classification.py | 2 +- .../rnn/multilayer_rnn_sequence_model.py | 2 +- vel/openai/baselines/common/atari_wrappers.py | 76 +++- vel/openai/baselines/common/retro_wrappers.py | 63 +-- .../baselines/common/running_mean_std.py | 24 +- .../baselines/common/vec_env/__init__.py | 45 +- .../baselines/common/vec_env/dummy_vec_env.py | 9 +- .../baselines/common/vec_env/shmem_vec_env.py | 38 +- .../common/vec_env/subproc_vec_env.py | 32 +- vel/openai/baselines/common/wrappers.py | 31 ++ vel/openai/baselines/logger.py | 131 +++--- vel/rl/algo/a2c.py | 142 ++++++ vel/rl/algo/policy_gradient/trpo.py | 274 ------------ vel/rl/algo/ppo.py | 193 ++++++++ vel/rl/algo/trpo.py | 413 ++++++++++++++++++ vel/rl/api/__init__.py | 8 +- vel/rl/api/algo_base.py | 47 -- vel/rl/api/evaluator.py | 154 ------- vel/rl/api/policy.py | 22 - vel/rl/api/reinforcer_base.py | 9 +- vel/rl/api/rl_model.py | 65 +++ vel/rl/command/rl_train_command.py | 53 +-- vel/rl/env_roller/step_env_roller.py | 4 +- .../trajectory_replay_env_roller.py | 4 +- 
.../transition_replay_env_roller.py | 4 +- .../policy/purgatory/deterministic_policy.py | 4 +- ...tic_policy.py => old_stochastic_policy.py} | 4 +- .../old_stochastic_rnn_policy.py} | 49 +-- .../purgatory/q_distributional_policy.py | 4 +- vel/rl/policy/purgatory/q_model.py | 4 +- vel/rl/policy/purgatory/q_noisy_model.py | 4 +- .../policy/purgatory/stochastic_rnn_policy.py | 49 ++- .../semipurgatory}/__init__.py | 0 .../semipurgatory/a2c_rnn.py} | 7 +- .../semipurgatory}/acer.py | 0 .../semipurgatory}/ddpg.py | 0 vel/rl/{algo => policy/semipurgatory}/dqn.py | 0 .../semipurgatory/ppo_rnn.py} | 0 vel/rl/policy/stochastic_policy.py | 53 +-- ...arate.py => stochastic_policy_separate.py} | 43 +- ...fered_mixed_policy_iteration_reinforcer.py | 26 +- ...uffered_off_policy_iteration_reinforcer.py | 24 +- .../on_policy_iteration_reinforcer.py | 67 ++- vel/rl/test/test_integration.py | 4 +- vel/rl/util/actor.py | 4 +- 64 files changed, 1522 insertions(+), 1628 deletions(-) rename examples-configs/rl/atari/{ => purgatory}/atari_a2c_lstm.yaml (100%) rename examples-configs/rl/atari/{ => purgatory}/atari_a2c_tf_rmsprop.yaml (100%) rename examples-configs/rl/atari/{ => purgatory}/atari_acer.yaml (100%) rename examples-configs/rl/atari/{ => purgatory}/atari_acer_trust_region.yaml (100%) rename examples-configs/rl/atari/{ => purgatory}/atari_ppo_gru.yaml (100%) delete mode 100644 examples-configs/rl/mujoco/a2c/reacher_a2c.yaml create mode 100644 examples-configs/rl/mujoco/mujoco_a2c.yaml create mode 100644 examples-configs/rl/mujoco/mujoco_ppo.yaml create mode 100644 examples-configs/rl/mujoco/mujoco_trpo.yaml delete mode 100644 examples-configs/rl/mujoco/ppo/half_cheetah_ppo.yaml delete mode 100644 examples-configs/rl/mujoco/ppo/hopper_ppo.yaml delete mode 100644 examples-configs/rl/mujoco/ppo/reacher_ppo.yaml delete mode 100644 examples-configs/rl/mujoco/ppo/walker_ppo.yaml delete mode 100644 examples-configs/rl/mujoco/trpo/half_cheetah_trpo.yaml delete mode 100644 
examples-configs/rl/mujoco/trpo/hopper_trpo.yaml delete mode 100644 examples-configs/rl/mujoco/trpo/reacher_trpo.yaml create mode 100644 vel/openai/baselines/common/wrappers.py create mode 100644 vel/rl/algo/a2c.py delete mode 100644 vel/rl/algo/policy_gradient/trpo.py create mode 100644 vel/rl/algo/ppo.py create mode 100644 vel/rl/algo/trpo.py delete mode 100644 vel/rl/api/algo_base.py delete mode 100644 vel/rl/api/evaluator.py delete mode 100644 vel/rl/api/policy.py create mode 100644 vel/rl/api/rl_model.py rename vel/rl/policy/purgatory/{stochastic_policy.py => old_stochastic_policy.py} (97%) rename vel/rl/policy/{stochastic_rnn_policy.py => purgatory/old_stochastic_rnn_policy.py} (76%) rename vel/rl/{algo/policy_gradient => policy/semipurgatory}/__init__.py (100%) rename vel/rl/{algo/policy_gradient/a2c.py => policy/semipurgatory/a2c_rnn.py} (92%) rename vel/rl/{algo/policy_gradient => policy/semipurgatory}/acer.py (100%) rename vel/rl/{algo/policy_gradient => policy/semipurgatory}/ddpg.py (100%) rename vel/rl/{algo => policy/semipurgatory}/dqn.py (100%) rename vel/rl/{algo/policy_gradient/ppo.py => policy/semipurgatory/ppo_rnn.py} (100%) rename vel/rl/policy/{purgatory/stochastic_policy_model_separate.py => stochastic_policy_separate.py} (67%) diff --git a/examples-configs/rl/atari/atari_a2c.yaml b/examples-configs/rl/atari/atari_a2c.yaml index 1d15f2dd..cdacb76c 100644 --- a/examples-configs/rl/atari/atari_a2c.yaml +++ b/examples-configs/rl/atari/atari_a2c.yaml @@ -12,34 +12,34 @@ vec_env: model: - name: vel.rl.policy.stochastic_policy + name: vel.rl.algo.a2c - input_block: - name: vel.module.input.image_to_tensor + entropy_coefficient: 0.01 + value_coefficient: 0.5 + discount_factor: 0.99 - backbone: - name: vel.rl.backbone.nature_cnn + policy: + name: vel.rl.policy.stochastic_policy + input_block: + name: vel.module.input.image_to_tensor - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history + backbone: + name: 
vel.rl.backbone.nature_cnn + + input_width: 84 + input_height: 84 + input_channels: 4 # The same as frame_history reinforcer: name: vel.rl.reinforcer.on_policy_iteration_reinforcer - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 - env_roller: name: vel.rl.env_roller.step_env_roller number_of_steps: 5 # How many environment steps go into a single batch parallel_envs: 16 # How many environments to run in parallel + batch_size: 256 # How many samples can go into the model once optimizer: @@ -47,6 +47,7 @@ optimizer: lr: 7.0e-4 alpha: 0.99 epsilon: 1.0e-3 + max_grad_norm: 0.5 commands: diff --git a/examples-configs/rl/atari/atari_ppo.yaml b/examples-configs/rl/atari/atari_ppo.yaml index 013f8ef0..c96c518c 100644 --- a/examples-configs/rl/atari/atari_ppo.yaml +++ b/examples-configs/rl/atari/atari_ppo.yaml @@ -12,36 +12,34 @@ vec_env: model: - name: vel.rl.policy.stochastic_policy + name: vel.rl.algo.ppo - input_block: - name: vel.module.input.image_to_tensor + cliprange: + name: vel.function.linear + initial_value: 0.1 + final_value: 0.0 - backbone: - name: vel.rl.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history + entropy_coefficient: 0.01 + value_coefficient: 0.5 + discount_factor: 0.99 # Discount factor for the rewards + gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter -reinforcer: - name: vel.rl.reinforcer.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.ppo + policy: + name: vel.rl.policy.stochastic_policy - entropy_coefficient: 0.01 - value_coefficient: 0.5 + input_block: + name: vel.module.input.image_to_tensor - discount_factor: 0.99 # Discount factor for the rewards - gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter + backbone: + name: vel.rl.backbone.nature_cnn + input_width: 84 + input_height: 84 + input_channels: 4 # The same as frame_history 
- max_grad_norm: 0.5 # Gradient clipping parameter - cliprange: - name: vel.function.linear - initial_value: 0.1 - final_value: 0.0 +reinforcer: + name: vel.rl.reinforcer.on_policy_iteration_reinforcer env_roller: name: vel.rl.env_roller.step_env_roller @@ -56,6 +54,7 @@ optimizer: name: vel.optimizer.adam lr: 2.5e-4 epsilon: 1.0e-5 + max_grad_norm: 0.5 # Gradient clipping parameter scheduler: diff --git a/examples-configs/rl/atari/atari_trpo.yaml b/examples-configs/rl/atari/atari_trpo.yaml index 6b363274..df2446ab 100644 --- a/examples-configs/rl/atari/atari_trpo.yaml +++ b/examples-configs/rl/atari/atari_trpo.yaml @@ -12,40 +12,37 @@ vec_env: model: - name: vel.rl.models.stochastic_policy_model_separate + name: vel.rl.algo.trpo + + max_kl: 0.001 + cg_iters: 10 + line_search_iters: 10 + improvement_acceptance_ratio: 0.1 + cg_damping: 0.001 + vf_iters: 3 + entropy_coefficient: 0.1 + discount_factor: 0.99 + + gae_lambda: 1.00 # Generalized Advantage Estimator Lambda parameter input_block: - name: vel.modules.input.image_to_tensor + name: vel.module.input.image_to_tensor policy_backbone: - name: vel.rl.models.backbone.nature_cnn_small + name: vel.rl.backbone.nature_cnn_small input_width: 84 input_height: 84 input_channels: 4 # The same as frame_history value_backbone: - name: vel.rl.models.backbone.nature_cnn_small + name: vel.rl.backbone.nature_cnn_small input_width: 84 input_height: 84 input_channels: 4 # The same as frame_history reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.trpo - max_kl: 0.001 - cg_iters: 10 - line_search_iters: 10 - improvement_acceptance_ratio: 0.1 - cg_damping: 0.001 - vf_iters: 3 - entropy_coef: 0.1 - discount_factor: 0.99 - -# max_grad_norm: 0.5 - gae_lambda: 1.00 # Generalized Advantage Estimator Lambda parameter + name: vel.rl.reinforcer.on_policy_iteration_reinforcer env_roller: name: vel.rl.env_roller.step_env_roller @@ -58,25 +55,20 @@ reinforcer: optimizer: -# name: 
vel.optimizers.rmsprop -# lr: 7.0e-4 -# alpha: 0.99 -## epsilon: 1.0e-5 -# epsilon: 1.0e-3 - - name: vel.optimizers.adam + name: vel.optimizer.adam lr: 1.0e-4 epsilon: 1.0e-3 +# max_grad_norm: 0.5 commands: train: - name: vel.rl.commands.rl_train_command + name: vel.rl.command.rl_train_command total_frames: 1.1e7 batches_per_epoch: 16 record: - name: vel.rl.commands.record_movie_command + name: vel.rl.command.record_movie_command takes: 10 videoname: 'atari_trpo_vid_{:04}.avi' frame_history: 4 @@ -84,7 +76,7 @@ commands: argmax_sampling: true evaluate: - name: vel.rl.commands.evaluate_env_command + name: vel.rl.command.evaluate_env_command takes: 100 frame_history: 4 sample_args: diff --git a/examples-configs/rl/atari/atari_a2c_lstm.yaml b/examples-configs/rl/atari/purgatory/atari_a2c_lstm.yaml similarity index 100% rename from examples-configs/rl/atari/atari_a2c_lstm.yaml rename to examples-configs/rl/atari/purgatory/atari_a2c_lstm.yaml diff --git a/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml b/examples-configs/rl/atari/purgatory/atari_a2c_tf_rmsprop.yaml similarity index 100% rename from examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml rename to examples-configs/rl/atari/purgatory/atari_a2c_tf_rmsprop.yaml diff --git a/examples-configs/rl/atari/atari_acer.yaml b/examples-configs/rl/atari/purgatory/atari_acer.yaml similarity index 100% rename from examples-configs/rl/atari/atari_acer.yaml rename to examples-configs/rl/atari/purgatory/atari_acer.yaml diff --git a/examples-configs/rl/atari/atari_acer_trust_region.yaml b/examples-configs/rl/atari/purgatory/atari_acer_trust_region.yaml similarity index 100% rename from examples-configs/rl/atari/atari_acer_trust_region.yaml rename to examples-configs/rl/atari/purgatory/atari_acer_trust_region.yaml diff --git a/examples-configs/rl/atari/atari_ppo_gru.yaml b/examples-configs/rl/atari/purgatory/atari_ppo_gru.yaml similarity index 100% rename from examples-configs/rl/atari/atari_ppo_gru.yaml rename to 
examples-configs/rl/atari/purgatory/atari_ppo_gru.yaml diff --git a/examples-configs/rl/mujoco/a2c/reacher_a2c.yaml b/examples-configs/rl/mujoco/a2c/reacher_a2c.yaml deleted file mode 100644 index f3a313ef..00000000 --- a/examples-configs/rl/mujoco/a2c/reacher_a2c.yaml +++ /dev/null @@ -1,67 +0,0 @@ -name: 'reacher_a2c' - - -env: - name: vel.rl.env.mujoco - game: 'Reacher-v2' - normalize_returns: true - - -vec_env: - name: vel.rl.vecenv.dummy - - -model: - name: vel.rl.models.stochastic_policy_model - - input_block: - name: vel.modules.input.normalize_observations - input_shape: 11 - - backbone: - name: vel.rl.models.backbone.mlp - input_length: 11 - hidden_layers: [64, 64] - activation: 'tanh' - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.a2c - - entropy_coefficient: 0.0 - value_coefficient: 0.5 - max_grad_norm: 0.5 - - gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter - discount_factor: 0.99 # Discount factor for the rewards - - env_roller: - name: vel.rl.env_roller.step_env_roller - - parallel_envs: 1 # How many environments to run in parallel - number_of_steps: 2048 # How many environment steps go into a single batch - batch_size: 2048 # How many samples can go into the model once - - -optimizer: - name: vel.optimizers.adam - lr: 3.0e-4 - epsilon: 1.0e-5 - - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.0e6 - batches_per_epoch: 1 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'reacher_vid_{:04}.avi' - sample_args: - argmax_sampling: true diff --git a/examples-configs/rl/mujoco/mujoco_a2c.yaml b/examples-configs/rl/mujoco/mujoco_a2c.yaml new file mode 100644 index 00000000..266f7353 --- /dev/null +++ b/examples-configs/rl/mujoco/mujoco_a2c.yaml @@ -0,0 +1,65 @@ +name: 'mujoco_a2c' + + +env: + name: vel.rl.env.mujoco + game: !param game = 'Reacher-v2' + normalize_returns: true + + +vec_env: + name: 
vel.rl.vecenv.dummy + + +model: + name: vel.rl.algo.a2c + + entropy_coefficient: 0.0 + value_coefficient: 0.5 + gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter + discount_factor: 0.99 # Discount factor for the rewards + + policy: + name: vel.rl.policy.stochastic_policy + + input_block: + name: vel.module.input.normalize_observations + input_shape: 11 + + backbone: + name: vel.rl.backbone.mlp + input_length: 11 + hidden_layers: [64, 64] + activation: 'tanh' + + +reinforcer: + name: vel.rl.reinforcer.on_policy_iteration_reinforcer + + env_roller: + name: vel.rl.env_roller.step_env_roller + + parallel_envs: 1 # How many environments to run in parallel + number_of_steps: 2048 # How many environment steps go into a single batch + batch_size: 2048 # How many samples can go into the model once + + +optimizer: + name: vel.optimizer.adam + lr: 3.0e-4 + epsilon: 1.0e-5 + max_grad_norm: 0.5 + + +commands: + train: + name: vel.rl.command.rl_train_command + total_frames: 1.0e6 + batches_per_epoch: 1 + + record: + name: vel.rl.command.record_movie_command + takes: 10 + videoname: 'reacher_vid_{:04}.avi' + sample_args: + argmax_sampling: true diff --git a/examples-configs/rl/mujoco/mujoco_ppo.yaml b/examples-configs/rl/mujoco/mujoco_ppo.yaml new file mode 100644 index 00000000..a1cc2113 --- /dev/null +++ b/examples-configs/rl/mujoco/mujoco_ppo.yaml @@ -0,0 +1,77 @@ +name: 'mujoco_ppo' + + +env: + name: vel.rl.env.mujoco + game: !param game = 'Reacher-v2' + normalize_returns: true + + +vec_env: + name: vel.rl.vecenv.dummy + + +model: + name: vel.rl.algo.ppo + + entropy_coefficient: 0.0 + value_coefficient: 0.5 + + cliprange: 0.2 + + discount_factor: 0.99 # Discount factor for the rewards + gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter + + policy: + name: vel.rl.policy.stochastic_policy_separate + + input_block: + name: vel.module.input.normalize_observations + input_shape: 11 + + policy_backbone: + name: vel.rl.backbone.mlp + input_length: 
11 + hidden_layers: [64, 64] + activation: 'tanh' + + value_backbone: + name: vel.rl.backbone.mlp + input_length: 11 + hidden_layers: [64, 64] + activation: 'tanh' + + +reinforcer: + name: vel.rl.reinforcer.on_policy_iteration_reinforcer + + env_roller: + name: vel.rl.env_roller.step_env_roller + + parallel_envs: 1 # How many environments to run in parallel + batch_size: 64 # How many samples can go into the model in one batch + number_of_steps: 2048 # How many environment steps go into a single batch + experience_replay: 10 # How many times to replay the experience + + +optimizer: + name: vel.optimizer.adam + lr: 3.0e-4 + epsilon: 1.0e-5 + max_grad_norm: 0.5 # Gradient clipping parameter + + +scheduler: + name: vel.scheduler.linear_batch_scaler + + +commands: + train: + name: vel.rl.command.rl_train_command + total_frames: 1.0e6 + batches_per_epoch: 1 + + record: + name: vel.rl.command.record_movie_command + takes: 10 + videoname: 'half_cheetah_vid_{:04}.avi' diff --git a/examples-configs/rl/mujoco/mujoco_trpo.yaml b/examples-configs/rl/mujoco/mujoco_trpo.yaml new file mode 100644 index 00000000..47356e0d --- /dev/null +++ b/examples-configs/rl/mujoco/mujoco_trpo.yaml @@ -0,0 +1,74 @@ +name: 'mujoco_trpo' + +env: + name: vel.rl.env.mujoco + game: !param game = 'Reacher-v2' + normalize_returns: true + + +vec_env: + name: vel.rl.vecenv.dummy + + +model: + name: vel.rl.algo.trpo + + discount_factor: 0.99 # Discount factor for the rewards + gae_lambda: 0.98 # Generalized Advantage Estimator Lambda parameter + + max_kl: 0.01 + cg_iters: 10 + line_search_iters: 10 + improvement_acceptance_ratio: 0.1 + cg_damping: 0.1 + vf_iters: 5 + entropy_coefficient: 0.0 + + input_block: + name: vel.module.input.normalize_observations + input_shape: 11 + + policy_backbone: + name: vel.rl.backbone.mlp + input_length: 11 + hidden_layers: [32, 32] + activation: 'tanh' + + value_backbone: + name: vel.rl.backbone.mlp + input_length: 11 + hidden_layers: [32, 32] + activation: 'tanh' + + 
+reinforcer: + name: vel.rl.reinforcer.on_policy_iteration_reinforcer + + env_roller: + name: vel.rl.env_roller.step_env_roller + + parallel_envs: 1 # How many environments to run in parallel + number_of_steps: 1024 # How many environment steps go into a single batch + batch_size: 1024 # How many samples can go into the model once + + + +optimizer: + name: vel.optimizer.adam + lr: 1.0e-3 + epsilon: 1.0e-8 + # max_grad_norm: 0.5 + + +commands: + train: + name: vel.rl.command.rl_train_command + total_frames: 1.0e6 + batches_per_epoch: 4 + + record: + name: vel.rl.command.record_movie_command + takes: 10 + videoname: 'reacher_vid_{:04}.avi' + sample_args: + argmax_sampling: true diff --git a/examples-configs/rl/mujoco/ppo/half_cheetah_ppo.yaml b/examples-configs/rl/mujoco/ppo/half_cheetah_ppo.yaml deleted file mode 100644 index 202e0d4f..00000000 --- a/examples-configs/rl/mujoco/ppo/half_cheetah_ppo.yaml +++ /dev/null @@ -1,77 +0,0 @@ -name: 'half_cheetah_ppo' - - -env: - name: vel.rl.env.mujoco - game: 'HalfCheetah-v2' - normalize_returns: true - - -vec_env: - name: vel.rl.vecenv.dummy - - -model: - name: vel.rl.models.stochastic_policy_model_separate - - input_block: - name: vel.modules.input.normalize_observations - input_shape: 17 - - policy_backbone: - name: vel.rl.models.backbone.mlp - input_length: 17 - hidden_layers: [64, 64] - activation: 'tanh' - - value_backbone: - name: vel.rl.models.backbone.mlp - input_length: 17 - hidden_layers: [64, 64] - activation: 'tanh' - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.ppo - - entropy_coefficient: 0.0 - value_coefficient: 0.5 - - cliprange: 0.2 - - max_grad_norm: 0.5 # Gradient clipping parameter - discount_factor: 0.99 # Discount factor for the rewards - gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter - - env_roller: - name: vel.rl.env_roller.step_env_roller - - parallel_envs: 1 # How many environments to run in parallel - 
batch_size: 64 # How many samples can go into the model in one batch - number_of_steps: 2048 # How many environment steps go into a single batch - experience_replay: 10 # How many times to replay the experience - - -optimizer: - name: vel.optimizers.adam - lr: 3.0e-4 - epsilon: 1.0e-5 - - -scheduler: - name: vel.scheduler.linear_batch_scaler - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.0e6 - batches_per_epoch: 1 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'half_cheetah_vid_{:04}.avi' diff --git a/examples-configs/rl/mujoco/ppo/hopper_ppo.yaml b/examples-configs/rl/mujoco/ppo/hopper_ppo.yaml deleted file mode 100644 index 2001d0c9..00000000 --- a/examples-configs/rl/mujoco/ppo/hopper_ppo.yaml +++ /dev/null @@ -1,77 +0,0 @@ -name: 'hopper_ppo' - - -env: - name: vel.rl.env.mujoco - game: 'Hopper-v2' - normalize_returns: true - - -vec_env: - name: vel.rl.vecenv.dummy - - -model: - name: vel.rl.models.stochastic_policy_model_separate - - input_block: - name: vel.modules.input.normalize_observations - input_shape: 11 - - policy_backbone: - name: vel.rl.models.backbone.mlp - input_length: 11 - hidden_layers: [64, 64] - activation: 'tanh' - - value_backbone: - name: vel.rl.models.backbone.mlp - input_length: 11 - hidden_layers: [64, 64] - activation: 'tanh' - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.ppo - - entropy_coefficient: 0.0 - value_coefficient: 0.5 - - cliprange: 0.2 - - max_grad_norm: 0.5 # Gradient clipping parameter - discount_factor: 0.99 # Discount factor for the rewards - gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter - - env_roller: - name: vel.rl.env_roller.step_env_roller - - parallel_envs: 1 # How many environments to run in parallel - batch_size: 64 # How many samples can go into the model once - number_of_steps: 2048 # How many environment steps go into a single batch - 
experience_replay: 10 # How many times to replay the experience - - -optimizer: - name: vel.optimizers.adam - lr: 3.0e-4 - epsilon: 1.0e-5 - - -scheduler: - name: vel.scheduler.linear_batch_scaler - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.0e6 - batches_per_epoch: 1 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'hopper_vid_{:04}.avi' diff --git a/examples-configs/rl/mujoco/ppo/reacher_ppo.yaml b/examples-configs/rl/mujoco/ppo/reacher_ppo.yaml deleted file mode 100644 index f0da8742..00000000 --- a/examples-configs/rl/mujoco/ppo/reacher_ppo.yaml +++ /dev/null @@ -1,77 +0,0 @@ -name: 'reacher_ppo' - - -env: - name: vel.rl.env.mujoco - game: 'Reacher-v2' - normalize_returns: true - - -vec_env: - name: vel.rl.vecenv.dummy - - -model: - name: vel.rl.models.stochastic_policy_model_separate - - input_block: - name: vel.modules.input.normalize_observations - input_shape: 11 - - policy_backbone: - name: vel.rl.models.backbone.mlp - input_length: 11 - hidden_layers: [64, 64] - activation: 'tanh' - - value_backbone: - name: vel.rl.models.backbone.mlp - input_length: 11 - hidden_layers: [64, 64] - activation: 'tanh' - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.ppo - - entropy_coefficient: 0.0 - value_coefficient: 0.5 - - cliprange: 0.2 - - max_grad_norm: 0.5 # Gradient clipping parameter - discount_factor: 0.99 # Discount factor for the rewards - gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter - - env_roller: - name: vel.rl.env_roller.step_env_roller - - parallel_envs: 1 # How many environments to run in parallel - batch_size: 64 # How many samples can go into the model once - number_of_steps: 2048 # How many environment steps go into a single batch - experience_replay: 10 # How many times to replay the experience - - -optimizer: - name: vel.optimizers.adam - lr: 3.0e-4 - epsilon: 1.0e-5 - - -scheduler: - 
name: vel.scheduler.linear_batch_scaler - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.0e6 - batches_per_epoch: 1 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'reacher_vid_{:04}.avi' diff --git a/examples-configs/rl/mujoco/ppo/walker_ppo.yaml b/examples-configs/rl/mujoco/ppo/walker_ppo.yaml deleted file mode 100644 index a07777f0..00000000 --- a/examples-configs/rl/mujoco/ppo/walker_ppo.yaml +++ /dev/null @@ -1,77 +0,0 @@ -name: 'walker_ppo' - - -env: - name: vel.rl.env.mujoco - game: 'Walker2d-v2' - normalize_returns: true - - -vec_env: - name: vel.rl.vecenv.dummy - - -model: - name: vel.rl.models.stochastic_policy_model_separate - - input_block: - name: vel.modules.input.normalize_observations - input_shape: 17 - - policy_backbone: - name: vel.rl.models.backbone.mlp - input_length: 17 - hidden_layers: [64, 64] - activation: 'tanh' - - value_backbone: - name: vel.rl.models.backbone.mlp - input_length: 17 - hidden_layers: [64, 64] - activation: 'tanh' - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.ppo - - entropy_coefficient: 0.0 - value_coefficient: 0.5 - - cliprange: 0.2 - - max_grad_norm: 0.5 # Gradient clipping parameter - discount_factor: 0.99 # Discount factor for the rewards - gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter - - env_roller: - name: vel.rl.env_roller.step_env_roller - - parallel_envs: 1 # How many environments to run in parallel - batch_size: 64 # How many samples can go into the model once - number_of_steps: 2048 # How many environment steps go into a single batch - experience_replay: 10 # How many times to replay the experience - - -optimizer: - name: vel.optimizers.adam - lr: 3.0e-4 - epsilon: 1.0e-5 - - -scheduler: - name: vel.scheduler.linear_batch_scaler - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.0e6 - batches_per_epoch: 1 - - record: 
- name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'walker_vid_{:04}.avi' diff --git a/examples-configs/rl/mujoco/trpo/half_cheetah_trpo.yaml b/examples-configs/rl/mujoco/trpo/half_cheetah_trpo.yaml deleted file mode 100644 index 131bb6e3..00000000 --- a/examples-configs/rl/mujoco/trpo/half_cheetah_trpo.yaml +++ /dev/null @@ -1,81 +0,0 @@ -name: 'half_cheetah_trpo' - -env: - name: vel.rl.env.mujoco - game: 'HalfCheetah-v2' - normalize_returns: true - - -vec_env: - name: vel.rl.vecenv.dummy - - -model: - name: vel.rl.models.stochastic_policy_model_separate - - input_block: - name: vel.modules.input.normalize_observations - input_shape: 17 - - policy_backbone: - name: vel.rl.models.backbone.mlp - input_length: 17 - hidden_layers: [32, 32] - activation: 'tanh' - - value_backbone: - name: vel.rl.models.backbone.mlp - input_length: 17 - hidden_layers: [32, 32] - activation: 'tanh' - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.trpo - discount_factor: 0.99 # Discount factor for the rewards - gae_lambda: 0.98 # Generalized Advantage Estimator Lambda parameter - - max_kl: 0.01 - cg_iters: 10 - line_search_iters: 10 - improvement_acceptance_ratio: 0.1 - cg_damping: 0.1 - vf_iters: 5 - entropy_coef: 0.0 -# max_grad_norm: 0.5 - - env_roller: - name: vel.rl.env_roller.step_env_roller - - parallel_envs: 1 # How many environments to run in parallel - number_of_steps: 1024 # How many environment steps go into a single batch - batch_size: 1024 # How many samples can go into the model once -# experience_replay: 10 # How many times to replay the experience - - - -optimizer: - name: vel.optimizers.adam - lr: 1.0e-3 - epsilon: 1.0e-8 - - -#scheduler: -# name: vel.scheduler.linear_batch_scaler - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.0e6 - batches_per_epoch: 4 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 
'reacher_vid_{:04}.avi' - sample_args: - argmax_sampling: true diff --git a/examples-configs/rl/mujoco/trpo/hopper_trpo.yaml b/examples-configs/rl/mujoco/trpo/hopper_trpo.yaml deleted file mode 100644 index 41444d79..00000000 --- a/examples-configs/rl/mujoco/trpo/hopper_trpo.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: 'hopper_trpo' - -env: - name: vel.rl.env.mujoco - game: 'Hopper-v2' - normalize_returns: true - - -vec_env: - name: vel.rl.vecenv.dummy - - -model: - name: vel.rl.models.policy_gradient_model_separate - - input_block: - name: vel.modules.input.normalize_observations - input_shape: 17 - - policy_backbone: - name: vel.rl.models.backbone.mlp - input_length: 11 - hidden_layers: [32, 32] - activation: 'tanh' - - value_backbone: - name: vel.rl.models.backbone.mlp - input_length: 11 - hidden_layers: [32, 32] - activation: 'tanh' - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.trpo - max_kl: 0.01 - cg_iters: 10 - line_search_iters: 10 - improvement_acceptance_ratio: 0.1 - cg_damping: 0.1 - vf_iters: 5 - entropy_coef: 0.0 -# max_grad_norm: 0.5 - - env_roller: - name: vel.rl.env_roller.vec.step_env_roller - gae_lambda: 0.98 # Generalized Advantage Estimator Lambda parameter - - - parallel_envs: 1 # How many environments to run in parallel - batch_size: 1024 # How many samples can go into the model once - number_of_steps: 1024 # How many environment steps go into a single batch -# experience_replay: 10 # How many times to replay the experience - - discount_factor: 0.99 # Discount factor for the rewards - - -optimizer: - name: vel.optimizers.adam - lr: 0.001 - epsilon: 1.0e-8 - - -#scheduler: -# name: vel.scheduler.linear_batch_scaler - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.0e6 - batches_per_epoch: 2 - openai_logging: true - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'reacher_vid_{:04}.avi' - sample_args: - 
argmax_sampling: true diff --git a/examples-configs/rl/mujoco/trpo/reacher_trpo.yaml b/examples-configs/rl/mujoco/trpo/reacher_trpo.yaml deleted file mode 100644 index 65aa7bc0..00000000 --- a/examples-configs/rl/mujoco/trpo/reacher_trpo.yaml +++ /dev/null @@ -1,81 +0,0 @@ -name: 'reacher_trpo' - -env: - name: vel.rl.env.mujoco - game: 'Reacher-v2' - normalize_returns: true - - -vec_env: - name: vel.rl.vecenv.dummy - - -model: - name: vel.rl.models.policy_gradient_model_separate - - input_block: - name: vel.modules.input.normalize_observations - input_shape: 17 - - policy_backbone: - name: vel.rl.models.backbone.mlp - input_length: 11 - hidden_layers: [32, 32] - activation: 'tanh' - - value_backbone: - name: vel.rl.models.backbone.mlp - input_length: 11 - hidden_layers: [32, 32] - activation: 'tanh' - - -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.trpo - max_kl: 0.01 - cg_iters: 10 - line_search_iters: 10 - improvement_acceptance_ratio: 0.1 - cg_damping: 0.1 - vf_iters: 5 - entropy_coef: 0.0 -# max_grad_norm: 0.5 - - env_roller: - name: vel.rl.env_roller.vec.step_env_roller - gae_lambda: 0.98 # Generalized Advantage Estimator Lambda parameter - - parallel_envs: 1 # How many environments to run in parallel - batch_size: 1024 # How many samples can go into the model once - number_of_steps: 1024 # How many environment steps go into a single batch -# experience_replay: 10 # How many times to replay the experience - - discount_factor: 0.99 # Discount factor for the rewards - - -optimizer: - name: vel.optimizers.adam - lr: 0.001 - epsilon: 1.0e-8 - - -#scheduler: -# name: vel.scheduler.linear_batch_scaler - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.0e6 - batches_per_epoch: 2 - openai_logging: true - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'reacher_vid_{:04}.avi' - sample_args: - argmax_sampling: true diff --git 
a/vel/model/rnn/multilayer_rnn_sequence_classification.py b/vel/model/rnn/multilayer_rnn_sequence_classification.py index 4c724c76..d19f40f3 100644 --- a/vel/model/rnn/multilayer_rnn_sequence_classification.py +++ b/vel/model/rnn/multilayer_rnn_sequence_classification.py @@ -13,7 +13,7 @@ class MultilayerRnnSequenceClassification(LossFunctionModel): - """ Multilayer GRU network for sequence modeling (n:1) """ + """ Multilayer RNN network for sequence modeling (n:1) """ def __init__(self, input_block: LinearBackboneModel, rnn_type: str, output_dim: int, rnn_layers: typing.List[int], rnn_dropout: float = 0.0, bidirectional: bool = False, diff --git a/vel/model/rnn/multilayer_rnn_sequence_model.py b/vel/model/rnn/multilayer_rnn_sequence_model.py index 2e90c2d3..959741a5 100644 --- a/vel/model/rnn/multilayer_rnn_sequence_model.py +++ b/vel/model/rnn/multilayer_rnn_sequence_model.py @@ -9,7 +9,7 @@ class MultilayerRnnSequenceModel(LossFunctionModel): - """ Multilayer GRU network for sequence modeling (n:n) """ + """ Multilayer RNN network for sequence modeling (n:n) """ def __init__(self, input_block: LinearBackboneModel, rnn_type: str, hidden_layers: typing.List[int], output_dim: int, dropout: float = 0.0): diff --git a/vel/openai/baselines/common/atari_wrappers.py b/vel/openai/baselines/common/atari_wrappers.py index a5a7fa9a..3b3540b7 100644 --- a/vel/openai/baselines/common/atari_wrappers.py +++ b/vel/openai/baselines/common/atari_wrappers.py @@ -4,6 +4,8 @@ from gym import spaces import cv2 cv2.ocl.setUseOpenCL(False) +from .wrappers import TimeLimit + class NoopResetEnv(gym.Wrapper): def __init__(self, env, noop_max=30): @@ -174,27 +176,60 @@ def reward(self, reward): """Bin reward to {+1, 0, -1} by its sign.""" return np.sign(reward) + class WarpFrame(gym.ObservationWrapper): - def __init__(self, env, width=84, height=84, grayscale=True): - """Warp frames to 84x84 as done in the Nature paper and later work.""" - gym.ObservationWrapper.__init__(self, env) - 
self.width = width - self.height = height - self.grayscale = grayscale - if self.grayscale: - self.observation_space = spaces.Box(low=0, high=255, - shape=(self.height, self.width, 1), dtype=np.uint8) + def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None): + """ + Warp frames to 84x84 as done in the Nature paper and later work. + + If the environment uses dictionary observations, `dict_space_key` can be specified which indicates which + observation should be warped. + """ + super().__init__(env) + self._width = width + self._height = height + self._grayscale = grayscale + self._key = dict_space_key + if self._grayscale: + num_colors = 1 + else: + num_colors = 3 + + new_space = gym.spaces.Box( + low=0, + high=255, + shape=(self._height, self._width, num_colors), + dtype=np.uint8, + ) + if self._key is None: + original_space = self.observation_space + self.observation_space = new_space else: - self.observation_space = spaces.Box(low=0, high=255, - shape=(self.height, self.width, 3), dtype=np.uint8) + original_space = self.observation_space.spaces[self._key] + self.observation_space.spaces[self._key] = new_space + assert original_space.dtype == np.uint8 and len(original_space.shape) == 3 - def observation(self, frame): - if self.grayscale: + def observation(self, obs): + if self._key is None: + frame = obs + else: + frame = obs[self._key] + + if self._grayscale: frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) - frame = cv2.resize(frame, (self.width, self.height), interpolation=cv2.INTER_AREA) - if self.grayscale: + frame = cv2.resize( + frame, (self._width, self._height), interpolation=cv2.INTER_AREA + ) + if self._grayscale: frame = np.expand_dims(frame, -1) - return frame + + if self._key is None: + obs = frame + else: + obs = obs.copy() + obs[self._key] = frame + return obs + class FrameStack(gym.Wrapper): def __init__(self, env, k): @@ -265,16 +300,15 @@ def __len__(self): return len(self._force()) def __getitem__(self, i): - return 
self._force()[i] + return self._force()[..., i] -def make_atari(env_id, timelimit=True): - # XXX(john): remove timelimit argument after gym is upgraded to allow double wrapping +def make_atari(env_id, max_episode_steps=None): env = gym.make(env_id) - if not timelimit: - env = env.env assert 'NoFrameskip' in env.spec.id env = NoopResetEnv(env, noop_max=30) env = MaxAndSkipEnv(env, skip=4) + if max_episode_steps is not None: + env = TimeLimit(env, max_episode_steps=max_episode_steps) return env def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False): diff --git a/vel/openai/baselines/common/retro_wrappers.py b/vel/openai/baselines/common/retro_wrappers.py index 6e8fe912..badbbdd6 100644 --- a/vel/openai/baselines/common/retro_wrappers.py +++ b/vel/openai/baselines/common/retro_wrappers.py @@ -1,28 +1,12 @@ - # flake8: noqa F403, F405 -from .atari_wrappers import * +from collections import deque +import cv2 +cv2.ocl.setUseOpenCL(False) +from .atari_wrappers import WarpFrame, ClipRewardEnv, FrameStack, ScaledFloatFrame +from .wrappers import TimeLimit import numpy as np import gym -class TimeLimit(gym.Wrapper): - def __init__(self, env, max_episode_steps=None): - super(TimeLimit, self).__init__(env) - self._max_episode_steps = max_episode_steps - self._elapsed_steps = 0 - - def step(self, ac): - observation, reward, done, info = self.env.step(ac) - self._elapsed_steps += 1 - if self._elapsed_steps >= self._max_episode_steps: - done = True - info['TimeLimit.truncated'] = True - return observation, reward, done, info - - def reset(self, **kwargs): - self._elapsed_steps = 0 - return self.env.reset(**kwargs) - - class StochasticFrameSkip(gym.Wrapper): def __init__(self, env, n, stickprob): gym.Wrapper.__init__(self, env) @@ -61,7 +45,6 @@ def step(self, ac): def seed(self, s): self.rng.seed(s) - class PartialFrameStack(gym.Wrapper): def __init__(self, env, k, channel=1): """ @@ -71,8 +54,8 @@ def __init__(self, env, k, channel=1): shp = 
env.observation_space.shape self.channel = channel self.observation_space = gym.spaces.Box(low=0, high=255, - shape=(shp[0], shp[1], shp[2] + k - 1), - dtype=env.observation_space.dtype) + shape=(shp[0], shp[1], shp[2] + k - 1), + dtype=env.observation_space.dtype) self.k = k self.frames = deque([], maxlen=k) shp = env.observation_space.shape @@ -92,8 +75,7 @@ def step(self, ac): def _get_ob(self): assert len(self.frames) == self.k return np.concatenate([frame if i==self.k-1 else frame[:,:,self.channel:self.channel+1] - for (i, frame) in enumerate(self.frames)], axis=2) - + for (i, frame) in enumerate(self.frames)], axis=2) class Downsample(gym.ObservationWrapper): def __init__(self, env, ratio): @@ -103,10 +85,8 @@ def __init__(self, env, ratio): gym.ObservationWrapper.__init__(self, env) (oldh, oldw, oldc) = env.observation_space.shape newshape = (oldh//ratio, oldw//ratio, oldc) - self.observation_space = spaces.Box( - low=0, high=255, - shape=newshape, dtype=np.uint8 - ) + self.observation_space = gym.spaces.Box(low=0, high=255, + shape=newshape, dtype=np.uint8) def observation(self, frame): height, width, _ = self.observation_space.shape @@ -115,7 +95,6 @@ def observation(self, frame): frame = frame[:,:,None] return frame - class Rgb2gray(gym.ObservationWrapper): def __init__(self, env): """ @@ -123,10 +102,8 @@ def __init__(self, env): """ gym.ObservationWrapper.__init__(self, env) (oldh, oldw, _oldc) = env.observation_space.shape - self.observation_space = spaces.Box( - low=0, high=255, - shape=(oldh, oldw, 1), dtype=np.uint8 - ) + self.observation_space = gym.spaces.Box(low=0, high=255, + shape=(oldh, oldw, 1), dtype=np.uint8) def observation(self, frame): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) @@ -148,7 +125,6 @@ def reset(self): self.epcount += 1 return self.env.reset() - class AppendTimeout(gym.Wrapper): def __init__(self, env): gym.Wrapper.__init__(self, env) @@ -165,7 +141,7 @@ def __init__(self, env): self.observation_space = gym.spaces.Dict({ 
'original': self.original_os, 'value_estimation_timeout': self.timeout_space - }) + }) self.dict_mode = False self.ac_count = None while 1: @@ -191,7 +167,6 @@ def _process(self, ob): else: return { 'original': ob, 'value_estimation_timeout': fracmissing } - class StartDoingRandomActionsWrapper(gym.Wrapper): """ Warning: can eat info dicts, not good if you depend on them @@ -224,28 +199,28 @@ def step(self, a): self.some_random_steps() return self.last_obs, rew, done, info - -def make_retro(*, game, state, max_episode_steps, **kwargs): +def make_retro(*, game, state=None, max_episode_steps=4500, **kwargs): import retro + if state is None: + state = retro.State.DEFAULT env = retro.make(game, state, **kwargs) env = StochasticFrameSkip(env, n=4, stickprob=0.25) if max_episode_steps is not None: env = TimeLimit(env, max_episode_steps=max_episode_steps) return env - def wrap_deepmind_retro(env, scale=True, frame_stack=4): """ Configure environment for retro games, using config similar to DeepMind-style Atari in wrap_deepmind """ env = WarpFrame(env) env = ClipRewardEnv(env) - env = FrameStack(env, frame_stack) + if frame_stack > 1: + env = FrameStack(env, frame_stack) if scale: env = ScaledFloatFrame(env) return env - class SonicDiscretizer(gym.ActionWrapper): """ Wrap a gym-retro environment and make it use discrete @@ -267,7 +242,6 @@ def __init__(self, env): def action(self, a): # pylint: disable=W0221 return self._actions[a].copy() - class RewardScaler(gym.RewardWrapper): """ Bring rewards to a reasonable scale for PPO. 
@@ -281,7 +255,6 @@ def __init__(self, env, scale=0.01): def reward(self, reward): return reward * self.scale - class AllowBacktracking(gym.Wrapper): """ Use deltas in max(X) as the reward, rather than deltas diff --git a/vel/openai/baselines/common/running_mean_std.py b/vel/openai/baselines/common/running_mean_std.py index a8daae81..fb891589 100644 --- a/vel/openai/baselines/common/running_mean_std.py +++ b/vel/openai/baselines/common/running_mean_std.py @@ -15,18 +15,20 @@ def update(self, x): self.update_from_moments(batch_mean, batch_var, batch_count) def update_from_moments(self, batch_mean, batch_var, batch_count): - delta = batch_mean - self.mean - tot_count = self.count + batch_count + self.mean, self.var, self.count = update_mean_var_count_from_moments( + self.mean, self.var, self.count, batch_mean, batch_var, batch_count) - new_mean = self.mean + delta * batch_count / tot_count - m_a = self.var * self.count - m_b = batch_var * batch_count - M2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count) - new_var = M2 / (self.count + batch_count) - new_count = batch_count + self.count +def update_mean_var_count_from_moments(mean, var, count, batch_mean, batch_var, batch_count): + delta = batch_mean - mean + tot_count = count + batch_count - self.mean = new_mean - self.var = new_var - self.count = new_count + new_mean = mean + delta * batch_count / tot_count + m_a = var * count + m_b = batch_var * batch_count + M2 = m_a + m_b + np.square(delta) * count * batch_count / tot_count + new_var = M2 / tot_count + new_count = tot_count + + return new_mean, new_var, new_count diff --git a/vel/openai/baselines/common/vec_env/__init__.py b/vel/openai/baselines/common/vec_env/__init__.py index 0a124cb9..a0d5a348 100644 --- a/vel/openai/baselines/common/vec_env/__init__.py +++ b/vel/openai/baselines/common/vec_env/__init__.py @@ -1,4 +1,7 @@ +import contextlib +import os from abc import ABC, abstractmethod + from 
vel.openai.baselines.common.tile_images import tile_images @@ -135,7 +138,6 @@ def get_viewer(self): self.viewer = rendering.SimpleImageViewer() return self.viewer - class VecEnvWrapper(VecEnv): """ An environment wrapper that applies to an entire batch @@ -144,8 +146,7 @@ class VecEnvWrapper(VecEnv): def __init__(self, venv, observation_space=None, action_space=None): self.venv = venv - VecEnv.__init__(self, - num_envs=venv.num_envs, + super().__init__(num_envs=venv.num_envs, observation_space=observation_space or venv.observation_space, action_space=action_space or venv.action_space) @@ -169,6 +170,25 @@ def render(self, mode='human'): def get_images(self): return self.venv.get_images() + def __getattr__(self, name): + if name.startswith('_'): + raise AttributeError("attempted to get missing private attribute '{}'".format(name)) + return getattr(self.venv, name) + + +class VecEnvObservationWrapper(VecEnvWrapper): + @abstractmethod + def process(self, obs): + pass + + def reset(self): + obs = self.venv.reset() + return self.process(obs) + + def step_wait(self): + obs, rews, dones, infos = self.venv.step_wait() + return self.process(obs), rews, dones, infos + class CloudpickleWrapper(object): """ @@ -185,3 +205,22 @@ def __getstate__(self): def __setstate__(self, ob): import pickle self.x = pickle.loads(ob) + + +@contextlib.contextmanager +def clear_mpi_env_vars(): + """ + from mpi4py import MPI will call MPI_Init by default. If the child process has MPI environment variables, MPI will think that the child process is an MPI process just like the parent and do bad things such as hang. + This context manager is a hacky way to clear those environment variables temporarily such as when we are starting multiprocessing + Processes. 
+ """ + removed_environment = {} + for k, v in list(os.environ.items()): + for prefix in ['OMPI_', 'PMI_']: + if k.startswith(prefix): + removed_environment[k] = v + del os.environ[k] + try: + yield + finally: + os.environ.update(removed_environment) diff --git a/vel/openai/baselines/common/vec_env/dummy_vec_env.py b/vel/openai/baselines/common/vec_env/dummy_vec_env.py index 4f5c106f..ea21f130 100644 --- a/vel/openai/baselines/common/vec_env/dummy_vec_env.py +++ b/vel/openai/baselines/common/vec_env/dummy_vec_env.py @@ -1,8 +1,8 @@ import numpy as np -from gym import spaces from . import VecEnv from .util import copy_obs_dict, dict_to_obs, obs_space_info + class DummyVecEnv(VecEnv): """ VecEnv that does runs multiple environments sequentially, that is, @@ -13,6 +13,7 @@ class DummyVecEnv(VecEnv): def __init__(self, env_fns): """ Arguments: + env_fns: iterable of callables functions that build environments """ self.envs = [fn() for fn in env_fns] @@ -26,7 +27,7 @@ def __init__(self, env_fns): self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32) self.buf_infos = [{} for _ in range(self.num_envs)] self.actions = None - self.specs = [e.spec for e in self.envs] + self.spec = self.envs[0].spec def step_async(self, actions): listify = True @@ -45,8 +46,8 @@ def step_async(self, actions): def step_wait(self): for e in range(self.num_envs): action = self.actions[e] - if isinstance(self.envs[e].action_space, spaces.Discrete): - action = int(action) + # if isinstance(self.envs[e].action_space, spaces.Discrete): + # action = int(action) obs, self.buf_rews[e], self.buf_dones[e], self.buf_infos[e] = self.envs[e].step(action) if self.buf_dones[e]: diff --git a/vel/openai/baselines/common/vec_env/shmem_vec_env.py b/vel/openai/baselines/common/vec_env/shmem_vec_env.py index 4d941043..fcee5ad5 100644 --- a/vel/openai/baselines/common/vec_env/shmem_vec_env.py +++ b/vel/openai/baselines/common/vec_env/shmem_vec_env.py @@ -2,12 +2,12 @@ An interface for asynchronous vectorized 
environments. """ -from multiprocessing import Pipe, Array, Process - +import multiprocessing as mp import numpy as np from . import VecEnv, CloudpickleWrapper import ctypes from vel.openai.baselines import logger +from . import clear_mpi_env_vars from .util import dict_to_obs, obs_space_info, obs_to_dict @@ -23,11 +23,12 @@ class ShmemVecEnv(VecEnv): Optimized version of SubprocVecEnv that uses shared variables to communicate observations. """ - def __init__(self, env_fns, spaces=None): + def __init__(self, env_fns, spaces=None, context='spawn'): """ If you don't specify observation_space, we'll have to create a dummy environment to get it. """ + ctx = mp.get_context(context) if spaces: observation_space, action_space = spaces else: @@ -40,22 +41,24 @@ def __init__(self, env_fns, spaces=None): VecEnv.__init__(self, len(env_fns), observation_space, action_space) self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(observation_space) self.obs_bufs = [ - {k: Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k]))) for k in self.obs_keys} + {k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k]))) for k in + self.obs_keys} for _ in env_fns] self.parent_pipes = [] self.procs = [] - for env_fn, obs_buf in zip(env_fns, self.obs_bufs): - wrapped_fn = CloudpickleWrapper(env_fn) - parent_pipe, child_pipe = Pipe() - proc = Process(target=_subproc_worker, - args=(child_pipe, parent_pipe, wrapped_fn, obs_buf, self.obs_shapes, self.obs_dtypes, self.obs_keys)) - proc.daemon = True - self.procs.append(proc) - self.parent_pipes.append(parent_pipe) - proc.start() - child_pipe.close() + with clear_mpi_env_vars(): + for env_fn, obs_buf in zip(env_fns, self.obs_bufs): + wrapped_fn = CloudpickleWrapper(env_fn) + parent_pipe, child_pipe = ctx.Pipe() + proc = ctx.Process(target=_subproc_worker, + args=(child_pipe, parent_pipe, wrapped_fn, obs_buf, self.obs_shapes, + self.obs_dtypes, self.obs_keys)) + proc.daemon = True + 
self.procs.append(proc) + self.parent_pipes.append(parent_pipe) + proc.start() + child_pipe.close() self.waiting_step = False - self.specs = [f().spec for f in env_fns] self.viewer = None def reset(self): @@ -70,9 +73,11 @@ def step_async(self, actions): assert len(actions) == len(self.parent_pipes) for pipe, act in zip(self.parent_pipes, actions): pipe.send(('step', act)) + self.waiting_step = True def step_wait(self): outs = [pipe.recv() for pipe in self.parent_pipes] + self.waiting_step = False obs, rews, dones, infos = zip(*outs) return self._decode_obses(obs), np.array(rews), np.array(dones), infos @@ -95,18 +100,17 @@ def get_images(self, mode='human'): def _decode_obses(self, obs): result = {} for k in self.obs_keys: - bufs = [b[k] for b in self.obs_bufs] o = [np.frombuffer(b.get_obj(), dtype=self.obs_dtypes[k]).reshape(self.obs_shapes[k]) for b in bufs] result[k] = np.array(o) return dict_to_obs(result) - def _subproc_worker(pipe, parent_pipe, env_fn_wrapper, obs_bufs, obs_shapes, obs_dtypes, keys): """ Control a single environment instance using IPC and shared memory. """ + def _write_obs(maybe_dict_obs): flatdict = obs_to_dict(maybe_dict_obs) for k in keys: diff --git a/vel/openai/baselines/common/vec_env/subproc_vec_env.py b/vel/openai/baselines/common/vec_env/subproc_vec_env.py index fe46ff06..a5b72ace 100644 --- a/vel/openai/baselines/common/vec_env/subproc_vec_env.py +++ b/vel/openai/baselines/common/vec_env/subproc_vec_env.py @@ -1,6 +1,7 @@ +import multiprocessing as mp + import numpy as np -from multiprocessing import Process, Pipe -from . import VecEnv, CloudpickleWrapper +from . 
import VecEnv, CloudpickleWrapper, clear_mpi_env_vars def worker(remote, parent_remote, env_fn_wrapper): @@ -22,8 +23,8 @@ def worker(remote, parent_remote, env_fn_wrapper): elif cmd == 'close': remote.close() break - elif cmd == 'get_spaces': - remote.send((env.observation_space, env.action_space)) + elif cmd == 'get_spaces_spec': + remote.send((env.observation_space, env.action_space, env.spec)) else: raise NotImplementedError except KeyboardInterrupt: @@ -37,7 +38,7 @@ class SubprocVecEnv(VecEnv): VecEnv that runs multiple environments in parallel in subproceses and communicates with them via pipes. Recommended to use when num_envs > 1 and step() can be a bottleneck. """ - def __init__(self, env_fns, spaces=None): + def __init__(self, env_fns, spaces=None, context='spawn'): """ Arguments: @@ -46,19 +47,20 @@ def __init__(self, env_fns, spaces=None): self.waiting = False self.closed = False nenvs = len(env_fns) - self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) - self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) + ctx = mp.get_context(context) + self.remotes, self.work_remotes = zip(*[ctx.Pipe() for _ in range(nenvs)]) + self.ps = [ctx.Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)] for p in self.ps: p.daemon = True # if the main process crashes, we should not cause things to hang - p.start() + with clear_mpi_env_vars(): + p.start() for remote in self.work_remotes: remote.close() - self.remotes[0].send(('get_spaces', None)) - observation_space, action_space = self.remotes[0].recv() + self.remotes[0].send(('get_spaces_spec', None)) + observation_space, action_space, self.spec = self.remotes[0].recv() self.viewer = None - self.specs = [f().spec for f in env_fns] VecEnv.__init__(self, len(env_fns), observation_space, action_space) def step_async(self, actions): @@ -100,16 +102,16 @@ def 
get_images(self): def _assert_not_closed(self): assert not self.closed, "Trying to operate on a SubprocVecEnv after calling close()" + def __del__(self): + if not self.closed: + self.close() def _flatten_obs(obs): - assert isinstance(obs, list) or isinstance(obs, tuple) + assert isinstance(obs, (list, tuple)) assert len(obs) > 0 if isinstance(obs[0], dict): - import collections - assert isinstance(obs, collections.OrderedDict) keys = obs[0].keys() return {k: np.stack([o[k] for o in obs]) for k in keys} else: return np.stack(obs) - diff --git a/vel/openai/baselines/common/wrappers.py b/vel/openai/baselines/common/wrappers.py new file mode 100644 index 00000000..95919264 --- /dev/null +++ b/vel/openai/baselines/common/wrappers.py @@ -0,0 +1,31 @@ +import gym + + +class TimeLimit(gym.Wrapper): + def __init__(self, env, max_episode_steps=None): + super(TimeLimit, self).__init__(env) + self._max_episode_steps = max_episode_steps + self._elapsed_steps = 0 + + def step(self, ac): + observation, reward, done, info = self.env.step(ac) + self._elapsed_steps += 1 + if self._elapsed_steps >= self._max_episode_steps: + done = True + info['TimeLimit.truncated'] = True + return observation, reward, done, info + + def reset(self, **kwargs): + self._elapsed_steps = 0 + return self.env.reset(**kwargs) + + +class ClipActionsWrapper(gym.Wrapper): + def step(self, action): + import numpy as np + action = np.nan_to_num(action) + action = np.clip(action, self.action_space.low, self.action_space.high) + return self.env.step(action) + + def reset(self, **kwargs): + return self.env.reset(**kwargs) diff --git a/vel/openai/baselines/logger.py b/vel/openai/baselines/logger.py index 05eab9ac..e92776ca 100644 --- a/vel/openai/baselines/logger.py +++ b/vel/openai/baselines/logger.py @@ -7,6 +7,7 @@ import datetime import tempfile from collections import defaultdict +from contextlib import contextmanager DEBUG = 10 INFO = 20 @@ -37,8 +38,8 @@ def writekvs(self, kvs): # Create strings for printing 
key2str = {} for (key, val) in sorted(kvs.items()): - if isinstance(val, float): - valstr = '%-8.3g' % (val,) + if hasattr(val, '__float__'): + valstr = '%-8.3g' % val else: valstr = str(val) key2str[self._truncate(key)] = self._truncate(valstr) @@ -68,7 +69,8 @@ def writekvs(self, kvs): self.file.flush() def _truncate(self, s): - return s[:20] + '...' if len(s) > 23 else s + maxlen = 30 + return s[:maxlen-3] + '...' if len(s) > maxlen else s def writeseq(self, seq): seq = list(seq) @@ -90,7 +92,6 @@ def __init__(self, filename): def writekvs(self, kvs): for k, v in sorted(kvs.items()): if hasattr(v, 'dtype'): - v = v.tolist() kvs[k] = float(v) self.file.write(json.dumps(kvs) + '\n') self.file.flush() @@ -195,13 +196,13 @@ def logkv(key, val): Call this once for each diagnostic quantity, each iteration If called many times, last value will be used. """ - Logger.CURRENT.logkv(key, val) + get_current().logkv(key, val) def logkv_mean(key, val): """ The same as logkv(), but if called many times, values averaged. """ - Logger.CURRENT.logkv_mean(key, val) + get_current().logkv_mean(key, val) def logkvs(d): """ @@ -213,21 +214,18 @@ def logkvs(d): def dumpkvs(): """ Write all of the diagnostics from the current iteration - - level: int. (see logger.py docs) If the global logger level is higher than - the level argument here, don't print to stdout. """ - Logger.CURRENT.dumpkvs() + return get_current().dumpkvs() def getkvs(): - return Logger.CURRENT.name2val + return get_current().name2val def log(*args, level=INFO): """ Write the sequence of args, with no separators, to the console and output files (if you've configured an output file). """ - Logger.CURRENT.log(*args, level=level) + get_current().log(*args, level=level) def debug(*args): log(*args, level=DEBUG) @@ -246,30 +244,29 @@ def set_level(level): """ Set logging threshold on current logger. 
""" - Logger.CURRENT.set_level(level) + get_current().set_level(level) + +def set_comm(comm): + get_current().set_comm(comm) def get_dir(): """ Get directory that log files are being written to. will be None if there is no output directory (i.e., if you didn't call start) """ - return Logger.CURRENT.get_dir() + return get_current().get_dir() record_tabular = logkv dump_tabular = dumpkvs -class ProfileKV: - """ - Usage: - with logger.ProfileKV("interesting_scope"): - code - """ - def __init__(self, n): - self.n = "wait_" + n - def __enter__(self): - self.t1 = time.time() - def __exit__(self ,type, value, traceback): - Logger.CURRENT.name2val[self.n] += time.time() - self.t1 +@contextmanager +def profile_kv(scopename): + logkey = 'wait_' + scopename + tstart = time.time() + try: + yield + finally: + get_current().name2val[logkey] += time.time() - tstart def profile(n): """ @@ -279,7 +276,7 @@ def my_func(): code """ def decorator_with_name(func): def func_wrapper(*args, **kwargs): - with ProfileKV(n): + with profile_kv(n): return func(*args, **kwargs) return func_wrapper return decorator_with_name @@ -289,17 +286,25 @@ def func_wrapper(*args, **kwargs): # Backend # ================================================================ +def get_current(): + if Logger.CURRENT is None: + _configure_default_logger() + + return Logger.CURRENT + + class Logger(object): DEFAULT = None # A logger with no output files. 
(See right below class definition) # So that you can still log to the terminal without setting up any output files CURRENT = None # Current logger being used by the free functions above - def __init__(self, dir, output_formats): + def __init__(self, dir, output_formats, comm=None): self.name2val = defaultdict(float) # values this iteration self.name2cnt = defaultdict(int) self.level = INFO self.dir = dir self.output_formats = output_formats + self.comm = comm # Logging API, forwarded # ---------------------------------------- @@ -307,20 +312,19 @@ def logkv(self, key, val): self.name2val[key] = val def logkv_mean(self, key, val): - if val is None: - self.name2val[key] = None - return oldval, cnt = self.name2val[key], self.name2cnt[key] self.name2val[key] = oldval*cnt/(cnt+1) + val/(cnt+1) self.name2cnt[key] = cnt + 1 def dumpkvs(self): - if self.level == DISABLED: return + d = self.name2val + out = d.copy() # Return the dict for unit testing purposes for fmt in self.output_formats: if isinstance(fmt, KVWriter): - fmt.writekvs(self.name2val) + fmt.writekvs(d) self.name2val.clear() self.name2cnt.clear() + return out def log(self, *args, level=INFO): if self.level <= level: @@ -331,6 +335,9 @@ def log(self, *args, level=INFO): def set_level(self, level): self.level = level + def set_comm(self, comm): + self.comm = comm + def get_dir(self): return self.dir @@ -345,7 +352,19 @@ def _do_log(self, args): if isinstance(fmt, SeqWriter): fmt.writeseq(map(str, args)) -def configure(dir=None, format_strs=None): +def get_rank_without_mpi_import(): + # check environment variables here instead of importing mpi4py + # to avoid calling MPI_Init() when this module is imported + for varname in ['PMI_RANK', 'OMPI_COMM_WORLD_RANK']: + if varname in os.environ: + return int(os.environ[varname]) + return 0 + + +def configure(dir=None, format_strs=None, comm=None, log_suffix=''): + """ + If comm is provided, average all numerical stats across that comm + """ if dir is None: dir = 
os.getenv('OPENAI_LOGDIR') if dir is None: @@ -354,15 +373,9 @@ def configure(dir=None, format_strs=None): assert isinstance(dir, str) os.makedirs(dir, exist_ok=True) - log_suffix = '' - rank = 0 - # check environment variables here instead of importing mpi4py - # to avoid calling MPI_Init() when this module is imported - for varname in ['PMI_RANK', 'OMPI_COMM_WORLD_RANK']: - if varname in os.environ: - rank = int(os.environ[varname]) + rank = get_rank_without_mpi_import() if rank > 0: - log_suffix = "-rank%03i" % rank + log_suffix = log_suffix + "-rank%03i" % rank if format_strs is None: if rank == 0: @@ -372,15 +385,11 @@ def configure(dir=None, format_strs=None): format_strs = filter(None, format_strs) output_formats = [make_output_format(f, dir, log_suffix) for f in format_strs] - Logger.CURRENT = Logger(dir=dir, output_formats=output_formats) - # log('Logging to %s'%dir) + Logger.CURRENT = Logger(dir=dir, output_formats=output_formats, comm=comm) + log('Logging to %s'%dir) def _configure_default_logger(): - format_strs = None - # keep the old default of only writing to stdout - if 'OPENAI_LOG_FORMAT' not in os.environ: - format_strs = ['stdout'] - configure(format_strs=format_strs) + configure() Logger.DEFAULT = Logger.CURRENT def reset(): @@ -389,17 +398,15 @@ def reset(): Logger.CURRENT = Logger.DEFAULT log('Reset logger') -class scoped_configure(object): - def __init__(self, dir=None, format_strs=None): - self.dir = dir - self.format_strs = format_strs - self.prevlogger = None - def __enter__(self): - self.prevlogger = Logger.CURRENT - configure(dir=self.dir, format_strs=self.format_strs) - def __exit__(self, *args): +@contextmanager +def scoped_configure(dir=None, format_strs=None, comm=None): + prevlogger = Logger.CURRENT + configure(dir=dir, format_strs=format_strs, comm=comm) + try: + yield + finally: Logger.CURRENT.close() - Logger.CURRENT = self.prevlogger + Logger.CURRENT = prevlogger # ================================================================ 
@@ -423,7 +430,7 @@ def _demo(): logkv_mean("b", -44.4) logkv("a", 5.5) dumpkvs() - info("^^^ should see b = 33.3") + info("^^^ should see b = -33.3") logkv("b", -2.5) dumpkvs() @@ -456,7 +463,6 @@ def read_tb(path): import pandas import numpy as np from glob import glob - from collections import defaultdict import tensorflow as tf if osp.isdir(path): fnames = glob(osp.join(path, "events.*")) @@ -482,8 +488,5 @@ def read_tb(path): data[step-1, colidx] = value return pandas.DataFrame(data, columns=tags) -# configure the default logger on import -_configure_default_logger() - if __name__ == "__main__": _demo() diff --git a/vel/rl/algo/a2c.py b/vel/rl/algo/a2c.py new file mode 100644 index 00000000..69b7926d --- /dev/null +++ b/vel/rl/algo/a2c.py @@ -0,0 +1,142 @@ +import torch +import torch.nn.functional as F + +from vel.metric.base import AveragingNamedMetric +from vel.calc.function import explained_variance +from vel.api import BackboneModel, ModelFactory, BatchInfo + +from vel.rl.api import RlPolicy, Rollout, Trajectories +from vel.rl.discount_bootstrap import discount_bootstrap_gae + + +class A2C(RlPolicy): + """ Simplest policy gradient - calculate loss as an advantage of an actor versus value function """ + def __init__(self, policy: BackboneModel, entropy_coefficient, value_coefficient, discount_factor: float, + gae_lambda=1.0): + super().__init__(discount_factor) + + self.entropy_coefficient = entropy_coefficient + self.value_coefficient = value_coefficient + self.gae_lambda = gae_lambda + + self.policy = policy + + def reset_weights(self): + """ Initialize properly model weights """ + self.policy.reset_weights() + + def forward(self, observation): + """ Calculate model outputs """ + return self.policy(observation) + + def act(self, observation, state=None, deterministic=False): + """ Select actions based on model's output """ + action_pd_params, value_output = self(observation) + actions = self.policy.action_head.sample(action_pd_params, 
deterministic=deterministic) + + # log likelihood of selected action + logprobs = self.policy.action_head.logprob(actions, action_pd_params) + + return { + 'actions': actions, + 'values': value_output, + 'action:logprobs': logprobs + } + + def process_rollout(self, rollout: Rollout) -> Rollout: + """ Process rollout for optimization before any chunking/shuffling """ + assert isinstance(rollout, Trajectories), "A2C requires trajectory rollouts" + + advantages = discount_bootstrap_gae( + rewards_buffer=rollout.transition_tensors['rewards'], + dones_buffer=rollout.transition_tensors['dones'], + values_buffer=rollout.transition_tensors['values'], + final_values=rollout.rollout_tensors['final_values'], + discount_factor=self.discount_factor, + gae_lambda=self.gae_lambda, + number_of_steps=rollout.num_steps + ) + + returns = advantages + rollout.transition_tensors['values'] + + rollout.transition_tensors['advantages'] = advantages + rollout.transition_tensors['returns'] = returns + + return rollout + + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ Calculate loss of the supplied rollout """ + observations = rollout.batch_tensor('observations') + + actions = rollout.batch_tensor('actions') + advantages = rollout.batch_tensor('advantages') + returns = rollout.batch_tensor('returns') + rollout_values = rollout.batch_tensor('values') + + pd_params, model_values = self(observations) + + log_probs = self.policy.action_head.logprob(actions, pd_params) + entropy = self.policy.action_head.entropy(pd_params) + + # Actual calculations. 
Pretty trivial + policy_loss = -torch.mean(advantages * log_probs) + value_loss = 0.5 * F.mse_loss(model_values, returns) + policy_entropy = torch.mean(entropy) + + loss_value = ( + policy_loss - self.entropy_coefficient * policy_entropy + self.value_coefficient * value_loss + ) + + loss_value.backward() + + return { + 'policy_loss': policy_loss.item(), + 'value_loss': value_loss.item(), + 'policy_entropy': policy_entropy.item(), + 'advantage_norm': torch.norm(advantages).item(), + 'explained_variance': explained_variance(returns, rollout_values) + } + + def metrics(self) -> list: + """ List of metrics to track for this learning process """ + return [ + AveragingNamedMetric("value_loss", scope="model"), + AveragingNamedMetric("policy_entropy", scope="model"), + AveragingNamedMetric("policy_loss", scope="model"), + AveragingNamedMetric("advantage_norm", scope="model"), + AveragingNamedMetric("explained_variance", scope="model") + ] + + +class A2CFactory(ModelFactory): + """ Factory class for policy gradient models """ + def __init__(self, policy, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): + self.policy = policy + self.entropy_coefficient = entropy_coefficient + self.value_coefficient = value_coefficient + self.discount_factor = discount_factor + self.gae_lambda = gae_lambda + + def instantiate(self, **extra_args): + """ Instantiate the model """ + # action_space = extra_args.pop('action_space') + policy = self.policy.instantiate(**extra_args) + + return A2C( + policy=policy, + entropy_coefficient=self.entropy_coefficient, + value_coefficient=self.value_coefficient, + discount_factor=self.discount_factor, + gae_lambda=self.gae_lambda + ) + + +def create(policy: BackboneModel, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): + """ Vel factory function """ + return A2CFactory( + policy=policy, + entropy_coefficient=entropy_coefficient, + value_coefficient=value_coefficient, + discount_factor=discount_factor, + 
gae_lambda=gae_lambda + ) diff --git a/vel/rl/algo/policy_gradient/trpo.py b/vel/rl/algo/policy_gradient/trpo.py deleted file mode 100644 index f4fa4206..00000000 --- a/vel/rl/algo/policy_gradient/trpo.py +++ /dev/null @@ -1,274 +0,0 @@ -import numpy as np -import torch -import torch.autograd as autograd -import torch.nn.functional as F -import torch.nn.utils - -from vel.calc.function import explained_variance -from vel.metric.base import AveragingNamedMetric -from vel.rl.api import AlgoBase, Rollout, Trajectories -from vel.rl.discount_bootstrap import discount_bootstrap_gae - - -def p2v(params): - """ Parameters to vector - shorthand utility version """ - return torch.nn.utils.parameters_to_vector(params) - - -def v2p(vector, params): - """ Vector to parameters - shorthand utility version """ - return torch.nn.utils.vector_to_parameters(vector, params) - - -def conjugate_gradient_method(matrix_vector_operator, loss_gradient, nsteps, rdotr_tol=1e-10): - """ Conjugate gradient algorithm """ - x = torch.zeros_like(loss_gradient) - - r = loss_gradient.clone() - p = loss_gradient.clone() - - rdotr = torch.dot(r, r) - - for i in range(nsteps): - avp = matrix_vector_operator(p) - alpha = rdotr / torch.dot(p, avp) - - x += alpha * p - r -= alpha * avp - - new_rdotr = torch.dot(r, r) - betta = new_rdotr / rdotr - p = r + betta * p - rdotr = new_rdotr - - if rdotr < rdotr_tol: - break - - return x - - -class TrpoPolicyGradient(AlgoBase): - """ Trust Region Policy Optimization - https://arxiv.org/abs/1502.05477 """ - - def __init__(self, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coef, vf_iters, - discount_factor, gae_lambda, improvement_acceptance_ratio, max_grad_norm): - self.mak_kl = max_kl - self.cg_iters = cg_iters - self.line_search_iters = line_search_iters - self.cg_damping = cg_damping - self.entropy_coef = entropy_coef - self.vf_iters = vf_iters - self.discount_factor = discount_factor - self.gae_lambda = gae_lambda - self.improvement_acceptance_ratio 
= improvement_acceptance_ratio - self.max_grad_norm = max_grad_norm - - def process_rollout(self, batch_info, rollout: Rollout): - """ Process rollout for ALGO before any chunking/shuffling """ - assert isinstance(rollout, Trajectories), "TRPO requires trajectory rollouts" - - advantages = discount_bootstrap_gae( - rewards_buffer=rollout.transition_tensors['rewards'], - dones_buffer=rollout.transition_tensors['dones'], - values_buffer=rollout.transition_tensors['values'], - final_values=rollout.rollout_tensors['final_values'], - discount_factor=self.discount_factor, - gae_lambda=self.gae_lambda, - number_of_steps=rollout.num_steps - ) - - returns = advantages + rollout.transition_tensors['values'] - - rollout.transition_tensors['advantages'] = advantages - rollout.transition_tensors['returns'] = returns - - return rollout - - def optimize(self, batch_info, device, model, rollout): - """ Single optimization step for a model """ - rollout = rollout.to_transitions() - - # This algorithm makes quote strong assumptions about how does the model look - # so it does not make that much sense to switch to the evaluator interface - # As it would be more of a problem than actual benefit - - observations = rollout.batch_tensor('observations') - returns = rollout.batch_tensor('returns') - - # Evaluate model on the observations - policy_params = model.policy(observations) - policy_entropy = torch.mean(model.entropy(policy_params)) - - policy_loss = self.calc_policy_loss(model, policy_params, policy_entropy, rollout) - policy_grad = p2v(autograd.grad(policy_loss, model.policy_parameters(), retain_graph=True)).detach() - - # Calculate gradient of KL divergence of model with fixed version of itself - # Value of kl_divergence will be 0, but what we need is the gradient, actually the 2nd derivarive - kl_divergence = torch.mean(model.kl_divergence(policy_params.detach(), policy_params)) - kl_divergence_gradient = p2v(torch.autograd.grad(kl_divergence, model.policy_parameters(), 
create_graph=True)) - - step_direction = conjugate_gradient_method( - matrix_vector_operator=lambda x: self.fisher_vector_product(x, kl_divergence_gradient, model), - # Because we want to decrease the loss, we want to go into the direction of -gradient - loss_gradient=-policy_grad, - nsteps=self.cg_iters - ) - - shs = 0.5 * step_direction @ self.fisher_vector_product(step_direction, kl_divergence_gradient, model) - lm = torch.sqrt(shs / self.mak_kl) - full_step = step_direction / lm - - # Because we want to decrease the loss, we want to go into the direction of -gradient - expected_improvement = (-policy_grad) @ full_step - original_parameter_vec = p2v(model.policy_parameters()).detach_() - - (policy_optimization_success, ratio, policy_loss_improvement, new_policy_loss, kl_divergence_step) = ( - self.line_search( - model, rollout, policy_loss, policy_params, original_parameter_vec, full_step, expected_improvement - ) - ) - - gradient_norms = [] - - for i in range(self.vf_iters): - batch_info.optimizer.zero_grad() - value_loss = self.value_loss(model, observations, returns) - - value_loss.backward() - - # Gradient clipping - if self.max_grad_norm is not None: - grad_norm = torch.nn.utils.clip_grad_norm_( - filter(lambda p: p.requires_grad, model.parameters()), - max_norm=self.max_grad_norm - ) - - gradient_norms.append(grad_norm) - - batch_info.optimizer.step(closure=None) - - if gradient_norms: - gradient_norm = np.mean(gradient_norms) - else: - gradient_norm = 0.0 - - # noinspection PyUnboundLocalVariable - return { - 'new_policy_loss': new_policy_loss.item(), - 'policy_entropy': policy_entropy.item(), - 'value_loss': value_loss.item(), - 'policy_optimization_success': float(policy_optimization_success), - 'policy_improvement_ratio': ratio.item(), - 'kl_divergence_step': kl_divergence_step.item(), - 'policy_loss_improvement': policy_loss_improvement.item(), - 'grad_norm': gradient_norm, - 'advantage_norm': torch.norm(rollout.batch_tensor('advantages')).item(), - 
'explained_variance': explained_variance(returns, rollout.batch_tensor('values')) - } - - def line_search(self, model, rollout, original_policy_loss, original_policy_params, original_parameter_vec, - full_step, expected_improvement_full): - """ Find the right stepsize to make sure policy improves """ - current_parameter_vec = original_parameter_vec.clone() - - for idx in range(self.line_search_iters): - stepsize = 0.5 ** idx - - new_parameter_vec = current_parameter_vec + stepsize * full_step - - # Update model parameters - v2p(new_parameter_vec, model.policy_parameters()) - - # Calculate new loss - with torch.no_grad(): - policy_params = model.policy(rollout.batch_tensor('observations')) - policy_entropy = torch.mean(model.entropy(policy_params)) - kl_divergence = torch.mean(model.kl_divergence(original_policy_params, policy_params)) - - new_loss = self.calc_policy_loss(model, policy_params, policy_entropy, rollout) - - actual_improvement = original_policy_loss - new_loss - expected_improvement = expected_improvement_full * stepsize - - ratio = actual_improvement / expected_improvement - - if kl_divergence.item() > self.mak_kl * 1.5: - # KL divergence bound exceeded - continue - elif ratio < expected_improvement: - # Not enough loss improvement - continue - else: - # Optimization successful - return True, ratio, actual_improvement, new_loss, kl_divergence - - # Optimization failed, revert to initial parameters - v2p(original_parameter_vec, model.policy_parameters()) - return False, torch.tensor(0.0), torch.tensor(0.0), torch.tensor(0.0), torch.tensor(0.0) - - def fisher_vector_product(self, vector, kl_divergence_gradient, model): - """ Calculate product Hessian @ vector """ - assert not vector.requires_grad, "Vector must not propagate gradient" - dot_product = vector @ kl_divergence_gradient - - # at least one dimension spans across two contiguous subspaces - double_gradient = torch.autograd.grad(dot_product, model.policy_parameters(), retain_graph=True) - fvp = 
p2v(x.contiguous() for x in double_gradient) - - return fvp + vector * self.cg_damping - - def value_loss(self, model, observations, discounted_rewards): - """ Loss of value estimator """ - value_outputs = model.value(observations) - value_loss = 0.5 * F.mse_loss(value_outputs, discounted_rewards) - return value_loss - - def calc_policy_loss(self, model, policy_params, policy_entropy, rollout): - """ - Policy gradient loss - calculate from probability distribution - - Calculate surrogate loss - advantage * policy_probability / fixed_initial_policy_probability - - Because we operate with logarithm of -probability (neglogp) we do - - advantage * exp(fixed_neglogps - model_neglogps) - """ - actions = rollout.batch_tensor('actions') - advantages = rollout.batch_tensor('advantages') - fixed_logprobs = rollout.batch_tensor('action:logprobs') - - model_logprobs = model.logprob(actions, policy_params) - - # Normalize advantages - advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) - - # We put - in front because we want to maximize the surrogate objective - policy_loss = -advantages * torch.exp(model_logprobs - fixed_logprobs) - - return policy_loss.mean() - policy_entropy * self.entropy_coef - - def metrics(self) -> list: - """ List of metrics to track for this learning process """ - return [ - AveragingNamedMetric("new_policy_loss"), - AveragingNamedMetric("policy_entropy"), - AveragingNamedMetric("value_loss"), - AveragingNamedMetric("policy_optimization_success"), - AveragingNamedMetric("policy_improvement_ratio"), - AveragingNamedMetric("kl_divergence_step"), - AveragingNamedMetric("policy_loss_improvement"), - AveragingNamedMetric("grad_norm"), - AveragingNamedMetric("advantage_norm"), - AveragingNamedMetric("explained_variance") - ] - - -def create(max_kl, cg_iters, line_search_iters, cg_damping, entropy_coef, vf_iters, discount_factor, - gae_lambda=1.0, improvement_acceptance_ratio=0.1, max_grad_norm=0.5): - """ Vel factory function """ - 
return TrpoPolicyGradient( - max_kl, int(cg_iters), int(line_search_iters), cg_damping, entropy_coef, vf_iters, - discount_factor=discount_factor, - gae_lambda=gae_lambda, - improvement_acceptance_ratio=improvement_acceptance_ratio, - max_grad_norm=max_grad_norm - ) diff --git a/vel/rl/algo/ppo.py b/vel/rl/algo/ppo.py new file mode 100644 index 00000000..483b4830 --- /dev/null +++ b/vel/rl/algo/ppo.py @@ -0,0 +1,193 @@ +import torch + +import numbers + +from vel.api import BackboneModel, BatchInfo, ModelFactory +from vel.calc.function import explained_variance +from vel.function.constant import ConstantSchedule +from vel.metric.base import AveragingNamedMetric + +from vel.rl.api import RlPolicy, Rollout, Trajectories +from vel.rl.discount_bootstrap import discount_bootstrap_gae + + +class PPO(RlPolicy): + """ Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 """ + def __init__(self, policy: BackboneModel, + entropy_coefficient, value_coefficient, cliprange, discount_factor: float, + normalize_advantage: bool = True, gae_lambda: float = 1.0): + super().__init__(discount_factor) + + self.entropy_coefficient = entropy_coefficient + self.value_coefficient = value_coefficient + self.normalize_advantage = normalize_advantage + self.gae_lambda = gae_lambda + + if isinstance(cliprange, numbers.Number): + self.cliprange = ConstantSchedule(cliprange) + else: + self.cliprange = cliprange + + self.policy = policy + + def reset_weights(self): + """ Initialize properly model weights """ + self.policy.reset_weights() + + def forward(self, observation): + """ Calculate model outputs """ + return self.policy.forward(observation) + + def act(self, observation, state=None, deterministic=False): + """ Select actions based on model's output """ + action_pd_params, value_output = self(observation) + actions = self.policy.action_head.sample(action_pd_params, deterministic=deterministic) + + # log likelihood of selected action + logprobs = 
self.policy.action_head.logprob(actions, action_pd_params) + + return { + 'actions': actions, + 'values': value_output, + 'action:logprobs': logprobs + } + + def process_rollout(self, rollout: Rollout): + """ Process rollout for optimization before any chunking/shuffling """ + assert isinstance(rollout, Trajectories), "PPO requires trajectory rollouts" + + advantages = discount_bootstrap_gae( + rewards_buffer=rollout.transition_tensors['rewards'], + dones_buffer=rollout.transition_tensors['dones'], + values_buffer=rollout.transition_tensors['values'], + final_values=rollout.rollout_tensors['final_values'], + discount_factor=self.discount_factor, + gae_lambda=self.gae_lambda, + number_of_steps=rollout.num_steps + ) + + returns = advantages + rollout.transition_tensors['values'] + + rollout.transition_tensors['advantages'] = advantages + rollout.transition_tensors['returns'] = returns + + return rollout + + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ Calculate loss of the supplied rollout """ + observations = rollout.batch_tensor('observations') + + # Part 0.0 - Rollout values + actions = rollout.batch_tensor('actions') + advantages = rollout.batch_tensor('advantages') + returns = rollout.batch_tensor('returns') + rollout_values = rollout.batch_tensor('values') + + rollout_action_logprobs = rollout.batch_tensor('action:logprobs') + + # PART 0.1 - Model evaluation + pd_params, model_values = self(observations) + + model_action_logprobs = self.policy.action_head.logprob(actions, pd_params) + entropy = self.policy.action_head.entropy(pd_params) + + # Select the cliprange + current_cliprange = self.cliprange.value(batch_info['progress']) + + # Normalize the advantages? 
+ if self.normalize_advantage: + advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) + + # PART 1 - policy entropy + policy_entropy = torch.mean(entropy) + + # PART 2 - value function + value_output_clipped = rollout_values + torch.clamp( + model_values - rollout_values, -current_cliprange, current_cliprange + ) + value_loss_part1 = (model_values - returns).pow(2) + value_loss_part2 = (value_output_clipped - returns).pow(2) + value_loss = 0.5 * torch.mean(torch.max(value_loss_part1, value_loss_part2)) + + # PART 3 - policy gradient loss + ratio = torch.exp(model_action_logprobs - rollout_action_logprobs) + + pg_loss_part1 = -advantages * ratio + pg_loss_part2 = -advantages * torch.clamp(ratio, 1.0 - current_cliprange, 1.0 + current_cliprange) + policy_loss = torch.mean(torch.max(pg_loss_part1, pg_loss_part2)) + + loss_value = ( + policy_loss - self.entropy_coefficient * policy_entropy + self.value_coefficient * value_loss + ) + + loss_value.backward() + + with torch.no_grad(): + approx_kl_divergence = 0.5 * torch.mean((model_action_logprobs - rollout_action_logprobs).pow(2)) + clip_fraction = torch.mean((torch.abs(ratio - 1.0) > current_cliprange).to(dtype=torch.float)) + + return { + 'policy_loss': policy_loss.item(), + 'value_loss': value_loss.item(), + 'policy_entropy': policy_entropy.item(), + 'approx_kl_divergence': approx_kl_divergence.item(), + 'clip_fraction': clip_fraction.item(), + 'advantage_norm': torch.norm(advantages).item(), + 'explained_variance': explained_variance(returns, rollout_values) + } + + def metrics(self) -> list: + """ List of metrics to track for this learning process """ + return [ + AveragingNamedMetric("policy_loss", scope="model"), + AveragingNamedMetric("value_loss", scope="model"), + AveragingNamedMetric("policy_entropy", scope="model"), + AveragingNamedMetric("approx_kl_divergence", scope="model"), + AveragingNamedMetric("clip_fraction", scope="model"), + AveragingNamedMetric("advantage_norm", 
scope="model"), + AveragingNamedMetric("explained_variance", scope="model") + ] + + +class PPOFactory(ModelFactory): + """ Factory class for policy gradient models """ + def __init__(self, policy: BackboneModel, + entropy_coefficient, value_coefficient, cliprange, discount_factor: float, + normalize_advantage: bool = True, gae_lambda: float = 1.0): + self.policy = policy + self.entropy_coefficient = entropy_coefficient + self.value_coefficient = value_coefficient + self.cliprange = cliprange + self.discount_factor = discount_factor + self.normalize_advantage = normalize_advantage + self.gae_lambda = gae_lambda + + def instantiate(self, **extra_args): + """ Instantiate the model """ + policy = self.policy.instantiate(**extra_args) + + return PPO( + policy=policy, + entropy_coefficient=self.entropy_coefficient, + value_coefficient=self.value_coefficient, + cliprange=self.cliprange, + discount_factor=self.discount_factor, + normalize_advantage=self.normalize_advantage, + gae_lambda=self.gae_lambda, + ) + + +def create(policy: BackboneModel, + entropy_coefficient, value_coefficient, cliprange, discount_factor: float, + normalize_advantage: bool = True, gae_lambda: float = 1.0): + """ Vel factory function """ + return PPOFactory( + policy=policy, + entropy_coefficient=entropy_coefficient, + value_coefficient=value_coefficient, + cliprange=cliprange, + discount_factor=discount_factor, + normalize_advantage=normalize_advantage, + gae_lambda=gae_lambda + ) + diff --git a/vel/rl/algo/trpo.py b/vel/rl/algo/trpo.py new file mode 100644 index 00000000..6c92d9dc --- /dev/null +++ b/vel/rl/algo/trpo.py @@ -0,0 +1,413 @@ +import gym +import numpy as np +import itertools as it + +import torch +import torch.autograd as autograd +import torch.nn.functional as F +import torch.nn.utils +import typing + +from vel.api import BatchInfo, VelOptimizer, BackboneModel, LinearBackboneModel, OptimizerFactory, ModelFactory +from vel.calc.function import explained_variance +from vel.metric.base 
import AveragingNamedMetric +from vel.module.input.identity import IdentityFactory + +from vel.rl.api import Rollout, Trajectories, RlPolicy +from vel.rl.discount_bootstrap import discount_bootstrap_gae +from vel.rl.module.stochastic_action_head import StochasticActionHead +from vel.rl.module.value_head import ValueHead + + +def p2v(params): + """ Parameters to vector - shorthand utility version """ + return torch.nn.utils.parameters_to_vector(params) + + +def v2p(vector, params): + """ Vector to parameters - shorthand utility version """ + return torch.nn.utils.vector_to_parameters(vector, params) + + +def conjugate_gradient_method(matrix_vector_operator, loss_gradient, nsteps, rdotr_tol=1e-10): + """ Conjugate gradient algorithm """ + x = torch.zeros_like(loss_gradient) + + r = loss_gradient.clone() + p = loss_gradient.clone() + + rdotr = torch.dot(r, r) + + for i in range(nsteps): + avp = matrix_vector_operator(p) + alpha = rdotr / torch.dot(p, avp) + + x += alpha * p + r -= alpha * avp + + new_rdotr = torch.dot(r, r) + betta = new_rdotr / rdotr + p = r + betta * p + rdotr = new_rdotr + + if rdotr < rdotr_tol: + break + + return x + + +class TRPO(RlPolicy): + """ Trust Region Policy Optimization - https://arxiv.org/abs/1502.05477 """ + + def __init__(self, + input_block: BackboneModel, + policy_backbone: LinearBackboneModel, value_backbone: LinearBackboneModel, + action_space: gym.Space, + max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, + discount_factor, gae_lambda, improvement_acceptance_ratio): + super().__init__(discount_factor) + + self.input_block = input_block + self.policy_backbone = policy_backbone + self.value_backbone = value_backbone + + self.action_head = StochasticActionHead( + action_space=action_space, + input_dim=self.policy_backbone.output_dim + ) + + self.value_head = ValueHead(input_dim=self.value_backbone.output_dim) + + self.mak_kl = max_kl + self.cg_iters = cg_iters + self.line_search_iters = 
line_search_iters + self.cg_damping = cg_damping + self.entropy_coefficient = entropy_coefficient + self.vf_iters = vf_iters + self.gae_lambda = gae_lambda + self.improvement_acceptance_ratio = improvement_acceptance_ratio + + def reset_weights(self): + """ Initialize properly model weights """ + self.input_block.reset_weights() + + self.policy_backbone.reset_weights() + self.value_backbone.reset_weights() + + self.action_head.reset_weights() + self.value_head.reset_weights() + + def forward(self, observations): + """ Calculate model outputs """ + input_data = self.input_block(observations) + + policy_base_output = self.policy_backbone(input_data) + value_base_output = self.value_backbone(input_data) + + action_output = self.action_head(policy_base_output) + value_output = self.value_head(value_base_output) + + return action_output, value_output + + def value(self, observations, state=None): + """ Calculate only value head for given state """ + input_data = self.input_block(observations) + base_output = self.value_backbone(input_data) + value_output = self.value_head(base_output) + return value_output + + def policy(self, observations): + """ Calculate only action head for given state """ + input_data = self.input_block(observations) + policy_base_output = self.policy_backbone(input_data) + policy_params = self.action_head(policy_base_output) + return policy_params + + def act(self, observation, state=None, deterministic=False): + """ Select actions based on model's output """ + action_pd_params, value_output = self(observation) + actions = self.action_head.sample(action_pd_params, deterministic=deterministic) + + # log likelihood of selected action + logprobs = self.action_head.logprob(actions, action_pd_params) + + return { + 'actions': actions, + 'values': value_output, + 'action:logprobs': logprobs + } + + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: + """ Create optimizer for the purpose of optimizing this model """ + 
parameters = filter(lambda p: p.requires_grad, self.value_parameters()) + return optimizer_factory.instantiate(parameters) + + def policy_parameters(self): + """ Parameters of policy """ + return it.chain( + self.input_block.parameters(), + self.policy_backbone.parameters(), + self.action_head.parameters() + ) + + def value_parameters(self): + """ Parameters of value function """ + return it.chain( + self.input_block.parameters(), + self.value_backbone.parameters(), + self.value_head.parameters() + ) + + def process_rollout(self, rollout: Rollout): + """ Process rollout for optimization before any chunking/shuffling """ + assert isinstance(rollout, Trajectories), "PPO requires trajectory rollouts" + + advantages = discount_bootstrap_gae( + rewards_buffer=rollout.transition_tensors['rewards'], + dones_buffer=rollout.transition_tensors['dones'], + values_buffer=rollout.transition_tensors['values'], + final_values=rollout.rollout_tensors['final_values'], + discount_factor=self.discount_factor, + gae_lambda=self.gae_lambda, + number_of_steps=rollout.num_steps + ) + + returns = advantages + rollout.transition_tensors['values'] + + rollout.transition_tensors['advantages'] = advantages + rollout.transition_tensors['returns'] = returns + + return rollout + + def optimize(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ Single optimization step for a model """ + rollout = rollout.to_transitions() + + observations = rollout.batch_tensor('observations') + returns = rollout.batch_tensor('returns') + + # Evaluate model on the observations + action_pd_params = self.policy(observations) + policy_entropy = torch.mean(self.action_head.entropy(action_pd_params)) + + policy_loss = self.calc_policy_loss(action_pd_params, policy_entropy, rollout) + policy_grad = p2v(autograd.grad(policy_loss, self.policy_parameters(), retain_graph=True)).detach() + + # Calculate gradient of KL divergence of model with fixed version of itself + # Value of kl_divergence will be 0, but what 
we need is the gradient, actually the 2nd derivarive + kl_divergence = torch.mean(self.action_head.kl_divergence(action_pd_params.detach(), action_pd_params)) + kl_divergence_gradient = p2v(torch.autograd.grad(kl_divergence, self.policy_parameters(), create_graph=True)) + + step_direction = conjugate_gradient_method( + matrix_vector_operator=lambda x: self.fisher_vector_product(x, kl_divergence_gradient), + # Because we want to decrease the loss, we want to go into the direction of -gradient + loss_gradient=-policy_grad, + nsteps=self.cg_iters + ) + + shs = 0.5 * step_direction @ self.fisher_vector_product(step_direction, kl_divergence_gradient) + lm = torch.sqrt(shs / self.mak_kl) + full_step = step_direction / lm + + # Because we want to decrease the loss, we want to go into the direction of -gradient + expected_improvement = (-policy_grad) @ full_step + original_parameter_vec = p2v(self.policy_parameters()).detach_() + + (policy_optimization_success, ratio, policy_loss_improvement, new_policy_loss, kl_divergence_step) = ( + self.line_search( + rollout, policy_loss, action_pd_params, original_parameter_vec, full_step, expected_improvement + ) + ) + + gradient_norms = [] + + for i in range(self.vf_iters): + batch_info.optimizer.zero_grad() + value_loss = self.value_loss(observations, returns) + + value_loss.backward() + + batch_info.optimizer.step(closure=None) + + if gradient_norms: + gradient_norm = np.mean(gradient_norms) + else: + gradient_norm = 0.0 + + # noinspection PyUnboundLocalVariable + return { + 'new_policy_loss': new_policy_loss.item(), + 'policy_entropy': policy_entropy.item(), + 'value_loss': value_loss.item(), + 'policy_optimization_success': float(policy_optimization_success), + 'policy_improvement_ratio': ratio.item(), + 'kl_divergence_step': kl_divergence_step.item(), + 'policy_loss_improvement': policy_loss_improvement.item(), + 'grad_norm': gradient_norm, + 'advantage_norm': torch.norm(rollout.batch_tensor('advantages')).item(), + 
'explained_variance': explained_variance(returns, rollout.batch_tensor('values')) + } + + def line_search(self, rollout, original_policy_loss, original_policy_params, original_parameter_vec, + full_step, expected_improvement_full): + """ Find the right stepsize to make sure policy improves """ + current_parameter_vec = original_parameter_vec.clone() + + for idx in range(self.line_search_iters): + stepsize = 0.5 ** idx + + new_parameter_vec = current_parameter_vec + stepsize * full_step + + # Update model parameters + v2p(new_parameter_vec, self.policy_parameters()) + + # Calculate new loss + with torch.no_grad(): + policy_params = self.policy(rollout.batch_tensor('observations')) + policy_entropy = torch.mean(self.action_head.entropy(policy_params)) + kl_divergence = torch.mean(self.action_head.kl_divergence(original_policy_params, policy_params)) + + new_loss = self.calc_policy_loss(policy_params, policy_entropy, rollout) + + actual_improvement = original_policy_loss - new_loss + expected_improvement = expected_improvement_full * stepsize + + ratio = actual_improvement / expected_improvement + + if kl_divergence.item() > self.mak_kl * 1.5: + # KL divergence bound exceeded + continue + elif ratio < expected_improvement: + # Not enough loss improvement + continue + else: + # Optimization successful + return True, ratio, actual_improvement, new_loss, kl_divergence + + # Optimization failed, revert to initial parameters + v2p(original_parameter_vec, self.policy_parameters()) + return False, torch.tensor(0.0), torch.tensor(0.0), torch.tensor(0.0), torch.tensor(0.0) + + def fisher_vector_product(self, vector, kl_divergence_gradient): + """ Calculate product Hessian @ vector """ + assert not vector.requires_grad, "Vector must not propagate gradient" + dot_product = vector @ kl_divergence_gradient + + # at least one dimension spans across two contiguous subspaces + double_gradient = torch.autograd.grad(dot_product, self.policy_parameters(), retain_graph=True) + fvp = 
p2v(x.contiguous() for x in double_gradient) + + return fvp + vector * self.cg_damping + + def value_loss(self, observations, returns): + """ Loss of value function head """ + value_outputs = self.value(observations) + value_loss = 0.5 * F.mse_loss(value_outputs, returns) + return value_loss + + def calc_policy_loss(self, policy_params, policy_entropy, rollout): + """ + Policy gradient loss - calculate from probability distribution + + Calculate surrogate loss - advantage * policy_probability / fixed_initial_policy_probability + + Because we operate with logarithm of -probability (neglogp) we do + - advantage * exp(fixed_neglogps - model_neglogps) + """ + actions = rollout.batch_tensor('actions') + advantages = rollout.batch_tensor('advantages') + fixed_logprobs = rollout.batch_tensor('action:logprobs') + + model_logprobs = self.action_head.logprob(actions, policy_params) + + # Normalize advantages + advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) + + # We put - in front because we want to maximize the surrogate objective + policy_loss = -advantages * torch.exp(model_logprobs - fixed_logprobs) + + return policy_loss.mean() - policy_entropy * self.entropy_coefficient + + def metrics(self) -> list: + """ List of metrics to track for this learning process """ + return [ + AveragingNamedMetric("new_policy_loss", scope="model"), + AveragingNamedMetric("policy_entropy", scope="model"), + AveragingNamedMetric("value_loss", scope="model"), + AveragingNamedMetric("policy_optimization_success", scope="model"), + AveragingNamedMetric("policy_improvement_ratio", scope="model"), + AveragingNamedMetric("kl_divergence_step", scope="model"), + AveragingNamedMetric("policy_loss_improvement", scope="model"), + AveragingNamedMetric("advantage_norm", scope="model"), + AveragingNamedMetric("explained_variance", scope="model") + ] + + +class TRPOFactory(ModelFactory): + """ Factory class for policy gradient models """ + def __init__(self, input_block, 
policy_backbone: ModelFactory, value_backbone: ModelFactory, + max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, + discount_factor, gae_lambda, improvement_acceptance_ratio): + self.policy_backbone = policy_backbone + self.value_backbone = value_backbone + self.input_block = input_block + self.entropy_coefficient = entropy_coefficient + + self.mak_kl = max_kl + self.cg_iters = cg_iters + self.line_search_iters = line_search_iters + self.cg_damping = cg_damping + self.vf_iters = vf_iters + self.discount_factor = discount_factor + self.gae_lambda = gae_lambda + self.improvement_acceptance_ratio = improvement_acceptance_ratio + + def instantiate(self, **extra_args): + """ Instantiate the model """ + action_space = extra_args.pop('action_space') + + input_block = self.input_block.instantiate() + + policy_backbone = self.policy_backbone.instantiate(**extra_args) + value_backbone = self.value_backbone.instantiate(**extra_args) + + return TRPO( + input_block=input_block, + policy_backbone=policy_backbone, + value_backbone=value_backbone, + action_space=action_space, + max_kl=self.mak_kl, + cg_iters=self.cg_iters, + line_search_iters=self.line_search_iters, + cg_damping=self.cg_damping, + entropy_coefficient=self.entropy_coefficient, + vf_iters=self.vf_iters, + discount_factor=self.discount_factor, + gae_lambda=self.gae_lambda, + improvement_acceptance_ratio=self.improvement_acceptance_ratio + ) + + +def create(policy_backbone: ModelFactory, value_backbone: ModelFactory, + max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, + discount_factor, gae_lambda, improvement_acceptance_ratio, + input_block: typing.Optional[ModelFactory] = None): + """ Vel factory function """ + if input_block is None: + input_block = IdentityFactory() + + return TRPOFactory( + input_block=input_block, + policy_backbone=policy_backbone, + value_backbone=value_backbone, + max_kl=max_kl, + cg_iters=cg_iters, + 
line_search_iters=line_search_iters, + cg_damping=cg_damping, + entropy_coefficient=entropy_coefficient, + vf_iters=vf_iters, + discount_factor=discount_factor, + gae_lambda=gae_lambda, + improvement_acceptance_ratio=improvement_acceptance_ratio, + ) + diff --git a/vel/rl/api/__init__.py b/vel/rl/api/__init__.py index 4e80755c..699a8bdb 100644 --- a/vel/rl/api/__init__.py +++ b/vel/rl/api/__init__.py @@ -1,8 +1,6 @@ -from .algo_base import AlgoBase, OptimizerAlgoBase from .env_base import EnvFactory, VecEnvFactory from .env_roller import EnvRollerBase, ReplayEnvRollerBase, EnvRollerFactoryBase, ReplayEnvRollerFactoryBase -from .evaluator import Evaluator -from .policy import Policy -from .reinforcer_base import ReinforcerBase, ReinforcerFactory -from .replay_buffer import ReplayBuffer, ReplayBufferFactory from .rollout import Rollout, Trajectories, Transitions +from .rl_model import RlPolicy +from .reinforcer_base import Reinforcer, ReinforcerFactory +from .replay_buffer import ReplayBuffer, ReplayBufferFactory diff --git a/vel/rl/api/algo_base.py b/vel/rl/api/algo_base.py deleted file mode 100644 index b25374b1..00000000 --- a/vel/rl/api/algo_base.py +++ /dev/null @@ -1,47 +0,0 @@ - -class AlgoBase: - """ Base class for algo reinforcement calculations """ - - def initialize(self, training_info, model, environment, device): - """ Initialize algo from reinforcer settings """ - pass - - def process_rollout(self, batch_info, rollout): - """ Process rollout for ALGO before any chunking/shuffling """ - return rollout - - def optimize(self, batch_info, device, model, rollout): - """ Single optimization step for a model """ - raise NotImplementedError - - def metrics(self) -> list: - """ List of metrics to track for this learning process """ - return [] - - -class OptimizerAlgoBase(AlgoBase): - """ RL algo that does a simple optimizer update """ - - def calculate_gradient(self, batch_info, device, model, rollout): - """ Calculate loss of the supplied rollout """ - raise 
NotImplementedError - - def post_optimization_step(self, batch_info, device, model, rollout): - """ Steps to take after optimization has been done""" - pass - - def optimize(self, batch_info, device, model, rollout): - """ Single optimization step for a model """ - batch_info.optimizer.zero_grad() - - batch_result = self.calculate_gradient(batch_info=batch_info, device=device, model=model, rollout=rollout) - - batch_info.optimizer.step(closure=None) - - self.post_optimization_step(batch_info, device, model, rollout) - - return batch_result - - def metrics(self) -> list: - """ List of metrics to track for this learning process """ - return [] diff --git a/vel/rl/api/evaluator.py b/vel/rl/api/evaluator.py deleted file mode 100644 index e0c15d1c..00000000 --- a/vel/rl/api/evaluator.py +++ /dev/null @@ -1,154 +0,0 @@ -class EvaluatorMeta(type): - """ Metaclass for Evaluator - gathers all provider methods in a class attribute """ - def __new__(mcs, name, bases, attributes): - providers = {} - use_cache = {} - - for name, attr in attributes.items(): - if callable(attr): - proper_name = getattr(attr, '_vel_evaluator_provides', None) - if proper_name is not None: - providers[proper_name] = attr - - cache = getattr(attr, '_vel_use_cache', None) - if cache is not None: - use_cache[proper_name] = cache - - attributes['_use_cache'] = use_cache - attributes['_providers'] = providers - - return super().__new__(mcs, name, bases, attributes) - - -class Evaluator(metaclass=EvaluatorMeta): - """ - Different models may have different outputs and approach evaluating environment differently. - - Evaluator is an object that abstracts over that, providing unified interface between algorithms - which just need certain outputs from models and models that may provide them in different ways. - - I'll try to maintain here a dictionary of possible common values that can be requested from the evaluator. 
- Rollouts should communicate using the same names - - - rollout:estimated_returns - - Bootstrapped return (sum of discounted future rewards) estimated using returns and value estimates - - rollout:values - - Value estimates from the model that was used to generate the rollout - - rollout:estimated_advantages - - Advantage of a rollout (state, action) pair by the model that was used to generate the rollout - - rollout:actions - - Actions performed in a rollout - - rollout:logprobs - - Logarithm of probability for **all** actions of a policy used to perform rollout - (defined only for finite action spaces) - - rollout:action:logprobs - - Logarithm of probability only for selected actions - - rollout:dones - - Whether given observation is last in a trajectory - - rollout:dones - - Raw rewards received from the environment in this learning process - - rollout:final_values - - Value estimates for observation after final observation in the rollout - - rollout:observations - - Observations of the rollout - - rollout:observations_next - - Next observations in the rollout - - rollout:weights - - Error weights of rollout samples - - rollout:q - - Action-values for each action in current space - (defined only for finite action spaces) - - - model:logprobs - - Logarithm of probability of **all** actions in an environment as in current model policy - (defined only for finite action spaces) - - model:q - - Action-value for **all** actions - (defined only for finite action spaces) - - model:q_dist - - Action-value histogram for **all** actions - (defined only for finite action spaces) - - model:q_dist_next - - Action-value histogram for **all** actions from the 'next' state in the rollout - (defined only for finite action spaces) - - model:q_next - - Action-value for **all** actions from the 'next' state in the rollout - (defined only for finite action spaces) - - model:entropy - - Policy entropy for selected states - - model:action:q - - Action-value for actions selected in the 
rollout - - model:model_action:q - - Action-value for actions that model would perform (Deterministic policy only) - - model:actions - - Actions that model would perform (Deterministic policy only) - - model:action:logprobs - - Logarithm of probability for performed actions - - model:policy_params - - Parametrizations of policy for each state - - model:values - - Value estimates for each state, estimated by the current model - - model:values_next - - Value estimates for 'next' state of each transition - """ - - @staticmethod - def provides(name, cache=True): - """ Function decorator - value provided by the evaluator """ - def decorator(func): - func._vel_evaluator_provides = name - func._vel_use_cache = cache - - return func - - return decorator - - def __init__(self, rollout): - self._storage = {} - self.rollout = rollout - - def is_provided(self, name): - """ Capability check if evaluator provides given value """ - if name in self._storage: - return True - elif name in self._providers: - return True - elif name.startswith('rollout:'): - rollout_name = name[8:] - return self.is_provided(rollout_name) - else: - return False - - def get(self, name, cache=True): - """ - Return a value from this evaluator. - - Because tensor calculated is cached, it may lead to suble bugs if the same value is used multiple times - with and without no_grad() context. 
- - It is advised in such cases to not use no_grad and stick to .detach() - - If you want to disable the cache you can pass 'cache=False' to the decorator to disable it - for the attribute or to the get() function to disable it just for that call - """ - if name in self._use_cache and not self._use_cache[name]: - cache = False - - if name in self._storage and cache: - value = self._storage[name] - elif name in self._providers: - value = self._providers[name](self) - elif name.startswith('rollout:'): - rollout_name = name[8:] - value = self.rollout.batch_tensor(rollout_name) - else: - raise RuntimeError(f"Key {name} is not provided by this evaluator") - - if cache: - self._storage[name] = value - - return value - - def provide(self, name, value): - """ Provide given value under specified name """ - self._storage[name] = value diff --git a/vel/rl/api/policy.py b/vel/rl/api/policy.py deleted file mode 100644 index 34082364..00000000 --- a/vel/rl/api/policy.py +++ /dev/null @@ -1,22 +0,0 @@ -import torch -from vel.api import Model - - -class Policy(Model): - """ Base class for reinforcement learning policies """ - - def act(self, observation, state=None, deterministic=False) -> dict: - """ Make an action based on the observation from the environment. """ - raise NotImplementedError - - def value(self, observation, state=None) -> torch.tensor: - """ Return the expected reward from current state """ - return self.act(observation=observation, state=state)['value'] - - def reset_state(self, state, dones): - """ Reset the state after the episode has been terminated """ - raise NotImplementedError - - def evaluate(self, rollout) -> object: - """ Return an evaluator object evaluating given rollout that may be used for gradient computations etc. 
""" - raise NotImplementedError diff --git a/vel/rl/api/reinforcer_base.py b/vel/rl/api/reinforcer_base.py index 1db2ed34..3f9e7c66 100644 --- a/vel/rl/api/reinforcer_base.py +++ b/vel/rl/api/reinforcer_base.py @@ -1,9 +1,10 @@ import torch -from vel.api import TrainingInfo, EpochInfo, BatchInfo, Model +from vel.api import TrainingInfo, EpochInfo, BatchInfo +from vel.rl.api import RlPolicy -class ReinforcerBase: +class Reinforcer: """ Manages training process of a single model. Learner version for reinforcement-learning problems. @@ -26,13 +27,13 @@ def metrics(self) -> list: raise NotImplementedError @property - def model(self) -> Model: + def policy(self) -> RlPolicy: """ Model trained by this reinforcer """ raise NotImplementedError class ReinforcerFactory: """ A reinforcer factory """ - def instantiate(self, device: torch.device) -> ReinforcerBase: + def instantiate(self, device: torch.device) -> Reinforcer: """ Create new reinforcer instance """ raise NotImplementedError diff --git a/vel/rl/api/rl_model.py b/vel/rl/api/rl_model.py new file mode 100644 index 00000000..0e0eb609 --- /dev/null +++ b/vel/rl/api/rl_model.py @@ -0,0 +1,65 @@ +from vel.api import Model, VelOptimizer, OptimizerFactory, BatchInfo +from vel.rl.api import Rollout + + +class RlPolicy(Model): + """ Base class for reinforcement learning policies """ + + def __init__(self, discount_factor: float): + super().__init__() + + self.discount_factor = discount_factor + + def process_rollout(self, rollout: Rollout) -> Rollout: + """ Process rollout for optimization before any chunking/shuffling """ + raise NotImplementedError + + def act(self, observation, state=None, deterministic=False) -> dict: + """ + Make an action based on the observation from the environment. 
+ Returned dictionary must have 'actions' key that contains an action per + each env in the observations + """ + raise NotImplementedError + + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: + """ Create optimizer for the purpose of optimizing this model """ + parameters = filter(lambda p: p.requires_grad, self.parameters()) + return optimizer_factory.instantiate(parameters) + + def optimize(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ + Perform one step of optimization of the policy based on provided rollout data + :returns a dictionary of metrics + """ + batch_info.optimizer.zero_grad() + + metrics = self.calculate_gradient(batch_info, rollout) + + opt_metrics = batch_info.optimizer.step() + + for key, value in opt_metrics.items(): + metrics[key] = value + + return metrics + + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ + Calculate gradient for given batch of training data. + :returns a dictionary of metrics + """ + raise NotImplementedError + + def reset_state(self, state, dones): + """ Reset the state after the episode has been terminated """ + raise NotImplementedError + + #################################################################################################################### + # Utility Methods - that provide default implementations but may be short circuited by some implementations + def value(self, observation, state=None): + """ Return value for given observation """ + return self.act(observation, state=state)['values'] + + def action(self, observation, state=None, deterministic=False): + """ Return policy action for given observation """ + return self.act(observation, state=state, deterministic=deterministic)['actions'] diff --git a/vel/rl/command/rl_train_command.py b/vel/rl/command/rl_train_command.py index 539cf5d3..a879a0f0 100644 --- a/vel/rl/command/rl_train_command.py +++ b/vel/rl/command/rl_train_command.py @@ -1,8 +1,9 @@ import typing -from 
vel.api import ModelConfig, EpochInfo, TrainingInfo, BatchInfo, OptimizerFactory, Storage, Callback -from vel.rl.api import ReinforcerFactory +from vel.api import ModelConfig, EpochInfo, TrainingInfo, BatchInfo, OptimizerFactory, Storage, Callback, VelOptimizer from vel.callback.time_tracker import TimeTracker +from vel.metric.samples_per_sec import SamplesPerSec +from vel.rl.api import ReinforcerFactory, Reinforcer import vel.openai.baselines.logger as openai_logger @@ -65,21 +66,13 @@ def run(self): # Reinforcer is the learner for the reinforcement learning model reinforcer = self.reinforcer.instantiate(device) - optimizer = self.optimizer_factory.instantiate(reinforcer.model) + optimizer = reinforcer.policy.create_optimizer(self.optimizer_factory) - # All callbacks used for learning - callbacks = self.gather_callbacks(optimizer) - # Metrics to track through this training - metrics = reinforcer.metrics() - - training_info = self.resume_training(reinforcer, callbacks, metrics) + training_info = self.start_training(reinforcer, optimizer) reinforcer.initialize_training(training_info) training_info.on_train_begin() - if training_info.optimizer_initial_state: - optimizer.load_state_dict(training_info.optimizer_initial_state) - global_epoch_idx = training_info.start_epoch_idx + 1 while training_info['frames'] < self.total_frames: @@ -95,7 +88,7 @@ def run(self): if self.openai_logging: self._openai_logging(epoch_info.result) - self.storage.checkpoint(epoch_info, reinforcer.model) + self.storage.checkpoint(epoch_info, reinforcer.policy) global_epoch_idx += 1 @@ -103,39 +96,47 @@ def run(self): return training_info - def gather_callbacks(self, optimizer) -> list: - """ Gather all the callbacks to be used in this training run """ + def start_training(self, reinforcer: Reinforcer, optimizer: VelOptimizer) -> TrainingInfo: + """ Possibly resume training from a saved state from the storage """ + + if self.model_config.resume_training: + start_epoch = 
self.storage.last_epoch_idx() + else: + start_epoch = 0 + callbacks = [FrameTracker(self.total_frames), TimeTracker()] if self.scheduler_factory is not None: - callbacks.append(self.scheduler_factory.instantiate(optimizer)) + callbacks.extend( + optimizer.create_scheduler(scheduler_factory=self.scheduler_factory, last_epoch=start_epoch-1) + ) callbacks.extend(self.callbacks) callbacks.extend(self.storage.streaming_callbacks()) - return callbacks - - def resume_training(self, reinforcer, callbacks, metrics) -> TrainingInfo: - """ Possibly resume training from a saved state from the storage """ - if self.model_config.resume_training: - start_epoch = self.storage.last_epoch_idx() - else: - start_epoch = 0 + # Metrics to track through this training + metrics = reinforcer.metrics() + optimizer.metrics() training_info = TrainingInfo( start_epoch_idx=start_epoch, - run_name=self.model_config.run_name, - metrics=metrics, callbacks=callbacks + metrics=metrics, + callbacks=callbacks ) if start_epoch == 0: + self.model_config.write_meta() self.storage.reset(self.model_config.render_configuration()) training_info.initialize() reinforcer.initialize_training(training_info) else: model_state, hidden_state = self.storage.load(training_info) + + training_info.restore(hidden_state) reinforcer.initialize_training(training_info, model_state, hidden_state) + if 'optimizer' in hidden_state: + optimizer.load_state_dict(hidden_state['optimizer']) + return training_info def _openai_logging(self, epoch_result): diff --git a/vel/rl/env_roller/step_env_roller.py b/vel/rl/env_roller/step_env_roller.py index c4ec4700..f749a895 100644 --- a/vel/rl/env_roller/step_env_roller.py +++ b/vel/rl/env_roller/step_env_roller.py @@ -3,7 +3,7 @@ from vel.api import BatchInfo from vel.openai.baselines.common.vec_env import VecEnv -from vel.rl.api import Trajectories, Rollout, EnvRollerBase, EnvRollerFactoryBase, Policy +from vel.rl.api import Trajectories, Rollout, EnvRollerBase, EnvRollerFactoryBase, 
RlPolicy from vel.rl.util.actor import PolicyActor from vel.util.tensor_accumulator import TensorAccumulator @@ -13,7 +13,7 @@ class StepEnvRoller(EnvRollerBase): Class calculating env rollouts. """ - def __init__(self, environment: VecEnv, policy: Policy, device: torch.device): + def __init__(self, environment: VecEnv, policy: RlPolicy, device: torch.device): self._environment = environment self.device = device diff --git a/vel/rl/env_roller/trajectory_replay_env_roller.py b/vel/rl/env_roller/trajectory_replay_env_roller.py index e0407a05..1f413f4f 100644 --- a/vel/rl/env_roller/trajectory_replay_env_roller.py +++ b/vel/rl/env_roller/trajectory_replay_env_roller.py @@ -4,7 +4,7 @@ from vel.api import BatchInfo from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import ( - Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, ReplayBuffer, ReplayBufferFactory, Policy + Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, ReplayBuffer, ReplayBufferFactory, RlPolicy ) from vel.rl.util.actor import PolicyActor from vel.util.tensor_accumulator import TensorAccumulator @@ -17,7 +17,7 @@ class TrajectoryReplayEnvRoller(ReplayEnvRollerBase): Samples trajectories from the replay buffer (consecutive series of frames) """ - def __init__(self, environment: VecEnv, policy: Policy, device: torch.device, replay_buffer: ReplayBuffer): + def __init__(self, environment: VecEnv, policy: RlPolicy, device: torch.device, replay_buffer: ReplayBuffer): self._environment = environment self.device = device self.replay_buffer = replay_buffer diff --git a/vel/rl/env_roller/transition_replay_env_roller.py b/vel/rl/env_roller/transition_replay_env_roller.py index 25c1541a..14cb282e 100644 --- a/vel/rl/env_roller/transition_replay_env_roller.py +++ b/vel/rl/env_roller/transition_replay_env_roller.py @@ -7,7 +7,7 @@ from vel.openai.baselines.common.vec_env import VecEnv from vel.openai.baselines.common.running_mean_std import RunningMeanStd 
from vel.rl.api import ( - Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, ReplayBuffer, ReplayBufferFactory, Policy + Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, ReplayBuffer, ReplayBufferFactory, RlPolicy ) from vel.rl.util.actor import PolicyActor from vel.util.tensor_accumulator import TensorAccumulator @@ -20,7 +20,7 @@ class TransitionReplayEnvRoller(ReplayEnvRollerBase): Samples transitions from the replay buffer (individual frame transitions) """ - def __init__(self, environment: VecEnv, policy: Policy, device: torch.device, replay_buffer: ReplayBuffer, + def __init__(self, environment: VecEnv, policy: RlPolicy, device: torch.device, replay_buffer: ReplayBuffer, discount_factor: typing.Optional[float] = None, normalize_returns: bool = False, forward_steps: int = 1, action_noise: typing.Optional[nn.Module] = None): self._environment = environment diff --git a/vel/rl/policy/purgatory/deterministic_policy.py b/vel/rl/policy/purgatory/deterministic_policy.py index da7b31d0..58d908fe 100644 --- a/vel/rl/policy/purgatory/deterministic_policy.py +++ b/vel/rl/policy/purgatory/deterministic_policy.py @@ -5,7 +5,7 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Evaluator, RlModel +from vel.rl.api import Rollout, Evaluator, RlPolicy from vel.rl.module.deterministic_action_head import DeterministicActionHead from vel.rl.module.deterministic_critic_head import DeterministicCriticHead @@ -45,7 +45,7 @@ def model_action_q(self): return self.model.value(observations, rollout_actions) -class DeterministicPolicyModel(RlModel): +class DeterministicPolicyModel(RlPolicy): """ Deterministic Policy Gradient - model """ def __init__(self, input_block: BackboneModel, policy_backbone: LinearBackboneModel, diff --git a/vel/rl/policy/purgatory/stochastic_policy.py b/vel/rl/policy/purgatory/old_stochastic_policy.py similarity 
index 97% rename from vel/rl/policy/purgatory/stochastic_policy.py rename to vel/rl/policy/purgatory/old_stochastic_policy.py index 1788ffc6..4fc5a16b 100644 --- a/vel/rl/policy/purgatory/stochastic_policy.py +++ b/vel/rl/policy/purgatory/old_stochastic_policy.py @@ -3,7 +3,7 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Evaluator, RlModel +from vel.rl.api import Rollout, Evaluator, RlPolicy from vel.rl.module.action_head import StochasticActionHead from vel.rl.module.value_head import ValueHead @@ -33,7 +33,7 @@ def model_entropy(self): return self.model.entropy(policy_params) -class StochasticPolicyModel(RlModel): +class StochasticPolicyModel(RlPolicy): """ Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone """ diff --git a/vel/rl/policy/stochastic_rnn_policy.py b/vel/rl/policy/purgatory/old_stochastic_rnn_policy.py similarity index 76% rename from vel/rl/policy/stochastic_rnn_policy.py rename to vel/rl/policy/purgatory/old_stochastic_rnn_policy.py index a1a87f2e..25551144 100644 --- a/vel/rl/policy/stochastic_rnn_policy.py +++ b/vel/rl/policy/purgatory/old_stochastic_rnn_policy.py @@ -4,22 +4,22 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Trajectories, Evaluator, Policy -from vel.rl.module.stochastic_action_head import StochasticActionHead +from vel.rl.api import Rollout, Trajectories, Evaluator, RlRnnModel +from vel.rl.module.action_head import StochasticActionHead from vel.rl.module.value_head import ValueHead class StochasticPolicyRnnEvaluator(Evaluator): """ Evaluate recurrent model from initial state """ - def __init__(self, model: 'StochasticRnnPolicy', rollout: Rollout): + def __init__(self, model: 'StochasticPolicyRnnModel', rollout: Rollout): assert isinstance(rollout, 
Trajectories), "For an RNN model, we must evaluate trajectories" super().__init__(rollout) self.model = model observation_trajectories = rollout.transition_tensors['observations'] - hidden_state = rollout.transition_tensors['state'][0] # Initial hidden state + hidden_state = rollout.rollout_tensors['initial_hidden_state'] action_accumulator = [] value_accumulator = [] @@ -45,10 +45,10 @@ def model_action_logprobs(self): @Evaluator.provides('model:entropy') def model_entropy(self): policy_params = self.get('model:policy_params') - return self.model.action_head.entropy(policy_params) + return self.model.entropy(policy_params) -class StochasticRnnPolicy(Policy): +class StochasticPolicyRnnModel(RlRnnModel): """ Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone RNN version @@ -61,8 +61,6 @@ def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, self.input_block = input_block self.backbone = backbone - assert self.backbone.is_stateful, "Must have a stateful backbone" - self.action_head = StochasticActionHead( action_space=action_space, input_dim=self.backbone.output_dim @@ -72,9 +70,9 @@ def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, assert self.backbone.is_stateful, "Backbone must be a recurrent model" @property - def is_stateful(self) -> bool: - """ If the model has a state that needs to be fed between individual observations """ - return True + def state_dim(self) -> int: + """ Dimension of model state """ + return self.backbone.state_dim def reset_weights(self): """ Initialize properly model weights """ @@ -93,9 +91,9 @@ def forward(self, observations, state): return action_output, value_output, new_state - def act(self, observation, state=None, deterministic=False) -> dict: + def step(self, observations, state, deterministic=False): """ Select actions based on model's output """ - action_pd_params, value_output, new_state = self(observation, state) + 
action_pd_params, value_output, new_state = self(observations, state) actions = self.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action @@ -112,26 +110,25 @@ def evaluate(self, rollout: Rollout) -> Evaluator: """ Evaluate model on a rollout """ return StochasticPolicyRnnEvaluator(self, rollout) - def value(self, observation, state=None): + def logprob(self, action_sample, policy_params): + """ Calculate - log(prob) of selected actions """ + return self.action_head.logprob(action_sample, policy_params) + + def value(self, observations, state): """ Calculate only value head for given state """ - input_data = self.input_block(observation) + input_data = self.input_block(observations) base_output, new_state = self.backbone(input_data, state) value_output = self.value_head(base_output) return value_output - def reset_state(self, state, dones): - """ Reset the state after the episode has been terminated """ - if (dones > 0).any().item(): - zero_state = self.backbone.zero_state(dones.shape[0]).to(state.device) - dones_expanded = dones.unsqueeze(-1) - return state * (1 - dones_expanded) + zero_state * dones_expanded - else: - return state + def entropy(self, action_pd_params): + """ Entropy of a probability distribution """ + return self.action_head.entropy(action_pd_params) -class StochasticRnnPolicyFactory(ModelFactory): +class PolicyGradientRnnModelFactory(ModelFactory): """ Factory class for policy gradient models """ def __init__(self, input_block: ModelFactory, backbone: ModelFactory): self.input_block = input_block @@ -142,7 +139,7 @@ def instantiate(self, **extra_args): input_block = self.input_block.instantiate() backbone = self.backbone.instantiate(**extra_args) - return StochasticRnnPolicy(input_block, backbone, extra_args['action_space']) + return StochasticPolicyRnnModel(input_block, backbone, extra_args['action_space']) def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): @@ 
-150,7 +147,7 @@ def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = if input_block is None: input_block = IdentityFactory() - return StochasticRnnPolicyFactory( + return PolicyGradientRnnModelFactory( input_block=input_block, backbone=backbone ) diff --git a/vel/rl/policy/purgatory/q_distributional_policy.py b/vel/rl/policy/purgatory/q_distributional_policy.py index 209b002b..4dde37cf 100644 --- a/vel/rl/policy/purgatory/q_distributional_policy.py +++ b/vel/rl/policy/purgatory/q_distributional_policy.py @@ -3,7 +3,7 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, RlModel, Evaluator +from vel.rl.api import Rollout, RlPolicy, Evaluator from vel.rl.module.q_distributional_head import QDistributionalHead @@ -53,7 +53,7 @@ def model_q_dist_next(self): return self.model(observations) -class QDistributionalModel(RlModel): +class QDistributionalModel(RlPolicy): """ A deterministic greedy action-value model that learns a value function distribution rather than just an expectation. diff --git a/vel/rl/policy/purgatory/q_model.py b/vel/rl/policy/purgatory/q_model.py index 7472e0bb..d162a4de 100644 --- a/vel/rl/policy/purgatory/q_model.py +++ b/vel/rl/policy/purgatory/q_model.py @@ -3,7 +3,7 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, RlModel, Evaluator +from vel.rl.api import Rollout, RlPolicy, Evaluator from vel.rl.module.q_head import QHead @@ -33,7 +33,7 @@ def model_q_next(self): return self.model(observations) -class QModel(RlModel): +class QModel(RlPolicy): """ Simple deterministic greedy action-value model. 
Supports only discrete action spaces (ones that can be enumerated) diff --git a/vel/rl/policy/purgatory/q_noisy_model.py b/vel/rl/policy/purgatory/q_noisy_model.py index b2d747bb..2ef6aab3 100644 --- a/vel/rl/policy/purgatory/q_noisy_model.py +++ b/vel/rl/policy/purgatory/q_noisy_model.py @@ -3,12 +3,12 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, RlModel, Evaluator +from vel.rl.api import Rollout, RlPolicy, Evaluator from vel.rl.model.q_model import QModelEvaluator from vel.rl.module.q_noisy_head import QNoisyHead -class NoisyQModel(RlModel): +class NoisyQModel(RlPolicy): """ NoisyNets action-value model. Supports only discrete action spaces (ones that can be enumerated) diff --git a/vel/rl/policy/purgatory/stochastic_rnn_policy.py b/vel/rl/policy/purgatory/stochastic_rnn_policy.py index 25551144..256346da 100644 --- a/vel/rl/policy/purgatory/stochastic_rnn_policy.py +++ b/vel/rl/policy/purgatory/stochastic_rnn_policy.py @@ -4,22 +4,22 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Trajectories, Evaluator, RlRnnModel -from vel.rl.module.action_head import StochasticActionHead +from vel.rl.api import Rollout, Trajectories, Evaluator, RlPolicy +from vel.rl.module.stochastic_action_head import StochasticActionHead from vel.rl.module.value_head import ValueHead class StochasticPolicyRnnEvaluator(Evaluator): """ Evaluate recurrent model from initial state """ - def __init__(self, model: 'StochasticPolicyRnnModel', rollout: Rollout): + def __init__(self, model: 'StochasticRnnPolicy', rollout: Rollout): assert isinstance(rollout, Trajectories), "For an RNN model, we must evaluate trajectories" super().__init__(rollout) self.model = model observation_trajectories = rollout.transition_tensors['observations'] - hidden_state = 
rollout.rollout_tensors['initial_hidden_state'] + hidden_state = rollout.transition_tensors['state'][0] # Initial hidden state action_accumulator = [] value_accumulator = [] @@ -45,10 +45,10 @@ def model_action_logprobs(self): @Evaluator.provides('model:entropy') def model_entropy(self): policy_params = self.get('model:policy_params') - return self.model.entropy(policy_params) + return self.model.action_head.entropy(policy_params) -class StochasticPolicyRnnModel(RlRnnModel): +class StochasticRnnPolicy(RlPolicy): """ Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone RNN version @@ -61,6 +61,8 @@ def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, self.input_block = input_block self.backbone = backbone + assert self.backbone.is_stateful, "Must have a stateful backbone" + self.action_head = StochasticActionHead( action_space=action_space, input_dim=self.backbone.output_dim @@ -70,9 +72,9 @@ def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, assert self.backbone.is_stateful, "Backbone must be a recurrent model" @property - def state_dim(self) -> int: - """ Dimension of model state """ - return self.backbone.state_dim + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return True def reset_weights(self): """ Initialize properly model weights """ @@ -91,9 +93,9 @@ def forward(self, observations, state): return action_output, value_output, new_state - def step(self, observations, state, deterministic=False): + def act(self, observation, state=None, deterministic=False) -> dict: """ Select actions based on model's output """ - action_pd_params, value_output, new_state = self(observations, state) + action_pd_params, value_output, new_state = self(observation, state) actions = self.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action @@ -110,25 
+112,26 @@ def evaluate(self, rollout: Rollout) -> Evaluator: """ Evaluate model on a rollout """ return StochasticPolicyRnnEvaluator(self, rollout) - def logprob(self, action_sample, policy_params): - """ Calculate - log(prob) of selected actions """ - return self.action_head.logprob(action_sample, policy_params) - - def value(self, observations, state): + def value(self, observation, state=None): """ Calculate only value head for given state """ - input_data = self.input_block(observations) + input_data = self.input_block(observation) base_output, new_state = self.backbone(input_data, state) value_output = self.value_head(base_output) return value_output - def entropy(self, action_pd_params): - """ Entropy of a probability distribution """ - return self.action_head.entropy(action_pd_params) + def reset_state(self, state, dones): + """ Reset the state after the episode has been terminated """ + if (dones > 0).any().item(): + zero_state = self.backbone.zero_state(dones.shape[0]).to(state.device) + dones_expanded = dones.unsqueeze(-1) + return state * (1 - dones_expanded) + zero_state * dones_expanded + else: + return state -class PolicyGradientRnnModelFactory(ModelFactory): +class StochasticRnnPolicyFactory(ModelFactory): """ Factory class for policy gradient models """ def __init__(self, input_block: ModelFactory, backbone: ModelFactory): self.input_block = input_block @@ -139,7 +142,7 @@ def instantiate(self, **extra_args): input_block = self.input_block.instantiate() backbone = self.backbone.instantiate(**extra_args) - return StochasticPolicyRnnModel(input_block, backbone, extra_args['action_space']) + return StochasticRnnPolicy(input_block, backbone, extra_args['action_space']) def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): @@ -147,7 +150,7 @@ def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = if input_block is None: input_block = IdentityFactory() - return PolicyGradientRnnModelFactory( + 
return StochasticRnnPolicyFactory( input_block=input_block, backbone=backbone ) diff --git a/vel/rl/algo/policy_gradient/__init__.py b/vel/rl/policy/semipurgatory/__init__.py similarity index 100% rename from vel/rl/algo/policy_gradient/__init__.py rename to vel/rl/policy/semipurgatory/__init__.py diff --git a/vel/rl/algo/policy_gradient/a2c.py b/vel/rl/policy/semipurgatory/a2c_rnn.py similarity index 92% rename from vel/rl/algo/policy_gradient/a2c.py rename to vel/rl/policy/semipurgatory/a2c_rnn.py index fdbbbb61..fc38671a 100644 --- a/vel/rl/algo/policy_gradient/a2c.py +++ b/vel/rl/policy/semipurgatory/a2c_rnn.py @@ -9,8 +9,8 @@ class A2CPolicyGradient(OptimizerAlgoBase): """ Simplest policy gradient - calculate loss as an advantage of an actor versus value function """ - def __init__(self, entropy_coefficient, value_coefficient, max_grad_norm, discount_factor: float, gae_lambda=1.0): - super().__init__(max_grad_norm) + def __init__(self, entropy_coefficient, value_coefficient, discount_factor: float, gae_lambda=1.0): + super().__init__() self.entropy_coefficient = entropy_coefficient self.value_coefficient = value_coefficient @@ -82,12 +82,11 @@ def metrics(self) -> list: ] -def create(entropy_coefficient, value_coefficient, max_grad_norm, discount_factor, gae_lambda=1.0): +def create(entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): """ Vel factory function """ return A2CPolicyGradient( entropy_coefficient, value_coefficient, - max_grad_norm, discount_factor, gae_lambda ) diff --git a/vel/rl/algo/policy_gradient/acer.py b/vel/rl/policy/semipurgatory/acer.py similarity index 100% rename from vel/rl/algo/policy_gradient/acer.py rename to vel/rl/policy/semipurgatory/acer.py diff --git a/vel/rl/algo/policy_gradient/ddpg.py b/vel/rl/policy/semipurgatory/ddpg.py similarity index 100% rename from vel/rl/algo/policy_gradient/ddpg.py rename to vel/rl/policy/semipurgatory/ddpg.py diff --git a/vel/rl/algo/dqn.py b/vel/rl/policy/semipurgatory/dqn.py 
similarity index 100% rename from vel/rl/algo/dqn.py rename to vel/rl/policy/semipurgatory/dqn.py diff --git a/vel/rl/algo/policy_gradient/ppo.py b/vel/rl/policy/semipurgatory/ppo_rnn.py similarity index 100% rename from vel/rl/algo/policy_gradient/ppo.py rename to vel/rl/policy/semipurgatory/ppo_rnn.py diff --git a/vel/rl/policy/stochastic_policy.py b/vel/rl/policy/stochastic_policy.py index 6ecabc12..23db9d13 100644 --- a/vel/rl/policy/stochastic_policy.py +++ b/vel/rl/policy/stochastic_policy.py @@ -1,40 +1,13 @@ import gym -import torch import typing from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Evaluator, Policy from vel.rl.module.stochastic_action_head import StochasticActionHead from vel.rl.module.value_head import ValueHead -class StochasticPolicyEvaluator(Evaluator): - """ Evaluator for a policy gradient model """ - - def __init__(self, model: 'StochasticPolicy', rollout: Rollout): - super().__init__(rollout) - - self.model = model - - pd_params, estimated_values = model(self.rollout.batch_tensor('observations')) - - self.provide('model:pd_params', pd_params) - self.provide('model:values', estimated_values) - - @Evaluator.provides('model:action:logprobs') - def model_action_logprobs(self): - actions = self.get('rollout:actions') - pd_params = self.get('model:pd_params') - return self.model.action_head.logprob(actions, pd_params) - - @Evaluator.provides('model:entropy') - def model_entropy(self): - pd_params = self.get('model:pd_params') - return self.model.action_head.entropy(pd_params) - - -class StochasticPolicy(Policy): +class StochasticPolicy(BackboneModel): """ Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone """ @@ -74,30 +47,6 @@ def forward(self, observation): return action_output, value_output - def act(self, observation, state=None, deterministic=False): - """ Select actions 
based on model's output """ - action_pd_params, value_output = self(observation) - actions = self.action_head.sample(action_pd_params, deterministic=deterministic) - - # log likelihood of selected action - logprobs = self.action_head.logprob(actions, action_pd_params) - - return { - 'actions': actions, - 'values': value_output, - 'action:logprobs': logprobs - } - - def value(self, observation, state=None) -> torch.tensor: - """ Calculate value only - small optimization """ - input_data = self.input_block(observation) - base_output = self.backbone(input_data) - return self.value_head(base_output) - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return StochasticPolicyEvaluator(self, rollout) - class StochasticPolicyFactory(ModelFactory): """ Factory class for policy gradient models """ diff --git a/vel/rl/policy/purgatory/stochastic_policy_model_separate.py b/vel/rl/policy/stochastic_policy_separate.py similarity index 67% rename from vel/rl/policy/purgatory/stochastic_policy_model_separate.py rename to vel/rl/policy/stochastic_policy_separate.py index 3044459e..afced37c 100644 --- a/vel/rl/policy/purgatory/stochastic_policy_model_separate.py +++ b/vel/rl/policy/stochastic_policy_separate.py @@ -1,16 +1,13 @@ import gym -import itertools as it import typing from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, RlModel, Evaluator -from vel.rl.module.action_head import StochasticActionHead +from vel.rl.module.stochastic_action_head import StochasticActionHead from vel.rl.module.value_head import ValueHead -from vel.rl.model.stochastic_policy_model import StochasticPolicyEvaluator -class StochasticPolicyModelSeparate(RlModel): +class StochasticPolicyModelSeparate(BackboneModel): """ Policy gradient model class with an actor and critic heads that don't share a backbone """ @@ -53,29 +50,7 @@ def forward(self, observations): 
return action_output, value_output - def step(self, observation, deterministic=False): - """ Select actions based on model's output """ - policy_params, values = self(observation) - actions = self.action_head.sample(policy_params, deterministic=deterministic) - - # log likelihood of selected action - logprobs = self.action_head.logprob(actions, policy_params) - - return { - 'actions': actions, - 'values': values, - 'action:logprobs': logprobs - } - - def policy_parameters(self): - """ Parameters of policy """ - return it.chain(self.policy_backbone.parameters(), self.action_head.parameters()) - - def logprob(self, action_sample, policy_params): - """ Calculate - log(prob) of selected actions """ - return self.action_head.logprob(action_sample, policy_params) - - def value(self, observations): + def value(self, observations, state=None): """ Calculate only value head for given state """ input_data = self.input_block(observations) base_output = self.value_backbone(input_data) @@ -89,18 +64,6 @@ def policy(self, observations): policy_params = self.action_head(policy_base_output) return policy_params - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return StochasticPolicyEvaluator(self, rollout) - - def entropy(self, policy_params): - """ Entropy of a probability distribution """ - return self.action_head.entropy(policy_params) - - def kl_divergence(self, pd_q, pd_p): - """ Calculate KL-divergence between two probability distributions """ - return self.action_head.kl_divergence(pd_q, pd_p) - class StochasticPolicyModelSeparateFactory(ModelFactory): """ Factory class for policy gradient models """ diff --git a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py index 4faa513f..f80694dc 100644 --- a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py @@ -7,7 +7,7 @@ from 
vel.api import TrainingInfo, EpochInfo, BatchInfo, Model, ModelFactory from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import ( - ReinforcerBase, ReinforcerFactory, VecEnvFactory, ReplayEnvRollerBase, AlgoBase, ReplayEnvRollerFactoryBase + Reinforcer, ReinforcerFactory, VecEnvFactory, ReplayEnvRollerBase, AlgoBase, ReplayEnvRollerFactoryBase ) from vel.rl.metrics import ( FPSMetric, EpisodeLengthMetric, EpisodeRewardMetricQuantile, EpisodeRewardMetric, FramesMetric @@ -22,7 +22,7 @@ class BufferedMixedPolicyIterationReinforcerSettings: stochastic_experience_replay: bool = True -class BufferedMixedPolicyIterationReinforcer(ReinforcerBase): +class BufferedMixedPolicyIterationReinforcer(Reinforcer): """ A 'mixed' reinforcer that does both, on-policy learning from environment rollouts and off-policy learning from a replay buffer. @@ -57,19 +57,19 @@ def metrics(self) -> list: return my_metrics + self.algo.metrics() + self.env_roller.metrics() @property - def model(self) -> Model: + def policy(self) -> Model: """ Model trained by this reinforcer """ return self._trained_model def initialize_training(self, training_info: TrainingInfo, model_state=None, hidden_state=None): """ Prepare models for training """ if model_state is not None: - self.model.load_state_dict(model_state) + self.policy.load_state_dict(model_state) else: - self.model.reset_weights() + self.policy.reset_weights() self.algo.initialize( - training_info=training_info, model=self.model, environment=self.environment, device=self.device + training_info=training_info, model=self.policy, environment=self.environment, device=self.device ) def train_epoch(self, epoch_info: EpochInfo, interactive=True): @@ -111,14 +111,14 @@ def train_batch(self, batch_info: BatchInfo): def on_policy_train_batch(self, batch_info: BatchInfo): """ Perform an 'on-policy' training step of evaluating an env and a single backpropagation step """ - self.model.train() + self.policy.train() - rollout = 
self.env_roller.rollout(batch_info, self.model, self.settings.number_of_steps).to_device(self.device) + rollout = self.env_roller.rollout(batch_info, self.policy, self.settings.number_of_steps).to_device(self.device) batch_result = self.algo.optimize( batch_info=batch_info, device=self.device, - model=self.model, + model=self.policy, rollout=rollout ) @@ -128,14 +128,14 @@ def on_policy_train_batch(self, batch_info: BatchInfo): def off_policy_train_batch(self, batch_info: BatchInfo): """ Perform an 'off-policy' training step of sampling the replay buffer and gradient descent """ - self.model.train() + self.policy.train() - rollout = self.env_roller.sample(batch_info, self.model, self.settings.number_of_steps).to_device(self.device) + rollout = self.env_roller.sample(batch_info, self.policy, self.settings.number_of_steps).to_device(self.device) batch_result = self.algo.optimize( batch_info=batch_info, device=self.device, - model=self.model, + model=self.policy, rollout=rollout ) @@ -155,7 +155,7 @@ def __init__(self, settings, env_factory: VecEnvFactory, model_factory: ModelFac self.algo = algo self.seed = seed - def instantiate(self, device: torch.device) -> ReinforcerBase: + def instantiate(self, device: torch.device) -> Reinforcer: env = self.env_factory.instantiate(parallel_envs=self.parallel_envs, seed=self.seed) model = self.model_factory.instantiate(action_space=env.action_space) env_roller = self.env_roller_factory.instantiate(env, device) diff --git a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py index dbef9bd2..be04f2d7 100644 --- a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py @@ -7,7 +7,7 @@ from vel.api import TrainingInfo, EpochInfo, BatchInfo, Model, ModelFactory from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import ( - ReinforcerBase, ReinforcerFactory, 
ReplayEnvRollerBase, AlgoBase, VecEnvFactory, ReplayEnvRollerFactoryBase + Reinforcer, ReinforcerFactory, ReplayEnvRollerBase, AlgoBase, VecEnvFactory, ReplayEnvRollerFactoryBase ) from vel.rl.metrics import ( FPSMetric, EpisodeLengthMetric, EpisodeRewardMetricQuantile, EpisodeRewardMetric, FramesMetric, @@ -26,7 +26,7 @@ class BufferedOffPolicyIterationReinforcerSettings: training_rounds: int = 1 -class BufferedOffPolicyIterationReinforcer(ReinforcerBase): +class BufferedOffPolicyIterationReinforcer(Reinforcer): """ An off-policy reinforcer that rolls out environment and stores transitions in a buffer. Afterwards, it samples experience batches from this buffer to train the policy. @@ -56,18 +56,18 @@ def metrics(self) -> list: return my_metrics + self.algo.metrics() + self.env_roller.metrics() @property - def model(self) -> Model: + def policy(self) -> Model: return self._trained_model def initialize_training(self, training_info: TrainingInfo, model_state=None, hidden_state=None): """ Prepare models for training """ if model_state is not None: - self.model.load_state_dict(model_state) + self.policy.load_state_dict(model_state) else: - self.model.reset_weights() + self.policy.reset_weights() self.algo.initialize( - training_info=training_info, model=self.model, environment=self.environment, device=self.device + training_info=training_info, model=self.policy, environment=self.environment, device=self.device ) def train_epoch(self, epoch_info: EpochInfo, interactive=True) -> None: @@ -108,10 +108,10 @@ def train_batch(self, batch_info: BatchInfo) -> None: def roll_out_and_store(self, batch_info): """ Roll out environment and store result in the replay buffer """ - self.model.train() + self.policy.train() if self.env_roller.is_ready_for_sampling(): - rollout = self.env_roller.rollout(batch_info, self.model, self.settings.rollout_steps) + rollout = self.env_roller.rollout(batch_info, self.policy, self.settings.rollout_steps) rollout = rollout.to_device(self.device) # 
Store some information about the rollout, no training phase @@ -123,7 +123,7 @@ def roll_out_and_store(self, batch_info): with tqdm.tqdm(desc="Populating memory", total=self.env_roller.initial_memory_size_hint()) as pbar: while not self.env_roller.is_ready_for_sampling(): - rollout = self.env_roller.rollout(batch_info, self.model, self.settings.rollout_steps) + rollout = self.env_roller.rollout(batch_info, self.policy, self.settings.rollout_steps) rollout = rollout.to_device(self.device) new_frames = rollout.frames() @@ -138,18 +138,18 @@ def roll_out_and_store(self, batch_info): def train_on_replay_memory(self, batch_info): """ Train agent on a memory gotten from replay buffer """ - self.model.train() + self.policy.train() # Algo will aggregate data into this list: batch_info['sub_batch_data'] = [] for i in range(self.settings.training_rounds): - sampled_rollout = self.env_roller.sample(batch_info, self.model, self.settings.training_steps) + sampled_rollout = self.env_roller.sample(batch_info, self.policy, self.settings.training_steps) batch_result = self.algo.optimize( batch_info=batch_info, device=self.device, - model=self.model, + model=self.policy, rollout=sampled_rollout.to_device(self.device) ) diff --git a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py index d9ff7ab9..93096a6b 100644 --- a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py @@ -4,10 +4,10 @@ import torch import tqdm -from vel.api import Model, ModelFactory, TrainingInfo, EpochInfo, BatchInfo +from vel.api import ModelFactory, TrainingInfo, EpochInfo, BatchInfo from vel.rl.api import ( - ReinforcerBase, ReinforcerFactory, VecEnvFactory, EnvRollerFactoryBase, EnvRollerBase, AlgoBase, - Policy + Reinforcer, ReinforcerFactory, VecEnvFactory, EnvRollerFactoryBase, EnvRollerBase, + RlPolicy ) from vel.rl.metrics import ( FPSMetric, EpisodeLengthMetric, EpisodeRewardMetricQuantile, 
@@ -30,20 +30,18 @@ class OnPolicyIterationReinforcerSettings: shuffle_transitions: bool = True -class OnPolicyIterationReinforcer(ReinforcerBase): +class OnPolicyIterationReinforcer(Reinforcer): """ A reinforcer that calculates on-policy environment rollouts and uses them to train policy directly. May split the sample into multiple batches and may replay batches a few times. """ - def __init__(self, device: torch.device, settings: OnPolicyIterationReinforcerSettings, policy: Policy, - algo: AlgoBase, env_roller: EnvRollerBase) -> None: + def __init__(self, device: torch.device, settings: OnPolicyIterationReinforcerSettings, policy: RlPolicy, + env_roller: EnvRollerBase) -> None: self.device = device self.settings = settings - self.env_roller = env_roller - self.algo = algo - self._trained_model = policy.to(self.device) + self._model: RlPolicy = policy.to(self.device) def metrics(self) -> list: """ List of metrics to track for this learning process """ @@ -56,23 +54,19 @@ def metrics(self) -> list: EpisodeLengthMetric("episode_length"), ] - return my_metrics + self.algo.metrics() + self.env_roller.metrics() + self.model.metrics() + return my_metrics + self.env_roller.metrics() + self.policy.metrics() @property - def model(self) -> Model: + def policy(self) -> RlPolicy: """ Model trained by this reinforcer """ - return self._trained_model + return self._model def initialize_training(self, training_info: TrainingInfo, model_state=None, hidden_state=None): """ Prepare models for training """ if model_state is not None: - self.model.load_state_dict(model_state) + self.policy.load_state_dict(model_state) else: - self.model.reset_weights() - - self.algo.initialize( - training_info=training_info, model=self.model, environment=self.env_roller.environment, device=self.device - ) + self.policy.reset_weights() def train_epoch(self, epoch_info: EpochInfo, interactive=True) -> None: """ Train model on an epoch of a fixed number of batch updates """ @@ -97,18 +91,18 @@ def 
train_batch(self, batch_info: BatchInfo) -> None: """ Batch - the most atomic unit of learning. - For this reinforforcer, that involves: + For this reinforcer, that involves: 1. Roll out the environmnent using current policy 2. Use that rollout to train the policy """ # Calculate environment rollout on the evaluation version of the model - self.model.train() + self.policy.train() rollout = self.env_roller.rollout(batch_info, self.settings.number_of_steps) - # Process rollout by the 'algo' (e.g. perform the advantage estimation) - rollout = self.algo.process_rollout(batch_info, rollout) + # Preprocessing of the rollout for this algorithm + rollout = self.policy.process_rollout(rollout) # Perform the training step # Algo will aggregate data into this list: @@ -119,7 +113,7 @@ def train_batch(self, batch_info: BatchInfo) -> None: if self.settings.stochastic_experience_replay: # Always play experience at least once - experience_replay_count = max(np.random.poisson(self.settings.experience_replay), 1) + experience_replay_count = 1 + np.random.poisson(self.settings.experience_replay - 1) else: experience_replay_count = self.settings.experience_replay @@ -127,25 +121,22 @@ def train_batch(self, batch_info: BatchInfo) -> None: for i in range(experience_replay_count): # We may potentially need to split rollout into multiple batches if self.settings.batch_size >= rollout.frames(): - batch_result = self.algo.optimize( + metrics = self.policy.optimize( batch_info=batch_info, - device=self.device, - model=self.model, - rollout=rollout.to_device(self.device) + rollout=rollout.to_device(self.device), ) - batch_info['sub_batch_data'].append(batch_result) + batch_info['sub_batch_data'].append(metrics) else: # Rollout too big, need to split in batches for batch_rollout in rollout.shuffled_batches(self.settings.batch_size): - batch_result = self.algo.optimize( + + metrics = self.policy.optimize( batch_info=batch_info, - device=self.device, - model=self.model, - 
rollout=batch_rollout.to_device(self.device) + rollout=batch_rollout.to_device(self.device), ) - batch_info['sub_batch_data'].append(batch_result) + batch_info['sub_batch_data'].append(metrics) batch_info['frames'] = rollout.frames() batch_info['episode_infos'] = rollout.episode_information() @@ -157,24 +148,23 @@ def train_batch(self, batch_info: BatchInfo) -> None: class OnPolicyIterationReinforcerFactory(ReinforcerFactory): """ Vel factory class for the PolicyGradientReinforcer """ def __init__(self, settings, parallel_envs: int, env_factory: VecEnvFactory, model_factory: ModelFactory, - algo: AlgoBase, env_roller_factory: EnvRollerFactoryBase, seed: int): + env_roller_factory: EnvRollerFactoryBase, seed: int): self.settings = settings self.parallel_envs = parallel_envs self.env_factory = env_factory self.model_factory = model_factory - self.algo = algo self.env_roller_factory = env_roller_factory self.seed = seed - def instantiate(self, device: torch.device) -> ReinforcerBase: + def instantiate(self, device: torch.device) -> Reinforcer: env = self.env_factory.instantiate(parallel_envs=self.parallel_envs, seed=self.seed) policy = self.model_factory.instantiate(action_space=env.action_space) env_roller = self.env_roller_factory.instantiate(environment=env, policy=policy, device=device) - return OnPolicyIterationReinforcer(device, self.settings, policy, self.algo, env_roller) + return OnPolicyIterationReinforcer(device, self.settings, policy, env_roller) -def create(model_config, model, vec_env, algo, env_roller, parallel_envs, number_of_steps, +def create(model_config, model, vec_env, env_roller, parallel_envs, number_of_steps, batch_size=256, experience_replay=1, stochastic_experience_replay=False, shuffle_transitions=True): """ Vel factory function """ settings = OnPolicyIterationReinforcerSettings( @@ -190,7 +180,6 @@ def create(model_config, model, vec_env, algo, env_roller, parallel_envs, number parallel_envs=parallel_envs, env_factory=vec_env, 
model_factory=model, - algo=algo, env_roller_factory=env_roller, seed=model_config.seed ) diff --git a/vel/rl/test/test_integration.py b/vel/rl/test/test_integration.py index ed73a972..dfac3a61 100644 --- a/vel/rl/test/test_integration.py +++ b/vel/rl/test/test_integration.py @@ -100,7 +100,7 @@ def test_a2c_breakout(): ) # Model optimizer - optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=7.0e-4, eps=1e-3) + optimizer = optim.RMSprop(reinforcer.policy.parameters(), lr=7.0e-4, eps=1e-3) # Overall information store for training information training_info = TrainingInfo( @@ -182,7 +182,7 @@ def test_ppo_breakout(): # Model optimizer # optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=7.0e-4, eps=1e-3) - optimizer = optim.Adam(reinforcer.model.parameters(), lr=2.5e-4, eps=1e-5) + optimizer = optim.Adam(reinforcer.policy.parameters(), lr=2.5e-4, eps=1e-5) # Overall information store for training information training_info = TrainingInfo( diff --git a/vel/rl/util/actor.py b/vel/rl/util/actor.py index a858c4a7..76fa9d94 100644 --- a/vel/rl/util/actor.py +++ b/vel/rl/util/actor.py @@ -1,13 +1,13 @@ import torch -from vel.rl.api import Policy +from vel.rl.api import RlPolicy from vel.util.tensor_util import to_device class PolicyActor: """ Evaluates policy on a fixed set of environments. Additionally tracks the state """ - def __init__(self, num_envs: int, policy: Policy, device: torch.device): + def __init__(self, num_envs: int, policy: RlPolicy, device: torch.device): self.num_envs = num_envs self.policy = policy.to(device) self.device = device From b5a068ee059578a874ada072b5252ba32fdefa40 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 26 Sep 2019 23:04:51 -0700 Subject: [PATCH 099/162] PPO and A2C RNN policies. 
--- .../atari/{purgatory => }/atari_a2c_lstm.yaml | 45 ++-- .../atari/{purgatory => }/atari_ppo_gru.yaml | 47 ++-- vel/rl/algo/a2c.py | 9 +- vel/rl/algo/a2c_rnn.py | 173 ++++++++++++++ vel/rl/algo/ppo_rnn.py | 220 ++++++++++++++++++ .../semipurgatory}/distributional_dqn.py | 0 .../{purgatory => }/stochastic_rnn_policy.py | 65 +----- 7 files changed, 449 insertions(+), 110 deletions(-) rename examples-configs/rl/atari/{purgatory => }/atari_a2c_lstm.yaml (53%) rename examples-configs/rl/atari/{purgatory => }/atari_ppo_gru.yaml (62%) create mode 100644 vel/rl/algo/a2c_rnn.py create mode 100644 vel/rl/algo/ppo_rnn.py rename vel/rl/{algo => policy/semipurgatory}/distributional_dqn.py (100%) rename vel/rl/policy/{purgatory => }/stochastic_rnn_policy.py (57%) diff --git a/examples-configs/rl/atari/purgatory/atari_a2c_lstm.yaml b/examples-configs/rl/atari/atari_a2c_lstm.yaml similarity index 53% rename from examples-configs/rl/atari/purgatory/atari_a2c_lstm.yaml rename to examples-configs/rl/atari/atari_a2c_lstm.yaml index f83f5c50..4db60264 100644 --- a/examples-configs/rl/atari/purgatory/atari_a2c_lstm.yaml +++ b/examples-configs/rl/atari/atari_a2c_lstm.yaml @@ -12,28 +12,28 @@ vec_env: model: - name: vel.rl.models.stochastic_policy_rnn_model + name: vel.rl.algo.a2c_rnn - input_block: - name: vel.modules.input.image_to_tensor + entropy_coefficient: 0.01 + value_coefficient: 0.5 + discount_factor: 0.99 - backbone: - name: vel.rl.models.backbone.nature_cnn_rnn - input_width: 84 - input_height: 84 - input_channels: 1 # The same as frame_history - rnn_type: 'lstm' + policy: + name: vel.rl.policy.stochastic_rnn_policy + input_block: + name: vel.module.input.image_to_tensor + + backbone: + name: vel.rl.backbone.nature_cnn_rnn + input_width: 84 + input_height: 84 + input_channels: 1 # The same as frame_history + rnn_type: 'lstm' -reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - 
value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 +reinforcer: + name: vel.rl.reinforcer.on_policy_iteration_reinforcer env_roller: name: vel.rl.env_roller.step_env_roller @@ -45,28 +45,29 @@ reinforcer: optimizer: - name: vel.optimizers.rmsprop + name: vel.optimizer.rmsprop lr: 7.0e-4 alpha: 0.99 epsilon: 1.0e-3 + max_grad_norm: 0.5 commands: train: - name: vel.rl.commands.rl_train_command + name: vel.rl.command.rl_train_command total_frames: 1.1e7 batches_per_epoch: 100 record: - name: vel.rl.commands.record_movie_command + name: vel.rl.command.record_movie_command takes: 10 videoname: 'atari_vid_{:04}.avi' evaluate: - name: vel.rl.commands.evaluate_env_command + name: vel.rl.command.evaluate_env_command parallel_envs: 16 # How many environments to run in parallel takes: 20 visdom: - name: vel.commands.vis_store_command + name: vel.command.vis_store_command diff --git a/examples-configs/rl/atari/purgatory/atari_ppo_gru.yaml b/examples-configs/rl/atari/atari_ppo_gru.yaml similarity index 62% rename from examples-configs/rl/atari/purgatory/atari_ppo_gru.yaml rename to examples-configs/rl/atari/atari_ppo_gru.yaml index 27303c92..afea6850 100644 --- a/examples-configs/rl/atari/purgatory/atari_ppo_gru.yaml +++ b/examples-configs/rl/atari/atari_ppo_gru.yaml @@ -11,39 +11,37 @@ vec_env: model: - name: vel.rl.policy.stochastic_rnn_policy + name: vel.rl.algo.ppo_rnn - input_block: - name: vel.module.input.image_to_tensor + entropy_coefficient: 0.01 + value_coefficient: 0.5 - backbone: - name: vel.rl.backbone.nature_cnn_rnn - rnn_type: 'gru' - hidden_units: 512 + discount_factor: 0.99 # Discount factor for the rewards + gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter - input_width: 84 - input_height: 84 - input_channels: 1 # The same as frame_history + cliprange: + name: vel.function.linear + initial_value: 0.1 + final_value: 0.0 + policy: + name: vel.rl.policy.stochastic_rnn_policy -reinforcer: - name: 
vel.rl.reinforcer.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.ppo + input_block: + name: vel.module.input.image_to_tensor - entropy_coefficient: 0.01 - value_coefficient: 0.5 + backbone: + name: vel.rl.backbone.nature_cnn_rnn + rnn_type: 'gru' + hidden_units: 512 - discount_factor: 0.99 # Discount factor for the rewards - gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter + input_width: 84 + input_height: 84 + input_channels: 1 # The same as frame_history - max_grad_norm: 0.5 # Gradient clipping parameter - cliprange: - name: vel.function.linear - initial_value: 0.1 - final_value: 0.0 +reinforcer: + name: vel.rl.reinforcer.on_policy_iteration_reinforcer env_roller: name: vel.rl.env_roller.step_env_roller @@ -60,6 +58,7 @@ optimizer: name: vel.optimizer.adam lr: 2.5e-4 epsilon: 1.0e-5 + max_grad_norm: 0.5 # Gradient clipping parameter scheduler: diff --git a/vel/rl/algo/a2c.py b/vel/rl/algo/a2c.py index 69b7926d..4fd529c7 100644 --- a/vel/rl/algo/a2c.py +++ b/vel/rl/algo/a2c.py @@ -21,17 +21,20 @@ def __init__(self, policy: BackboneModel, entropy_coefficient, value_coefficient self.policy = policy + assert not self.policy.is_stateful, "For stateful policies, try A2CRnn" + def reset_weights(self): """ Initialize properly model weights """ self.policy.reset_weights() - def forward(self, observation): + def forward(self, observation, state=None): """ Calculate model outputs """ - return self.policy(observation) + return self.policy(observation, state=state) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ - action_pd_params, value_output = self(observation) + action_pd_params, value_output = self(observation, state=state) + actions = self.policy.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action diff --git a/vel/rl/algo/a2c_rnn.py b/vel/rl/algo/a2c_rnn.py new file mode 100644 index 00000000..523b2f13 --- 
/dev/null +++ b/vel/rl/algo/a2c_rnn.py @@ -0,0 +1,173 @@ +import torch +import torch.nn.functional as F + +from vel.metric.base import AveragingNamedMetric +from vel.calc.function import explained_variance +from vel.api import BackboneModel, ModelFactory, BatchInfo + +from vel.rl.api import RlPolicy, Rollout, Trajectories +from vel.rl.discount_bootstrap import discount_bootstrap_gae + + +class A2CRnn(RlPolicy): + """ Simplest policy gradient - calculate loss as an advantage of an actor versus value function """ + def __init__(self, policy: BackboneModel, entropy_coefficient, value_coefficient, discount_factor: float, + gae_lambda=1.0): + super().__init__(discount_factor) + + self.entropy_coefficient = entropy_coefficient + self.value_coefficient = value_coefficient + self.gae_lambda = gae_lambda + + self.policy = policy + + assert self.policy.is_stateful, "Policy must be stateful" + + def reset_weights(self): + """ Initialize properly model weights """ + self.policy.reset_weights() + + def forward(self, observation, state=None): + """ Calculate model outputs """ + return self.policy(observation, state=state) + + def is_stateful(self) -> bool: + return self.policy.is_stateful + + def zero_state(self, batch_size): + return self.policy.zero_state(batch_size) + + def reset_state(self, state, dones): + return self.policy.reset_state(state, dones) + + def act(self, observation, state=None, deterministic=False): + """ Select actions based on model's output """ + action_pd_params, value_output, next_state = self(observation, state=state) + + actions = self.policy.action_head.sample(action_pd_params, deterministic=deterministic) + + # log likelihood of selected action + logprobs = self.policy.action_head.logprob(actions, action_pd_params) + + return { + 'actions': actions, + 'state': next_state, + 'values': value_output, + 'action:logprobs': logprobs + } + + def process_rollout(self, rollout: Rollout) -> Rollout: + """ Process rollout for optimization before any 
chunking/shuffling """ + assert isinstance(rollout, Trajectories), "A2C requires trajectory rollouts" + + advantages = discount_bootstrap_gae( + rewards_buffer=rollout.transition_tensors['rewards'], + dones_buffer=rollout.transition_tensors['dones'], + values_buffer=rollout.transition_tensors['values'], + final_values=rollout.rollout_tensors['final_values'], + discount_factor=self.discount_factor, + gae_lambda=self.gae_lambda, + number_of_steps=rollout.num_steps + ) + + returns = advantages + rollout.transition_tensors['values'] + + rollout.transition_tensors['advantages'] = advantages + rollout.transition_tensors['returns'] = returns + + return rollout + + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ Calculate loss of the supplied rollout """ + assert isinstance(rollout, Trajectories), "For an RNN model, we must evaluate trajectories" + + # rollout values + actions = rollout.batch_tensor('actions') + advantages = rollout.batch_tensor('advantages') + returns = rollout.batch_tensor('returns') + rollout_values = rollout.batch_tensor('values') + + # Let's evaluate the model + observations = rollout.transition_tensors['observations'] + hidden_state = rollout.transition_tensors['state'][0] # Initial hidden state + dones = rollout.transition_tensors['dones'] + + action_accumulator = [] + value_accumulator = [] + + # Evaluate recurrent network step by step + for i in range(observations.size(0)): + action_output, value_output, hidden_state = self(observations[i], hidden_state) + hidden_state = self.reset_state(hidden_state, dones[i]) + + action_accumulator.append(action_output) + value_accumulator.append(value_output) + + pd_params = torch.cat(action_accumulator, dim=0) + model_values = torch.cat(value_accumulator, dim=0) + + log_probs = self.policy.action_head.logprob(actions, pd_params) + entropy = self.policy.action_head.entropy(pd_params) + + # Actual calculations. 
Pretty trivial + policy_loss = -torch.mean(advantages * log_probs) + value_loss = 0.5 * F.mse_loss(model_values, returns) + policy_entropy = torch.mean(entropy) + + loss_value = ( + policy_loss - self.entropy_coefficient * policy_entropy + self.value_coefficient * value_loss + ) + + loss_value.backward() + + return { + 'policy_loss': policy_loss.item(), + 'value_loss': value_loss.item(), + 'policy_entropy': policy_entropy.item(), + 'advantage_norm': torch.norm(advantages).item(), + 'explained_variance': explained_variance(returns, rollout_values) + } + + def metrics(self) -> list: + """ List of metrics to track for this learning process """ + return [ + AveragingNamedMetric("value_loss", scope="model"), + AveragingNamedMetric("policy_entropy", scope="model"), + AveragingNamedMetric("policy_loss", scope="model"), + AveragingNamedMetric("advantage_norm", scope="model"), + AveragingNamedMetric("explained_variance", scope="model") + ] + + +class A2CRnnFactory(ModelFactory): + """ Factory class for policy gradient models """ + def __init__(self, policy, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): + self.policy = policy + self.entropy_coefficient = entropy_coefficient + self.value_coefficient = value_coefficient + self.discount_factor = discount_factor + self.gae_lambda = gae_lambda + + def instantiate(self, **extra_args): + """ Instantiate the model """ + # action_space = extra_args.pop('action_space') + policy = self.policy.instantiate(**extra_args) + + return A2CRnn( + policy=policy, + entropy_coefficient=self.entropy_coefficient, + value_coefficient=self.value_coefficient, + discount_factor=self.discount_factor, + gae_lambda=self.gae_lambda + ) + + +def create(policy: BackboneModel, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): + """ Vel factory function """ + return A2CRnnFactory( + policy=policy, + entropy_coefficient=entropy_coefficient, + value_coefficient=value_coefficient, + 
discount_factor=discount_factor, + gae_lambda=gae_lambda + ) diff --git a/vel/rl/algo/ppo_rnn.py b/vel/rl/algo/ppo_rnn.py new file mode 100644 index 00000000..76c2daad --- /dev/null +++ b/vel/rl/algo/ppo_rnn.py @@ -0,0 +1,220 @@ +import torch + +import numbers + +from vel.api import BackboneModel, BatchInfo, ModelFactory +from vel.calc.function import explained_variance +from vel.function.constant import ConstantSchedule +from vel.metric.base import AveragingNamedMetric + +from vel.rl.api import RlPolicy, Rollout, Trajectories +from vel.rl.discount_bootstrap import discount_bootstrap_gae + + +class PPORnn(RlPolicy): + """ Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 """ + def __init__(self, policy: BackboneModel, + entropy_coefficient, value_coefficient, cliprange, discount_factor: float, + normalize_advantage: bool = True, gae_lambda: float = 1.0): + super().__init__(discount_factor) + + self.entropy_coefficient = entropy_coefficient + self.value_coefficient = value_coefficient + self.normalize_advantage = normalize_advantage + self.gae_lambda = gae_lambda + + if isinstance(cliprange, numbers.Number): + self.cliprange = ConstantSchedule(cliprange) + else: + self.cliprange = cliprange + + self.policy = policy + + assert self.policy.is_stateful, "Policy must be stateful" + + def reset_weights(self): + """ Initialize properly model weights """ + self.policy.reset_weights() + + def forward(self, observation, state=None): + """ Calculate model outputs """ + return self.policy.forward(observation, state=state) + + def is_stateful(self) -> bool: + return self.policy.is_stateful + + def zero_state(self, batch_size): + return self.policy.zero_state(batch_size) + + def reset_state(self, state, dones): + return self.policy.reset_state(state, dones) + + def act(self, observation, state=None, deterministic=False): + """ Select actions based on model's output """ + action_pd_params, value_output, next_state = self(observation, state=state) + actions = 
self.policy.action_head.sample(action_pd_params, deterministic=deterministic) + + # log likelihood of selected action + logprobs = self.policy.action_head.logprob(actions, action_pd_params) + + return { + 'actions': actions, + 'values': value_output, + 'state': next_state, + 'action:logprobs': logprobs + } + + def process_rollout(self, rollout: Rollout): + """ Process rollout for optimization before any chunking/shuffling """ + assert isinstance(rollout, Trajectories), "PPO requires trajectory rollouts" + + advantages = discount_bootstrap_gae( + rewards_buffer=rollout.transition_tensors['rewards'], + dones_buffer=rollout.transition_tensors['dones'], + values_buffer=rollout.transition_tensors['values'], + final_values=rollout.rollout_tensors['final_values'], + discount_factor=self.discount_factor, + gae_lambda=self.gae_lambda, + number_of_steps=rollout.num_steps + ) + + returns = advantages + rollout.transition_tensors['values'] + + rollout.transition_tensors['advantages'] = advantages + rollout.transition_tensors['returns'] = returns + + return rollout + + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ Calculate loss of the supplied rollout """ + assert isinstance(rollout, Trajectories), "For an RNN model, we must evaluate trajectories" + + # Part 0.0 - Rollout values + actions = rollout.batch_tensor('actions') + advantages = rollout.batch_tensor('advantages') + returns = rollout.batch_tensor('returns') + rollout_values = rollout.batch_tensor('values') + rollout_action_logprobs = rollout.batch_tensor('action:logprobs') + + # PART 0.1 - Model evaluation + observations = rollout.transition_tensors['observations'] + hidden_state = rollout.transition_tensors['state'][0] # Initial hidden state + dones = rollout.transition_tensors['dones'] + + action_accumulator = [] + value_accumulator = [] + + # Evaluate recurrent network step by step + for i in range(observations.size(0)): + action_output, value_output, hidden_state = 
self(observations[i], hidden_state) + hidden_state = self.reset_state(hidden_state, dones[i]) + + action_accumulator.append(action_output) + value_accumulator.append(value_output) + + pd_params = torch.cat(action_accumulator, dim=0) + model_values = torch.cat(value_accumulator, dim=0) + + model_action_logprobs = self.policy.action_head.logprob(actions, pd_params) + entropy = self.policy.action_head.entropy(pd_params) + + # Select the cliprange + current_cliprange = self.cliprange.value(batch_info['progress']) + + # Normalize the advantages? + if self.normalize_advantage: + advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) + + # PART 1 - policy entropy + policy_entropy = torch.mean(entropy) + + # PART 2 - value function + value_output_clipped = rollout_values + torch.clamp( + model_values - rollout_values, -current_cliprange, current_cliprange + ) + value_loss_part1 = (model_values - returns).pow(2) + value_loss_part2 = (value_output_clipped - returns).pow(2) + value_loss = 0.5 * torch.mean(torch.max(value_loss_part1, value_loss_part2)) + + # PART 3 - policy gradient loss + ratio = torch.exp(model_action_logprobs - rollout_action_logprobs) + + pg_loss_part1 = -advantages * ratio + pg_loss_part2 = -advantages * torch.clamp(ratio, 1.0 - current_cliprange, 1.0 + current_cliprange) + policy_loss = torch.mean(torch.max(pg_loss_part1, pg_loss_part2)) + + loss_value = ( + policy_loss - self.entropy_coefficient * policy_entropy + self.value_coefficient * value_loss + ) + + loss_value.backward() + + with torch.no_grad(): + approx_kl_divergence = 0.5 * torch.mean((model_action_logprobs - rollout_action_logprobs).pow(2)) + clip_fraction = torch.mean((torch.abs(ratio - 1.0) > current_cliprange).to(dtype=torch.float)) + + return { + 'policy_loss': policy_loss.item(), + 'value_loss': value_loss.item(), + 'policy_entropy': policy_entropy.item(), + 'approx_kl_divergence': approx_kl_divergence.item(), + 'clip_fraction': clip_fraction.item(), + 
'advantage_norm': torch.norm(advantages).item(), + 'explained_variance': explained_variance(returns, rollout_values) + } + + def metrics(self) -> list: + """ List of metrics to track for this learning process """ + return [ + AveragingNamedMetric("policy_loss", scope="model"), + AveragingNamedMetric("value_loss", scope="model"), + AveragingNamedMetric("policy_entropy", scope="model"), + AveragingNamedMetric("approx_kl_divergence", scope="model"), + AveragingNamedMetric("clip_fraction", scope="model"), + AveragingNamedMetric("advantage_norm", scope="model"), + AveragingNamedMetric("explained_variance", scope="model") + ] + + +class PPORnnFactory(ModelFactory): + """ Factory class for policy gradient models """ + def __init__(self, policy: BackboneModel, + entropy_coefficient, value_coefficient, cliprange, discount_factor: float, + normalize_advantage: bool = True, gae_lambda: float = 1.0): + self.policy = policy + self.entropy_coefficient = entropy_coefficient + self.value_coefficient = value_coefficient + self.cliprange = cliprange + self.discount_factor = discount_factor + self.normalize_advantage = normalize_advantage + self.gae_lambda = gae_lambda + + def instantiate(self, **extra_args): + """ Instantiate the model """ + policy = self.policy.instantiate(**extra_args) + + return PPORnn( + policy=policy, + entropy_coefficient=self.entropy_coefficient, + value_coefficient=self.value_coefficient, + cliprange=self.cliprange, + discount_factor=self.discount_factor, + normalize_advantage=self.normalize_advantage, + gae_lambda=self.gae_lambda, + ) + + +def create(policy: BackboneModel, + entropy_coefficient, value_coefficient, cliprange, discount_factor: float, + normalize_advantage: bool = True, gae_lambda: float = 1.0): + """ Vel factory function """ + return PPORnnFactory( + policy=policy, + entropy_coefficient=entropy_coefficient, + value_coefficient=value_coefficient, + cliprange=cliprange, + discount_factor=discount_factor, + 
normalize_advantage=normalize_advantage, + gae_lambda=gae_lambda + ) + diff --git a/vel/rl/algo/distributional_dqn.py b/vel/rl/policy/semipurgatory/distributional_dqn.py similarity index 100% rename from vel/rl/algo/distributional_dqn.py rename to vel/rl/policy/semipurgatory/distributional_dqn.py diff --git a/vel/rl/policy/purgatory/stochastic_rnn_policy.py b/vel/rl/policy/stochastic_rnn_policy.py similarity index 57% rename from vel/rl/policy/purgatory/stochastic_rnn_policy.py rename to vel/rl/policy/stochastic_rnn_policy.py index 256346da..de8754b1 100644 --- a/vel/rl/policy/purgatory/stochastic_rnn_policy.py +++ b/vel/rl/policy/stochastic_rnn_policy.py @@ -1,54 +1,13 @@ import gym -import torch import typing from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Trajectories, Evaluator, RlPolicy from vel.rl.module.stochastic_action_head import StochasticActionHead from vel.rl.module.value_head import ValueHead -class StochasticPolicyRnnEvaluator(Evaluator): - """ Evaluate recurrent model from initial state """ - - def __init__(self, model: 'StochasticRnnPolicy', rollout: Rollout): - assert isinstance(rollout, Trajectories), "For an RNN model, we must evaluate trajectories" - super().__init__(rollout) - - self.model = model - - observation_trajectories = rollout.transition_tensors['observations'] - hidden_state = rollout.transition_tensors['state'][0] # Initial hidden state - - action_accumulator = [] - value_accumulator = [] - - # Evaluate recurrent network step by step - for i in range(observation_trajectories.size(0)): - action_output, value_output, hidden_state = model(observation_trajectories[i], hidden_state) - action_accumulator.append(action_output) - value_accumulator.append(value_output) - - policy_params = torch.cat(action_accumulator, dim=0) - estimated_values = torch.cat(value_accumulator, dim=0) - - self.provide('model:policy_params', policy_params) 
- self.provide('model:values', estimated_values) - - @Evaluator.provides('model:action:logprobs') - def model_action_logprobs(self): - actions = self.get('rollout:actions') - policy_params = self.get('model:policy_params') - return self.model.action_head.logprob(actions, policy_params) - - @Evaluator.provides('model:entropy') - def model_entropy(self): - policy_params = self.get('model:policy_params') - return self.model.action_head.entropy(policy_params) - - -class StochasticRnnPolicy(RlPolicy): +class StochasticRnnPolicy(BackboneModel): """ Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone RNN version @@ -76,6 +35,9 @@ def is_stateful(self) -> bool: """ If the model has a state that needs to be fed between individual observations """ return True + def zero_state(self, batch_size): + return self.backbone.zero_state(batch_size) + def reset_weights(self): """ Initialize properly model weights """ self.input_block.reset_weights() @@ -93,25 +55,6 @@ def forward(self, observations, state): return action_output, value_output, new_state - def act(self, observation, state=None, deterministic=False) -> dict: - """ Select actions based on model's output """ - action_pd_params, value_output, new_state = self(observation, state) - actions = self.action_head.sample(action_pd_params, deterministic=deterministic) - - # log likelihood of selected action - logprobs = self.action_head.logprob(actions, action_pd_params) - - return { - 'actions': actions, - 'values': value_output, - 'action:logprobs': logprobs, - 'state': new_state - } - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return StochasticPolicyRnnEvaluator(self, rollout) - def value(self, observation, state=None): """ Calculate only value head for given state """ input_data = self.input_block(observation) From 62e82ffb0a0b315577fe465d0213108b7ea43cd4 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 1 Oct 
2019 21:26:45 -0700 Subject: [PATCH 100/162] Updated README. --- README.md | 62 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5f678ffd..60563e3f 100644 --- a/README.md +++ b/README.md @@ -9,12 +9,13 @@ Bring **velocity** to deep-learning research. This project hosts a collection of **highly modular** deep learning components that are tested to be working well together. -A simple yaml-based system ties these modules together declaratively using configuration files. +A simple yaml-based system ties these modules declaratively using configuration files. -This is still an early version and a hobby project so documentation is unfortunately nonexistent. I've tried to make the -code as clear as possible, and provide many usage examples, but whenever there was a tradeoff to be made between -simplicity and modularity I've chosen modularity first and simplicity second. +This is still an early version and a hobby project, so documentation is unfortunately nonexistent. +I've made an effort to make the code clear and provide many usage examples, +but whenever there was a tradeoff to be made between simplicity and modularity +I've chosen modularity first and simplicity second. Therefore, high emphasis is made on interfaces between components. 
Having conducted a few research projects, I've gathered a small collection of repositories @@ -97,14 +98,14 @@ that are ready to run and easy to modify for other similar usecases: # Implemented models - Reinforcement learning -- Continuous and discrete action spaces +- Support for continuous and discrete environment action spaces - Basic support for recurrent policies for A2C and PPO - Following policy gradient reinforcement learning algorithms: - Advantage Actor-Critic (A2C) - - Deep Deterministic Policy Gradient (DDPG) - Proximal Policy Optimization (PPO) - Trust Region Policy Optimization (TRPO) - Actor-Critic with Experience Replay (ACER) + - Deep Deterministic Policy Gradient (DDPG) - Deep Q-Learning (DQN) as described by DeepMind in their Nature publication with following improvements: - Double DQN @@ -215,6 +216,55 @@ Code quality: - Factor action noise back into the policy +# Directories + +Below I'll list brief explanation about contents of main top-level directories. + +- `docs` - Few markdown documents about the framework +- `examples-configs` - Ready to run configs with tried and tested models, usually heavily inspired by existing + literature. +- `examples-notebooks` - A few examples of how to interact with `vel` from the level of IPython notebook +- `vel` - Root for the Python source of the package + - `vel.api` - Interfaces and base classes used all over the codebase. To be used in source code only and not + referenced from config files. + - `vel.callback` - Definitions of callbacks that can be used in the training process. Can be referenced both by code + and by the config files. + - `vel.command` - Commands that can be used in your configuration files, and there isn't much need to refer to + them from code. + - `vel.data` - Various classes for handling data sources and data transformations. Referenced both by source code + and config files. 
+ - `vel.function` - Interfaces for creating various functions/interpolators, to be referenced by config files. + - `vel.internal` - Functions and classes to be only used by `vel` internally, and not by user code nor configs. + - `vel.metric` - Code for tracking metrics during training of your models. To be used by both code and configs. + - `vel.model` - Definition of models, which is kind of an end-package that references all other packages. Models + contain most other parts of the pipeline and define a training procedure. + - `vel.module` - Various useful definitions of PyTorch modules, to be used when defining your own `models` and + `layers`. + - `vel.net` - "Network" module that may be referenced by a model to define neural network architecture used. + - `vel.net.layer` - Modular layer system for defining networks declaratively in configuration files. + - `vel.notebook` - Utilities for interfacing with `vel` using IPython notebooks + - `vel.openai` - Imported parts of the codebase of `openai/baselines` that I didn't want to bring as a package + dependency. To be referenced in code mostly. + - `vel.optimizer` - Various implementations of deep learning optimizers. To be referenced mostly by scripts. + - `vel.rl` - Meta package for everything related to Reinforcement Learning + - `vel.rl.api` - Interfaces and base classes to be used for Reinforcement Learning models and other classes. 
+ - `vel.rl.buffer` - All classes relating to experience replay and experience buffers + - `vel.rl.command` - Commands used for RL training + - `vel.rl.env` - Basic reinforcement learning environments, mostly based on OpenAI gym + - `vel.rl.env_roller` - Classes for generating environment rollouts + - `vel.rl.layer` - Layers designed especially for RL + - `vel.rl.module` - PyTorch modules designed for RL + - `vel.rl.policy` - Equivalent of `vel.model` for RL + - `vel.rl.reinforcer` - Reinforcer manages RL training, and corresponds to `Trainer` in Supervised Learning + - `vel.rl.vecenv` - Utilities for vectorizing environments and stepping multiple environments at the same time + - `vel.scheduler` - Classes helping to set up learning rate schedules for the optimizers. To be referenced mostly + by scripts. + - `vel.storage` - Everything about persisting models and metrics. To be referenced mostly by configuration files. + - `vel.train` - Utilities for defining more generic training loops of models. To be referenced in both code and + config. + - `vel.util` - Collection of various utility functions to be used by all other modules. + + # Citing If you use `vel` in your research, you can cite it as follows: From 43369d55959e4f1f4dd32d13fe07b2996e2eb63e Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 1 Oct 2019 23:14:26 -0700 Subject: [PATCH 101/162] Continuing with major net/rl code refactoring. 
--- examples-configs/rl/atari/atari_a2c.yaml | 21 ++-- examples-configs/rl/atari/atari_ppo.yaml | 20 ++-- .../atari/{ => purgatory}/atari_a2c_lstm.yaml | 0 .../rl/atari/purgatory/atari_ddqn.yaml | 0 .../rl/atari/purgatory/atari_dqn.yaml | 82 +++++++++++++ .../atari/{ => purgatory}/atari_ppo_gru.yaml | 0 .../rl/atari/purgatory/atari_rainbow.yaml | 0 .../rl/atari/{ => purgatory}/atari_trpo.yaml | 0 vel/api/__init__.py | 4 +- vel/api/model.py | 43 ++----- vel/api/model_factory.py | 4 +- vel/api/network.py | 28 +++++ vel/api/size_hint.py | 72 +++++++++++ vel/module/input/embedding.py | 4 - vel/module/input/flatten.py | 4 - vel/module/input/identity.py | 4 - vel/module/input/image_to_tensor.py | 40 +++---- vel/module/input/normalize_observations.py | 4 - vel/module/input/one_hot_encoding.py | 4 - vel/module/input/sequence.py | 4 - vel/{rl/backbone => net}/__init__.py | 0 .../purgatory => net/layer}/__init__.py | 0 .../layer/input}/__init__.py | 0 vel/net/layer/input/image_to_tensor.py | 47 ++++++++ vel/net/layer/util/__init__.py | 0 vel/net/layer/util/repeat_tensor.py | 41 +++++++ vel/net/layer_base.py | 31 +++++ vel/net/modular.py | 113 ++++++++++++++++++ vel/rl/api/__init__.py | 2 +- vel/rl/api/{rl_model.py => policy.py} | 10 +- vel/rl/layer/__init__.py | 0 vel/rl/{backbone => layer}/nature_cnn.py | 54 ++++++--- vel/rl/layer/premade/__init__.py | 0 vel/rl/layer/premade/purgatory/__init__.py | 0 .../premade/purgatory}/double_nature_cnn.py | 0 .../purgatory}/double_noisy_nature_cnn.py | 0 .../premade/purgatory}/mlp.py | 0 .../premade/purgatory}/mlp_rnn.py | 0 .../premade/purgatory}/nature_cnn_rnn.py | 4 - .../premade/purgatory}/nature_cnn_small.py | 0 .../premade/purgatory}/noisy_nature_cnn.py | 4 - .../premade/purgatory}/rnn.py | 0 vel/rl/{algo => policy}/a2c.py | 59 +++++---- vel/rl/{algo => policy}/a2c_rnn.py | 0 vel/rl/policy/{semipurgatory => }/dqn.py | 38 +++--- vel/rl/{algo => policy}/ppo.py | 57 ++++++--- vel/rl/{algo => policy}/ppo_rnn.py | 0 vel/rl/{algo => 
policy}/trpo.py | 32 +---- vel/rl/xpolicy/__init__.py | 0 vel/rl/xpolicy/purgatory/__init__.py | 0 .../purgatory/deterministic_policy.py | 0 .../purgatory/old_stochastic_policy.py | 0 .../purgatory/old_stochastic_rnn_policy.py | 0 .../purgatory/q_distributional_policy.py | 0 .../purgatory/q_dueling_policy.py | 0 .../{policy => xpolicy}/purgatory/q_model.py | 50 ++++---- .../purgatory/q_noisy_model.py | 0 .../purgatory/q_rainbow_model.py | 0 .../purgatory/q_stochastic_policy_model.py | 0 vel/rl/xpolicy/semipurgatory/__init__.py | 0 .../semipurgatory/a2c_rnn.py | 0 .../{policy => xpolicy}/semipurgatory/acer.py | 0 .../{policy => xpolicy}/semipurgatory/ddpg.py | 0 .../semipurgatory/distributional_dqn.py | 0 .../semipurgatory/ppo_rnn.py | 0 .../{policy => xpolicy}/stochastic_policy.py | 0 .../stochastic_policy_separate.py | 0 .../stochastic_rnn_policy.py | 0 vel/{calc => util}/process.py | 0 vel/{calc/function.py => util/stats.py} | 0 70 files changed, 633 insertions(+), 247 deletions(-) rename examples-configs/rl/atari/{ => purgatory}/atari_a2c_lstm.yaml (100%) rename vel/calc/__init__.py => examples-configs/rl/atari/purgatory/atari_ddqn.yaml (100%) create mode 100644 examples-configs/rl/atari/purgatory/atari_dqn.yaml rename examples-configs/rl/atari/{ => purgatory}/atari_ppo_gru.yaml (100%) rename vel/rl/algo/__init__.py => examples-configs/rl/atari/purgatory/atari_rainbow.yaml (100%) rename examples-configs/rl/atari/{ => purgatory}/atari_trpo.yaml (100%) create mode 100644 vel/api/network.py create mode 100644 vel/api/size_hint.py rename vel/{rl/backbone => net}/__init__.py (100%) rename vel/{rl/policy/purgatory => net/layer}/__init__.py (100%) rename vel/{rl/policy/semipurgatory => net/layer/input}/__init__.py (100%) create mode 100644 vel/net/layer/input/image_to_tensor.py create mode 100644 vel/net/layer/util/__init__.py create mode 100644 vel/net/layer/util/repeat_tensor.py create mode 100644 vel/net/layer_base.py create mode 100644 vel/net/modular.py rename 
vel/rl/api/{rl_model.py => policy.py} (88%) create mode 100644 vel/rl/layer/__init__.py rename vel/rl/{backbone => layer}/nature_cnn.py (67%) create mode 100644 vel/rl/layer/premade/__init__.py create mode 100644 vel/rl/layer/premade/purgatory/__init__.py rename vel/rl/{backbone => layer/premade/purgatory}/double_nature_cnn.py (100%) rename vel/rl/{backbone => layer/premade/purgatory}/double_noisy_nature_cnn.py (100%) rename vel/rl/{backbone => layer/premade/purgatory}/mlp.py (100%) rename vel/rl/{backbone => layer/premade/purgatory}/mlp_rnn.py (100%) rename vel/rl/{backbone => layer/premade/purgatory}/nature_cnn_rnn.py (96%) rename vel/rl/{backbone => layer/premade/purgatory}/nature_cnn_small.py (100%) rename vel/rl/{backbone => layer/premade/purgatory}/noisy_nature_cnn.py (98%) rename vel/rl/{backbone => layer/premade/purgatory}/rnn.py (100%) rename vel/rl/{algo => policy}/a2c.py (70%) rename vel/rl/{algo => policy}/a2c_rnn.py (100%) rename vel/rl/policy/{semipurgatory => }/dqn.py (74%) rename vel/rl/{algo => policy}/ppo.py (79%) rename vel/rl/{algo => policy}/ppo_rnn.py (100%) rename vel/rl/{algo => policy}/trpo.py (93%) create mode 100644 vel/rl/xpolicy/__init__.py create mode 100644 vel/rl/xpolicy/purgatory/__init__.py rename vel/rl/{policy => xpolicy}/purgatory/deterministic_policy.py (100%) rename vel/rl/{policy => xpolicy}/purgatory/old_stochastic_policy.py (100%) rename vel/rl/{policy => xpolicy}/purgatory/old_stochastic_rnn_policy.py (100%) rename vel/rl/{policy => xpolicy}/purgatory/q_distributional_policy.py (100%) rename vel/rl/{policy => xpolicy}/purgatory/q_dueling_policy.py (100%) rename vel/rl/{policy => xpolicy}/purgatory/q_model.py (70%) rename vel/rl/{policy => xpolicy}/purgatory/q_noisy_model.py (100%) rename vel/rl/{policy => xpolicy}/purgatory/q_rainbow_model.py (100%) rename vel/rl/{policy => xpolicy}/purgatory/q_stochastic_policy_model.py (100%) create mode 100644 vel/rl/xpolicy/semipurgatory/__init__.py rename vel/rl/{policy => 
xpolicy}/semipurgatory/a2c_rnn.py (100%) rename vel/rl/{policy => xpolicy}/semipurgatory/acer.py (100%) rename vel/rl/{policy => xpolicy}/semipurgatory/ddpg.py (100%) rename vel/rl/{policy => xpolicy}/semipurgatory/distributional_dqn.py (100%) rename vel/rl/{policy => xpolicy}/semipurgatory/ppo_rnn.py (100%) rename vel/rl/{policy => xpolicy}/stochastic_policy.py (100%) rename vel/rl/{policy => xpolicy}/stochastic_policy_separate.py (100%) rename vel/rl/{policy => xpolicy}/stochastic_rnn_policy.py (100%) rename vel/{calc => util}/process.py (100%) rename vel/{calc/function.py => util/stats.py} (100%) diff --git a/examples-configs/rl/atari/atari_a2c.yaml b/examples-configs/rl/atari/atari_a2c.yaml index cdacb76c..cbe9dc46 100644 --- a/examples-configs/rl/atari/atari_a2c.yaml +++ b/examples-configs/rl/atari/atari_a2c.yaml @@ -12,23 +12,20 @@ vec_env: model: - name: vel.rl.algo.a2c + name: vel.rl.policy.a2c entropy_coefficient: 0.01 value_coefficient: 0.5 discount_factor: 0.99 - policy: - name: vel.rl.policy.stochastic_policy - input_block: - name: vel.module.input.image_to_tensor - - backbone: - name: vel.rl.backbone.nature_cnn - - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + size: [84, 84, 4] # Number of channels is frame history + - name: vel.rl.layer.nature_cnn + - name: vel.net.layer.util.repeat_tensor + times: 2 # Need to repeat output twice, for action and value heads reinforcer: diff --git a/examples-configs/rl/atari/atari_ppo.yaml b/examples-configs/rl/atari/atari_ppo.yaml index c96c518c..12d043e0 100644 --- a/examples-configs/rl/atari/atari_ppo.yaml +++ b/examples-configs/rl/atari/atari_ppo.yaml @@ -12,7 +12,7 @@ vec_env: model: - name: vel.rl.algo.ppo + name: vel.rl.policy.ppo cliprange: name: vel.function.linear @@ -25,17 +25,15 @@ model: discount_factor: 0.99 # Discount factor for the rewards gae_lambda: 0.95 # Generalized Advantage 
Estimator Lambda parameter - policy: - name: vel.rl.policy.stochastic_policy + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + size: [84, 84, 4] # Number of channels is frame history + - name: vel.rl.layer.nature_cnn + - name: vel.net.layer.util.repeat_tensor + times: 2 # Need to repeat output twice, for action and value heads - input_block: - name: vel.module.input.image_to_tensor - - backbone: - name: vel.rl.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history reinforcer: diff --git a/examples-configs/rl/atari/atari_a2c_lstm.yaml b/examples-configs/rl/atari/purgatory/atari_a2c_lstm.yaml similarity index 100% rename from examples-configs/rl/atari/atari_a2c_lstm.yaml rename to examples-configs/rl/atari/purgatory/atari_a2c_lstm.yaml diff --git a/vel/calc/__init__.py b/examples-configs/rl/atari/purgatory/atari_ddqn.yaml similarity index 100% rename from vel/calc/__init__.py rename to examples-configs/rl/atari/purgatory/atari_ddqn.yaml diff --git a/examples-configs/rl/atari/purgatory/atari_dqn.yaml b/examples-configs/rl/atari/purgatory/atari_dqn.yaml new file mode 100644 index 00000000..a811880a --- /dev/null +++ b/examples-configs/rl/atari/purgatory/atari_dqn.yaml @@ -0,0 +1,82 @@ +name: 'atari_dqn' + + +env: + name: vel.rl.env.classic_atari + game: !param game = 'BreakoutNoFrameskip-v4' + + +vec_env: + name: vel.rl.vecenv.dummy + frame_history: 4 # How many stacked frames go into a single observation + + +model: + name: vel.rl.algo.dqn + + target_update_frequency: 10_000 # After how many batches to update the target network + discount_factor: 0.99 + + backbone: + name: vel.module.sequence + modules: + - name: vel.modules.input.image_to_tensor + - name: vel.rl.models.backbone.nature_cnn + input_width: 84 + input_height: 84 + input_channels: 4 # The same as frame_history + + +reinforcer: + name: vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer + + env_roller: + name: 
vel.rl.env_roller.transition_replay_env_roller + + replay_buffer: + name: vel.rl.buffer.circular_replay_buffer + + buffer_initial_size: 30_000 # How many samples we need in the buffer before we start using replay buffer + buffer_capacity: 250_000 + + # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer + frame_stack_compensation: true + frame_history: 4 # How many stacked frames go into a single observation + + action_noise: + name: vel.rl.module.noise.eps_greedy + + epsilon: + name: vel.function.linear_and_constant + end_of_interpolation: 0.1 + initial_value: 1.0 + final_value: 0.1 + + rollout_steps: 4 # How many environment steps (per env) to perform per batch of training + training_steps: 32 # How many environment steps (per env) to perform per training round + parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper + + +optimizer: + name: vel.optimizer.rmsprop + lr: 2.5e-4 + alpha: 0.95 + momentum: 0.95 + epsilon: 1.0e-1 + max_grad_norm: 0.5 + + +commands: + train: + name: vel.rl.command.rl_train_command + total_frames: 1.1e7 # 11M + batches_per_epoch: 2500 + + record: + name: vel.rl.command.record_movie_command + takes: 10 + videoname: 'atari_vid_{:04}.avi' + + evaluate: + name: vel.rl.command.evaluate_env_command + takes: 100 diff --git a/examples-configs/rl/atari/atari_ppo_gru.yaml b/examples-configs/rl/atari/purgatory/atari_ppo_gru.yaml similarity index 100% rename from examples-configs/rl/atari/atari_ppo_gru.yaml rename to examples-configs/rl/atari/purgatory/atari_ppo_gru.yaml diff --git a/vel/rl/algo/__init__.py b/examples-configs/rl/atari/purgatory/atari_rainbow.yaml similarity index 100% rename from vel/rl/algo/__init__.py rename to examples-configs/rl/atari/purgatory/atari_rainbow.yaml diff --git a/examples-configs/rl/atari/atari_trpo.yaml b/examples-configs/rl/atari/purgatory/atari_trpo.yaml similarity index 100% rename from examples-configs/rl/atari/atari_trpo.yaml 
rename to examples-configs/rl/atari/purgatory/atari_trpo.yaml diff --git a/vel/api/__init__.py b/vel/api/__init__.py index 358a356f..9ddf09d8 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -1,7 +1,9 @@ from .callback import Callback from .info import BatchInfo, EpochInfo, TrainingInfo +from .size_hint import SizeHint, SizeHints +from .network import Network, BackboneNetwork from .model import ( - Model, OptimizedModel, GradientModel, LossFunctionModel, BackboneModel, LinearBackboneModel + Model, ValidatedModel, OptimizedModel, GradientModel, LossFunctionModel ) from .model_config import ModelConfig from .model_factory import ModelFactory diff --git a/vel/api/model.py b/vel/api/model.py index 9032e098..693d6a46 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -4,14 +4,14 @@ import vel.util.module_util as mu from vel.api.optimizer import VelOptimizer, OptimizerFactory -from vel.api.scheduler import SchedulerFactory -from vel.api.callback import Callback from vel.metric.loss_metric import Loss from vel.util.summary import summary +from .network import Network -class Model(nn.Module): - """ Class representing full neural network model """ + +class Model(Network): + """ Class representing full neural network model, generally used to solve some problem """ def metrics(self) -> list: """ Set of metrics for this model """ @@ -48,19 +48,6 @@ def summary(self, input_size=None): else: summary(self, input_size) - def reset_weights(self): - """ Call proper initializers for the weights """ - pass - - @property - def is_stateful(self) -> bool: - """ If the model has a state that needs to be fed between individual observations """ - return False - - def zero_state(self, batch_size): - """ Potential state for the model """ - return None - class OptimizedModel(Model): """ Model that is being optimized by an 'optimizer' """ @@ -77,6 +64,10 @@ def optimize(self, data: dict, optimizer: VelOptimizer) -> dict: """ raise NotImplementedError + +class 
ValidatedModel(OptimizedModel): + """ Model that also has a validation operation """ + def validate(self, data: dict) -> dict: """ Perform one step of model inference without optimization @@ -85,7 +76,7 @@ def validate(self, data: dict) -> dict: raise NotImplementedError -class GradientModel(OptimizedModel): +class GradientModel(ValidatedModel): """ Model that calculates a single gradient and optimizes it """ def optimize(self, data: dict, optimizer: VelOptimizer) -> dict: @@ -144,19 +135,3 @@ def calculate_gradient(self, data: dict) -> dict: def loss_value(self, x_data, y_true, y_pred) -> torch.tensor: """ Calculate a value of loss function """ raise NotImplementedError - - -class BackboneModel(Model): - """ Model that serves as a backbone network to connect your heads to """ - - -class LinearBackboneModel(BackboneModel): - """ - Model that serves as a backbone network to connect your heads to. - Has a final output of a single-dimensional linear layer. - """ - - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - raise NotImplementedError diff --git a/vel/api/model_factory.py b/vel/api/model_factory.py index eeb533a0..0015f006 100644 --- a/vel/api/model_factory.py +++ b/vel/api/model_factory.py @@ -1,11 +1,11 @@ -from .model import Model +from .network import Network from vel.internal.generic_factory import GenericFactory class ModelFactory: """ Factory class for models """ - def instantiate(self, **extra_args) -> Model: + def instantiate(self, **extra_args) -> Network: raise NotImplementedError @staticmethod diff --git a/vel/api/network.py b/vel/api/network.py new file mode 100644 index 00000000..b6c8b97d --- /dev/null +++ b/vel/api/network.py @@ -0,0 +1,28 @@ +import torch.nn as nn + +from .size_hint import SizeHints + + +class Network(nn.Module): + """ Vel wrapper over nn.Module offering a few internally useful utilities """ + + def reset_weights(self): + """ Call proper initializers for the weights """ + pass + + @property + 
def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return False + + def zero_state(self, batch_size): + """ Potential state for the model """ + return None + + +class BackboneNetwork(Network): + """ Network, whose output feeds into other models. Needs to provide size hints. """ + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + raise NotImplementedError diff --git a/vel/api/size_hint.py b/vel/api/size_hint.py new file mode 100644 index 00000000..d6a3879b --- /dev/null +++ b/vel/api/size_hint.py @@ -0,0 +1,72 @@ +import typing +import collections.abc as abc + +from vel.exception import VelException + + +class SizeHint(tuple): + """ Neural network hint of a layer size. Should consist of either integers or Nones """ + + def __new__(cls, *args): + return super().__new__(cls, tuple(args)) + + def last(self) -> int: + """ Return last part of the size hint, make sure it's not None """ + assert self[-1] is not None, "Size hint shouldn't be None" + return self[-1] + + def __repr__(self): + internal = ", ".join([self._inner_repr(s) for s in self]) + return f"{self.__class__.__name__}({internal})" + + def _inner_repr(self, x): + if x is None: + return '-' + else: + return repr(x) + + +SizeTuple = typing.Tuple[SizeHint] +SizeDict = typing.Dict[str, SizeHint] + + +class SizeHints: + """ SizeHint, tuple of size hints or dict of size hints """ + + TYPE_NONE = 0 + TYPE_SIZE = 1 + TYPE_TUPLE = 2 + TYPE_DICT = 3 + + def __init__(self, size_hints: typing.Union[SizeHint, SizeTuple, SizeDict] = None): + self.size_hints = size_hints + + if self.size_hints is None: + self.type = self.TYPE_NONE + elif isinstance(self.size_hints, SizeHint): + self.type = self.TYPE_SIZE + elif isinstance(self.size_hints, abc.Sequence): + self.size_hints = tuple(self.size_hints) + self.type = self.TYPE_TUPLE + elif isinstance(self.size_hints, abc.Mapping): + self.type = self.TYPE_DICT + else: + raise 
VelException("Invalid size hints: {}".format(self.size_hints)) + + def assert_tuple(self, length) -> SizeTuple: + """ Assert given size hints is a tuple """ + assert self.type == self.TYPE_TUPLE, "Network needs to return a tuple" + assert len(self.size_hints) == length, "Network must return {} results".format(length) + return self.size_hints + + def assert_single(self, length: typing.Optional[int] = None) -> SizeHint: + """ Make sure there is a single tensor as a size hint """ + assert self.type == self.TYPE_SIZE, "Layer input must be single tensor" + + if length is not None: + assert len(self.size_hints) == length, f"Layer input must have shape [{length}]" + + return self.size_hints + + def __repr__(self): + return repr(self.size_hints) diff --git a/vel/module/input/embedding.py b/vel/module/input/embedding.py index d055e257..b576ddfa 100644 --- a/vel/module/input/embedding.py +++ b/vel/module/input/embedding.py @@ -41,7 +41,3 @@ def instantiate(**_): return EmbeddingInput(alphabet_size, output_dim, pretrained=pretrained, frozen=frozen, source=source) return ModelFactory.generic(instantiate) - - -# Scripting interface -EmbeddingInputFactory = create diff --git a/vel/module/input/flatten.py b/vel/module/input/flatten.py index 0972616d..faf424df 100644 --- a/vel/module/input/flatten.py +++ b/vel/module/input/flatten.py @@ -20,7 +20,3 @@ def instantiate(**_): return Flatten() return ModelFactory.generic(instantiate) - - -# Scripting interface -FlattenInputFactory = create diff --git a/vel/module/input/identity.py b/vel/module/input/identity.py index 7018051e..6b9ee547 100644 --- a/vel/module/input/identity.py +++ b/vel/module/input/identity.py @@ -19,7 +19,3 @@ def instantiate(**_): return Identity() return ModelFactory.generic(instantiate) - - -# Scripting interface -IdentityFactory = create diff --git a/vel/module/input/image_to_tensor.py b/vel/module/input/image_to_tensor.py index 13b58ebd..b02a3229 100644 --- a/vel/module/input/image_to_tensor.py +++ 
b/vel/module/input/image_to_tensor.py @@ -1,36 +1,26 @@ import torch -from vel.api import BackboneModel, ModelFactory +from vel.api import Network -class ImageToTensor(BackboneModel): +def image_to_tensor(image: torch.Tensor) -> torch.Tensor: + """ Convert pytorch image (b, w, h, c) into tensor (b, c, w, h) float32 """ + result = image.permute(0, 3, 1, 2).contiguous() + + if result.dtype == torch.uint8: + result = result.type(torch.float) / 255.0 + else: + result = result.type(torch.float) + + return result + + +class ImageToTensor(Network): """ Convert simple image to tensor. Flip channels to a [C, W, H] order and potentially convert 8-bit color values to floats """ - def __init__(self): - super().__init__() - - def reset_weights(self): - pass - def forward(self, image): - result = image.permute(0, 3, 1, 2).contiguous() - - if result.dtype == torch.uint8: - result = result.type(torch.float) / 255.0 - else: - result = result.type(torch.float) - - return result - - -def create(): - """ Vel factory function """ - return ModelFactory.generic(ImageToTensor) - - -# Scripting interface -ImageToTensorFactory = create + return image_to_tensor(image) diff --git a/vel/module/input/normalize_observations.py b/vel/module/input/normalize_observations.py index 914cb5f6..d3013238 100644 --- a/vel/module/input/normalize_observations.py +++ b/vel/module/input/normalize_observations.py @@ -58,7 +58,3 @@ def instantiate(**_): return NormalizeObservations(input_shape) return ModelFactory.generic(instantiate) - - -# Scripting interface -NormalizeObservationsFactory = create diff --git a/vel/module/input/one_hot_encoding.py b/vel/module/input/one_hot_encoding.py index eaee642c..125bdb47 100644 --- a/vel/module/input/one_hot_encoding.py +++ b/vel/module/input/one_hot_encoding.py @@ -27,7 +27,3 @@ def instantiate(**_): return OneHotEncodingInput(alphabet_size) return ModelFactory.generic(instantiate) - - -# Scripting interface -OneHotEncodingInputFactory = create diff --git 
a/vel/module/input/sequence.py b/vel/module/input/sequence.py index 43d41ad1..51c50c82 100644 --- a/vel/module/input/sequence.py +++ b/vel/module/input/sequence.py @@ -19,7 +19,3 @@ def instantiate(**_): return SequenceInput([f.instantiate() for f in modules]) return ModelFactory.generic(instantiate) - - -# Scripting interface -SequenceInputFactory = create diff --git a/vel/rl/backbone/__init__.py b/vel/net/__init__.py similarity index 100% rename from vel/rl/backbone/__init__.py rename to vel/net/__init__.py diff --git a/vel/rl/policy/purgatory/__init__.py b/vel/net/layer/__init__.py similarity index 100% rename from vel/rl/policy/purgatory/__init__.py rename to vel/net/layer/__init__.py diff --git a/vel/rl/policy/semipurgatory/__init__.py b/vel/net/layer/input/__init__.py similarity index 100% rename from vel/rl/policy/semipurgatory/__init__.py rename to vel/net/layer/input/__init__.py diff --git a/vel/net/layer/input/image_to_tensor.py b/vel/net/layer/input/image_to_tensor.py new file mode 100644 index 00000000..cd034320 --- /dev/null +++ b/vel/net/layer/input/image_to_tensor.py @@ -0,0 +1,47 @@ +import typing + +from vel.api import SizeHints, SizeHint +from vel.net.modular import LayerFactory, Layer +from vel.module.input.image_to_tensor import image_to_tensor + + +class ImageToTensorLayer(Layer): + """ + Convert simple image to tensor. 
+ + Flip channels to a [C, W, H] order and potentially convert 8-bit color values to floats + """ + def __init__(self, name: str, size: tuple = None): + super().__init__(name) + + if size is not None: + assert len(size) == 3, "Images must have three dimensions" + self.w, self.h, self.c = size + else: + self.w, self.h, self.c = (None, None, None) + + def forward(self, direct, state: dict = None, context: dict = None): + return image_to_tensor(direct) + + def size_hints(self) -> SizeHints: + return SizeHints(SizeHint(None, self.c, self.w, self.h)) + + +class ImageToTensorLayerFactory(LayerFactory): + def __init__(self, size: tuple = None): + self.size = size + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "image_to_tensor" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + """ Create a given layer object """ + # Potential improvement here is to use either direct input or size parameter + return ImageToTensorLayer(name=name, size=self.size) + + +def create(size: tuple = None): + """ Vel factory function """ + return ImageToTensorLayerFactory(size=size) diff --git a/vel/net/layer/util/__init__.py b/vel/net/layer/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/net/layer/util/repeat_tensor.py b/vel/net/layer/util/repeat_tensor.py new file mode 100644 index 00000000..58ea5dc1 --- /dev/null +++ b/vel/net/layer/util/repeat_tensor.py @@ -0,0 +1,41 @@ +import typing + +from vel.api import SizeHints, SizeHint +from vel.net.modular import LayerFactory, Layer + + +class RepeatTensor(Layer): + """ Repeat single tensor multiple times """ + + def __init__(self, name: str, times: int, size_hint: SizeHint): + super().__init__(name) + self.times = times + self.size_hint = size_hint + + def forward(self, direct, state: dict = None, context: dict = None): + return tuple([direct] * self.times) + + def size_hints(self) -> SizeHints: + return SizeHints(tuple([self.size_hint] * 
self.times)) + + +class RepeatTensorFactory(LayerFactory): + def __init__(self, times: int): + self.times = times + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "repeat_tensor" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + return RepeatTensor( + name=name, + times=self.times, + size_hint=direct_input.assert_single() + ) + + +def create(times: int): + """ Vel factory function """ + return RepeatTensorFactory(times=times) diff --git a/vel/net/layer_base.py b/vel/net/layer_base.py new file mode 100644 index 00000000..62dcabcb --- /dev/null +++ b/vel/net/layer_base.py @@ -0,0 +1,31 @@ +import typing + +from vel.api import BackboneNetwork, SizeHints, SizeHint + + +class Layer(BackboneNetwork): + def __init__(self, name: str): + super().__init__() + self.name = name + + def state_size_hints(self) -> typing.Dict[str, SizeHint]: + """ Size hints for state part of this network """ + return {} + + def forward(self, direct, state: dict = None, context: dict = None): + """ Forward propagation of a single layer """ + raise NotImplementedError + + +class LayerFactory: + """ Factory for layers """ + + @property + def name_base(self) -> str: + """ Base of layer name """ + raise NotImplementedError + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + """ Create a given layer object """ + raise NotImplementedError + diff --git a/vel/net/modular.py b/vel/net/modular.py new file mode 100644 index 00000000..e3147c3f --- /dev/null +++ b/vel/net/modular.py @@ -0,0 +1,113 @@ +import typing +import collections +import torch.nn as nn + +from vel.api import Network, BackboneNetwork, ModelFactory, SizeHints, SizeHint + +from .layer_base import Layer, LayerFactory + + +def instantiate_layers(layers: [LayerFactory]) -> nn.Module: + """ Instantiate list of layer factories into PyTorch Module """ + size_hint = SizeHints() # Empty input at first + module_dict = 
collections.OrderedDict() + context = {} + + for idx, layer_factory in enumerate(layers): + counter = idx + 1 + name = "{}_{:04d}".format(layer_factory.name_base, counter) + + layer = layer_factory.instantiate(name=name, direct_input=size_hint, context=context) + size_hint = layer.size_hints() + + module_dict[name] = layer + + return nn.Sequential(module_dict) + + +class ModularNetwork(BackboneNetwork): + """ Network that is built from layers """ + + def __init__(self, layers: nn.Module): + super().__init__() + + self.layers = layers + assert not any(l.is_stateful for l in self.layers), "Does not support stateful layers" + + def reset_weights(self): + """ Call proper initializers for the weights """ + for l in self.layers: + l.reset_weights() + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return False + + def size_hints(self) -> SizeHints: + return self.layers[-1].size_hints() + + def zero_state(self, batch_size): + """ Potential state for the model """ + return None + + def reset_state(self, state, dones): + """ Reset the state after the episode has been terminated """ + raise NotImplementedError + + def forward(self, input_data, state=None): + return self.layers(input_data) + + +class StatefulModularNetwork(BackboneNetwork): + """ Modular network handling the state between the episodes """ + + def __init__(self, layers: nn.Module): + super().__init__() + + self.layers = layers + + def reset_weights(self): + """ Call proper initializers for the weights """ + for l in self.layers: + l.reset_weights() + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return True + + def size_hints(self) -> SizeHints: + return self.layers[-1].size_hints() + + def zero_state(self, batch_size): + """ Potential state for the model """ + raise NotImplementedError + + def reset_state(self, state, dones): + """ 
Reset the state after the episode has been terminated """ + raise NotImplementedError + + def forward(self, input_data, state=None): + raise NotImplementedError + + +class ModularNetworkFactory(ModelFactory): + """ Factory class for the modular network """ + def __init__(self, layers: [LayerFactory]): + self.layers = layers + + def instantiate(self, **extra_args) -> BackboneNetwork: + """ Create either stateful or not modular network instance """ + layers = instantiate_layers(self.layers) + is_stateful = any(l.is_stateful for l in layers) + + if is_stateful: + return StatefulModularNetwork(layers) + else: + return ModularNetwork(layers) + + +def create(layers: [LayerFactory]): + """ Vel factory function """ + return ModularNetworkFactory(layers) diff --git a/vel/rl/api/__init__.py b/vel/rl/api/__init__.py index 699a8bdb..8102ab03 100644 --- a/vel/rl/api/__init__.py +++ b/vel/rl/api/__init__.py @@ -1,6 +1,6 @@ from .env_base import EnvFactory, VecEnvFactory from .env_roller import EnvRollerBase, ReplayEnvRollerBase, EnvRollerFactoryBase, ReplayEnvRollerFactoryBase from .rollout import Rollout, Trajectories, Transitions -from .rl_model import RlPolicy +from .policy import RlPolicy from .reinforcer_base import Reinforcer, ReinforcerFactory from .replay_buffer import ReplayBuffer, ReplayBufferFactory diff --git a/vel/rl/api/rl_model.py b/vel/rl/api/policy.py similarity index 88% rename from vel/rl/api/rl_model.py rename to vel/rl/api/policy.py index 0e0eb609..e5701d4d 100644 --- a/vel/rl/api/rl_model.py +++ b/vel/rl/api/policy.py @@ -1,8 +1,8 @@ -from vel.api import Model, VelOptimizer, OptimizerFactory, BatchInfo +from vel.api import OptimizedModel, VelOptimizer, OptimizerFactory, BatchInfo from vel.rl.api import Rollout -class RlPolicy(Model): +class RlPolicy(OptimizedModel): """ Base class for reinforcement learning policies """ def __init__(self, discount_factor: float): @@ -41,6 +41,8 @@ def optimize(self, batch_info: BatchInfo, rollout: Rollout) -> dict: for key, 
value in opt_metrics.items(): metrics[key] = value + self.post_optimization_step(batch_info, rollout) + return metrics def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: @@ -50,6 +52,10 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: """ raise NotImplementedError + def post_optimization_step(self, batch_info: BatchInfo, rollout: Rollout): + """ Optional operations to perform after optimization """ + pass + def reset_state(self, state, dones): """ Reset the state after the episode has been terminated """ raise NotImplementedError diff --git a/vel/rl/layer/__init__.py b/vel/rl/layer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/rl/backbone/nature_cnn.py b/vel/rl/layer/nature_cnn.py similarity index 67% rename from vel/rl/backbone/nature_cnn.py rename to vel/rl/layer/nature_cnn.py index 8ce20ce6..b44b84d9 100644 --- a/vel/rl/backbone/nature_cnn.py +++ b/vel/rl/layer/nature_cnn.py @@ -5,6 +5,7 @@ Under MIT license. 
""" import numpy as np +import typing import torch.nn as nn import torch.nn.init as init @@ -12,15 +13,18 @@ import vel.util.network as net_util -from vel.api import LinearBackboneModel, ModelFactory +from vel.api import ModelFactory, SizeHint, SizeHints +from vel.net.modular import Layer, LayerFactory -class NatureCnn(LinearBackboneModel): + +class NatureCnn(Layer): """ Neural network as defined in the paper 'Human-level control through deep reinforcement learning' """ - def __init__(self, input_width, input_height, input_channels, output_dim=512): - super().__init__() - self._output_dim = output_dim + def __init__(self, name: str, input_width, input_height, input_channels, output_dim=512): + super().__init__(name) + + self.output_dim = output_dim self.conv1 = nn.Conv2d( in_channels=input_channels, @@ -57,11 +61,6 @@ def __init__(self, input_width, input_height, input_channels, output_dim=512): self.output_dim ) - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - return self._output_dim - def reset_weights(self): """ Call proper initializers for the weights """ for m in self.modules(): @@ -74,7 +73,10 @@ def reset_weights(self): init.orthogonal_(m.weight, gain=np.sqrt(2)) init.constant_(m.bias, 0.0) - def forward(self, image): + def size_hints(self) -> SizeHints: + return SizeHints(SizeHint(None, self.output_dim)) + + def forward(self, image, state: dict = None, context: dict = None): result = image result = F.relu(self.conv1(result)) result = F.relu(self.conv2(result)) @@ -83,16 +85,30 @@ def forward(self, image): return F.relu(self.linear_layer(flattened)) -def create(input_width, input_height, input_channels=1, output_dim=512): - """ Vel factory function """ - def instantiate(**_): +class NatureCnnFactory(LayerFactory): + """ Nature Cnn Network Factory """ + + def __init__(self, output_dim: int = 512): + self.output_dim = output_dim + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "nature_cnn" 
+ + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + (b, c, w, h) = direct_input.assert_single(4) + return NatureCnn( - input_width=input_width, input_height=input_height, input_channels=input_channels, - output_dim=output_dim + name=name, + input_width=w, + input_height=h, + input_channels=c, + output_dim=self.output_dim ) - return ModelFactory.generic(instantiate) +def create(output_dim=512): + """ Vel factory function """ + return NatureCnnFactory(output_dim=output_dim) -# Scripting interface -NatureCnnFactory = create diff --git a/vel/rl/layer/premade/__init__.py b/vel/rl/layer/premade/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/rl/layer/premade/purgatory/__init__.py b/vel/rl/layer/premade/purgatory/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/rl/backbone/double_nature_cnn.py b/vel/rl/layer/premade/purgatory/double_nature_cnn.py similarity index 100% rename from vel/rl/backbone/double_nature_cnn.py rename to vel/rl/layer/premade/purgatory/double_nature_cnn.py diff --git a/vel/rl/backbone/double_noisy_nature_cnn.py b/vel/rl/layer/premade/purgatory/double_noisy_nature_cnn.py similarity index 100% rename from vel/rl/backbone/double_noisy_nature_cnn.py rename to vel/rl/layer/premade/purgatory/double_noisy_nature_cnn.py diff --git a/vel/rl/backbone/mlp.py b/vel/rl/layer/premade/purgatory/mlp.py similarity index 100% rename from vel/rl/backbone/mlp.py rename to vel/rl/layer/premade/purgatory/mlp.py diff --git a/vel/rl/backbone/mlp_rnn.py b/vel/rl/layer/premade/purgatory/mlp_rnn.py similarity index 100% rename from vel/rl/backbone/mlp_rnn.py rename to vel/rl/layer/premade/purgatory/mlp_rnn.py diff --git a/vel/rl/backbone/nature_cnn_rnn.py b/vel/rl/layer/premade/purgatory/nature_cnn_rnn.py similarity index 96% rename from vel/rl/backbone/nature_cnn_rnn.py rename to vel/rl/layer/premade/purgatory/nature_cnn_rnn.py index 8888da6e..6dccd7c9 100644 --- 
a/vel/rl/backbone/nature_cnn_rnn.py +++ b/vel/rl/layer/premade/purgatory/nature_cnn_rnn.py @@ -56,7 +56,3 @@ def instantiate(**_): ) return ModelFactory.generic(instantiate) - - -# Add this to make nicer scripting interface -NatureCnnFactory = create diff --git a/vel/rl/backbone/nature_cnn_small.py b/vel/rl/layer/premade/purgatory/nature_cnn_small.py similarity index 100% rename from vel/rl/backbone/nature_cnn_small.py rename to vel/rl/layer/premade/purgatory/nature_cnn_small.py diff --git a/vel/rl/backbone/noisy_nature_cnn.py b/vel/rl/layer/premade/purgatory/noisy_nature_cnn.py similarity index 98% rename from vel/rl/backbone/noisy_nature_cnn.py rename to vel/rl/layer/premade/purgatory/noisy_nature_cnn.py index d258543e..08ff71ba 100644 --- a/vel/rl/backbone/noisy_nature_cnn.py +++ b/vel/rl/layer/premade/purgatory/noisy_nature_cnn.py @@ -101,7 +101,3 @@ def instantiate(**_): ) return ModelFactory.generic(instantiate) - - -# Scripting interface -NatureCnnFactory = create diff --git a/vel/rl/backbone/rnn.py b/vel/rl/layer/premade/purgatory/rnn.py similarity index 100% rename from vel/rl/backbone/rnn.py rename to vel/rl/layer/premade/purgatory/rnn.py diff --git a/vel/rl/algo/a2c.py b/vel/rl/policy/a2c.py similarity index 70% rename from vel/rl/algo/a2c.py rename to vel/rl/policy/a2c.py index 4fd529c7..858cf5e4 100644 --- a/vel/rl/algo/a2c.py +++ b/vel/rl/policy/a2c.py @@ -1,17 +1,21 @@ +import gym import torch import torch.nn.functional as F from vel.metric.base import AveragingNamedMetric -from vel.calc.function import explained_variance -from vel.api import BackboneModel, ModelFactory, BatchInfo +from vel.util.stats import explained_variance +from vel.api import ModelFactory, BatchInfo, BackboneNetwork from vel.rl.api import RlPolicy, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae +from vel.rl.module.stochastic_action_head import StochasticActionHead +from vel.rl.module.value_head import ValueHead class A2C(RlPolicy): """ 
Simplest policy gradient - calculate loss as an advantage of an actor versus value function """ - def __init__(self, policy: BackboneModel, entropy_coefficient, value_coefficient, discount_factor: float, + def __init__(self, net: BackboneNetwork, action_space: gym.Space, + entropy_coefficient, value_coefficient, discount_factor: float, gae_lambda=1.0): super().__init__(discount_factor) @@ -19,26 +23,40 @@ def __init__(self, policy: BackboneModel, entropy_coefficient, value_coefficient self.value_coefficient = value_coefficient self.gae_lambda = gae_lambda - self.policy = policy + self.net = net - assert not self.policy.is_stateful, "For stateful policies, try A2CRnn" + assert not self.net.is_stateful, "For stateful policies, use A2CRnn" + + # Make sure network returns two results + (action_size, value_size) = self.net.size_hints().assert_tuple(2) + + self.action_head = StochasticActionHead( + action_space=action_space, + input_dim=action_size.last(), + ) + + self.value_head = ValueHead( + input_dim=value_size.last() + ) def reset_weights(self): """ Initialize properly model weights """ - self.policy.reset_weights() + self.net.reset_weights() + self.action_head.reset_weights() + self.value_head.reset_weights() def forward(self, observation, state=None): """ Calculate model outputs """ - return self.policy(observation, state=state) + action_hidden, value_hidden = self.net(observation, state=state) + return self.action_head(action_hidden), self.value_head(value_hidden) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ - action_pd_params, value_output = self(observation, state=state) - - actions = self.policy.action_head.sample(action_pd_params, deterministic=deterministic) + action_pd_params, value_output = self(observation) + actions = self.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action - logprobs = self.policy.action_head.logprob(actions, 
action_pd_params) + logprobs = self.action_head.logprob(actions, action_pd_params) return { 'actions': actions, @@ -78,8 +96,8 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: pd_params, model_values = self(observations) - log_probs = self.policy.action_head.logprob(actions, pd_params) - entropy = self.policy.action_head.entropy(pd_params) + log_probs = self.action_head.logprob(actions, pd_params) + entropy = self.action_head.entropy(pd_params) # Actual calculations. Pretty trivial policy_loss = -torch.mean(advantages * log_probs) @@ -113,8 +131,8 @@ def metrics(self) -> list: class A2CFactory(ModelFactory): """ Factory class for policy gradient models """ - def __init__(self, policy, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): - self.policy = policy + def __init__(self, net, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): + self.net = net self.entropy_coefficient = entropy_coefficient self.value_coefficient = value_coefficient self.discount_factor = discount_factor @@ -122,11 +140,12 @@ def __init__(self, policy, entropy_coefficient, value_coefficient, discount_fact def instantiate(self, **extra_args): """ Instantiate the model """ - # action_space = extra_args.pop('action_space') - policy = self.policy.instantiate(**extra_args) + action_space = extra_args.pop('action_space') + net = self.net.instantiate(**extra_args) return A2C( - policy=policy, + net=net, + action_space=action_space, entropy_coefficient=self.entropy_coefficient, value_coefficient=self.value_coefficient, discount_factor=self.discount_factor, @@ -134,10 +153,10 @@ def instantiate(self, **extra_args): ) -def create(policy: BackboneModel, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): +def create(net: ModelFactory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): """ Vel factory function """ return A2CFactory( - policy=policy, + net=net, 
entropy_coefficient=entropy_coefficient, value_coefficient=value_coefficient, discount_factor=discount_factor, diff --git a/vel/rl/algo/a2c_rnn.py b/vel/rl/policy/a2c_rnn.py similarity index 100% rename from vel/rl/algo/a2c_rnn.py rename to vel/rl/policy/a2c_rnn.py diff --git a/vel/rl/policy/semipurgatory/dqn.py b/vel/rl/policy/dqn.py similarity index 74% rename from vel/rl/policy/semipurgatory/dqn.py rename to vel/rl/policy/dqn.py index bd3c355e..c6ea4933 100644 --- a/vel/rl/policy/semipurgatory/dqn.py +++ b/vel/rl/policy/dqn.py @@ -2,20 +2,20 @@ import torch.nn.functional as F import torch.nn.utils -from vel.api import ModelFactory +from vel.api import ModelFactory, BackboneModel from vel.metric import AveragingNamedMetric -from vel.rl.api import OptimizerAlgoBase +from vel.rl.api import RlPolicy -class DeepQLearning(OptimizerAlgoBase): +class DeepQLearning(RlPolicy): """ Deep Q-Learning algorithm """ - def __init__(self, model_factory: ModelFactory, discount_factor: float, double_dqn: bool, - target_update_frequency: int, max_grad_norm: float): - super().__init__(max_grad_norm) + def __init__(self, backbone: BackboneModel, + discount_factor: float, double_dqn: bool, + target_update_frequency: int): + super().__init__(discount_factor) - self.model_factory = model_factory - self.discount_factor = discount_factor + self.backbone = backbone self.double_dqn = double_dqn self.target_update_frequency = target_update_frequency @@ -28,7 +28,7 @@ def initialize(self, training_info, model, environment, device): self.target_model.load_state_dict(model.state_dict()) self.target_model.eval() - def calculate_gradient(self, batch_info, device, model, rollout): + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: """ Calculate loss of the supplied rollout """ evaluator = model.evaluate(rollout) @@ -74,29 +74,29 @@ def calculate_gradient(self, batch_info, device, model, rollout): 'average_q_target': torch.mean(estimated_return).item() } - def 
post_optimization_step(self, batch_info, device, model, rollout): + def post_optimization_step(self, batch_info, rollout): """ Steps to take after optimization has been done""" if batch_info.aggregate_batch_number % self.target_update_frequency == 0: - self.target_model.load_state_dict(model.state_dict()) + self.target_model.load_state_dict(self.state_dict()) self.target_model.eval() def metrics(self) -> list: """ List of metrics to track for this learning process """ return [ - AveragingNamedMetric("loss"), - AveragingNamedMetric("average_q_selected"), - AveragingNamedMetric("average_q_target"), - AveragingNamedMetric("grad_norm"), + AveragingNamedMetric("loss", scope="model"), + AveragingNamedMetric("average_q_selected", scope="model"), + AveragingNamedMetric("average_q_target", scope="model") ] -def create(model: ModelFactory, discount_factor: float, target_update_frequency: int, - max_grad_norm: float, double_dqn: bool = False): +def create(backbone: ModelFactory, + discount_factor: float, target_update_frequency: int, + double_dqn: bool = False): """ Vel factory function """ + return DeepQLearning( - model_factory=model, + backbone=backbone, discount_factor=discount_factor, double_dqn=double_dqn, target_update_frequency=target_update_frequency, - max_grad_norm=max_grad_norm ) diff --git a/vel/rl/algo/ppo.py b/vel/rl/policy/ppo.py similarity index 79% rename from vel/rl/algo/ppo.py rename to vel/rl/policy/ppo.py index 483b4830..58be0a22 100644 --- a/vel/rl/algo/ppo.py +++ b/vel/rl/policy/ppo.py @@ -1,19 +1,23 @@ +import gym import torch import numbers -from vel.api import BackboneModel, BatchInfo, ModelFactory -from vel.calc.function import explained_variance +from vel.api import BatchInfo, ModelFactory, BackboneNetwork +from vel.util.stats import explained_variance from vel.function.constant import ConstantSchedule from vel.metric.base import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout, Trajectories from vel.rl.discount_bootstrap import 
discount_bootstrap_gae +from vel.rl.module.stochastic_action_head import StochasticActionHead +from vel.rl.module.value_head import ValueHead + class PPO(RlPolicy): """ Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 """ - def __init__(self, policy: BackboneModel, + def __init__(self, net: BackboneNetwork, action_space: gym.Space, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): super().__init__(discount_factor) @@ -28,23 +32,40 @@ def __init__(self, policy: BackboneModel, else: self.cliprange = cliprange - self.policy = policy + self.net = net + + assert not self.net.is_stateful, "For stateful policies, use PPORnn" + + # Make sure network returns two results + (action_size, value_size) = self.net.size_hints().assert_tuple(2) + + self.action_head = StochasticActionHead( + action_space=action_space, + input_dim=action_size.last(), + ) + + self.value_head = ValueHead( + input_dim=value_size.last() + ) def reset_weights(self): """ Initialize properly model weights """ - self.policy.reset_weights() + self.net.reset_weights() + self.action_head.reset_weights() + self.value_head.reset_weights() def forward(self, observation): """ Calculate model outputs """ - return self.policy.forward(observation) + action_hidden, value_hidden = self.net(observation) + return self.action_head(action_hidden), self.value_head(value_hidden) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ action_pd_params, value_output = self(observation) - actions = self.policy.action_head.sample(action_pd_params, deterministic=deterministic) + actions = self.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action - logprobs = self.policy.action_head.logprob(actions, action_pd_params) + logprobs = self.action_head.logprob(actions, action_pd_params) return { 'actions': actions, @@ -88,8 +109,8 @@ def 
calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: # PART 0.1 - Model evaluation pd_params, model_values = self(observations) - model_action_logprobs = self.policy.action_head.logprob(actions, pd_params) - entropy = self.policy.action_head.entropy(pd_params) + model_action_logprobs = self.action_head.logprob(actions, pd_params) + entropy = self.action_head.entropy(pd_params) # Select the cliprange current_cliprange = self.cliprange.value(batch_info['progress']) @@ -151,10 +172,9 @@ def metrics(self) -> list: class PPOFactory(ModelFactory): """ Factory class for policy gradient models """ - def __init__(self, policy: BackboneModel, - entropy_coefficient, value_coefficient, cliprange, discount_factor: float, + def __init__(self, net, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): - self.policy = policy + self.net = net self.entropy_coefficient = entropy_coefficient self.value_coefficient = value_coefficient self.cliprange = cliprange @@ -164,10 +184,12 @@ def __init__(self, policy: BackboneModel, def instantiate(self, **extra_args): """ Instantiate the model """ - policy = self.policy.instantiate(**extra_args) + action_space = extra_args.pop('action_space') + net = self.net.instantiate(**extra_args) return PPO( - policy=policy, + net=net, + action_space=action_space, entropy_coefficient=self.entropy_coefficient, value_coefficient=self.value_coefficient, cliprange=self.cliprange, @@ -177,12 +199,11 @@ def instantiate(self, **extra_args): ) -def create(policy: BackboneModel, - entropy_coefficient, value_coefficient, cliprange, discount_factor: float, +def create(net: ModelFactory, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): """ Vel factory function """ return PPOFactory( - policy=policy, + net=net, entropy_coefficient=entropy_coefficient, 
value_coefficient=value_coefficient, cliprange=cliprange, diff --git a/vel/rl/algo/ppo_rnn.py b/vel/rl/policy/ppo_rnn.py similarity index 100% rename from vel/rl/algo/ppo_rnn.py rename to vel/rl/policy/ppo_rnn.py diff --git a/vel/rl/algo/trpo.py b/vel/rl/policy/trpo.py similarity index 93% rename from vel/rl/algo/trpo.py rename to vel/rl/policy/trpo.py index 6c92d9dc..586d33c2 100644 --- a/vel/rl/algo/trpo.py +++ b/vel/rl/policy/trpo.py @@ -6,12 +6,10 @@ import torch.autograd as autograd import torch.nn.functional as F import torch.nn.utils -import typing from vel.api import BatchInfo, VelOptimizer, BackboneModel, LinearBackboneModel, OptimizerFactory, ModelFactory from vel.calc.function import explained_variance from vel.metric.base import AveragingNamedMetric -from vel.module.input.identity import IdentityFactory from vel.rl.api import Rollout, Trajectories, RlPolicy from vel.rl.discount_bootstrap import discount_bootstrap_gae @@ -60,14 +58,12 @@ class TRPO(RlPolicy): """ Trust Region Policy Optimization - https://arxiv.org/abs/1502.05477 """ def __init__(self, - input_block: BackboneModel, policy_backbone: LinearBackboneModel, value_backbone: LinearBackboneModel, action_space: gym.Space, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, discount_factor, gae_lambda, improvement_acceptance_ratio): super().__init__(discount_factor) - self.input_block = input_block self.policy_backbone = policy_backbone self.value_backbone = value_backbone @@ -89,8 +85,6 @@ def __init__(self, def reset_weights(self): """ Initialize properly model weights """ - self.input_block.reset_weights() - self.policy_backbone.reset_weights() self.value_backbone.reset_weights() @@ -99,10 +93,8 @@ def reset_weights(self): def forward(self, observations): """ Calculate model outputs """ - input_data = self.input_block(observations) - - policy_base_output = self.policy_backbone(input_data) - value_base_output = self.value_backbone(input_data) + policy_base_output = 
self.policy_backbone(observations) + value_base_output = self.value_backbone(observations) action_output = self.action_head(policy_base_output) value_output = self.value_head(value_base_output) @@ -111,15 +103,13 @@ def forward(self, observations): def value(self, observations, state=None): """ Calculate only value head for given state """ - input_data = self.input_block(observations) - base_output = self.value_backbone(input_data) + base_output = self.value_backbone(observations) value_output = self.value_head(base_output) return value_output def policy(self, observations): """ Calculate only action head for given state """ - input_data = self.input_block(observations) - policy_base_output = self.policy_backbone(input_data) + policy_base_output = self.policy_backbone(observations) policy_params = self.action_head(policy_base_output) return policy_params @@ -145,7 +135,6 @@ def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: def policy_parameters(self): """ Parameters of policy """ return it.chain( - self.input_block.parameters(), self.policy_backbone.parameters(), self.action_head.parameters() ) @@ -153,7 +142,6 @@ def policy_parameters(self): def value_parameters(self): """ Parameters of value function """ return it.chain( - self.input_block.parameters(), self.value_backbone.parameters(), self.value_head.parameters() ) @@ -345,12 +333,11 @@ def metrics(self) -> list: class TRPOFactory(ModelFactory): """ Factory class for policy gradient models """ - def __init__(self, input_block, policy_backbone: ModelFactory, value_backbone: ModelFactory, + def __init__(self, policy_backbone: ModelFactory, value_backbone: ModelFactory, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, discount_factor, gae_lambda, improvement_acceptance_ratio): self.policy_backbone = policy_backbone self.value_backbone = value_backbone - self.input_block = input_block self.entropy_coefficient = entropy_coefficient self.mak_kl = max_kl @@ 
-366,13 +353,10 @@ def instantiate(self, **extra_args): """ Instantiate the model """ action_space = extra_args.pop('action_space') - input_block = self.input_block.instantiate() - policy_backbone = self.policy_backbone.instantiate(**extra_args) value_backbone = self.value_backbone.instantiate(**extra_args) return TRPO( - input_block=input_block, policy_backbone=policy_backbone, value_backbone=value_backbone, action_space=action_space, @@ -390,14 +374,10 @@ def instantiate(self, **extra_args): def create(policy_backbone: ModelFactory, value_backbone: ModelFactory, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, - discount_factor, gae_lambda, improvement_acceptance_ratio, - input_block: typing.Optional[ModelFactory] = None): + discount_factor, gae_lambda, improvement_acceptance_ratio): """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() return TRPOFactory( - input_block=input_block, policy_backbone=policy_backbone, value_backbone=value_backbone, max_kl=max_kl, diff --git a/vel/rl/xpolicy/__init__.py b/vel/rl/xpolicy/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/rl/xpolicy/purgatory/__init__.py b/vel/rl/xpolicy/purgatory/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/rl/policy/purgatory/deterministic_policy.py b/vel/rl/xpolicy/purgatory/deterministic_policy.py similarity index 100% rename from vel/rl/policy/purgatory/deterministic_policy.py rename to vel/rl/xpolicy/purgatory/deterministic_policy.py diff --git a/vel/rl/policy/purgatory/old_stochastic_policy.py b/vel/rl/xpolicy/purgatory/old_stochastic_policy.py similarity index 100% rename from vel/rl/policy/purgatory/old_stochastic_policy.py rename to vel/rl/xpolicy/purgatory/old_stochastic_policy.py diff --git a/vel/rl/policy/purgatory/old_stochastic_rnn_policy.py b/vel/rl/xpolicy/purgatory/old_stochastic_rnn_policy.py similarity index 100% rename from 
vel/rl/policy/purgatory/old_stochastic_rnn_policy.py rename to vel/rl/xpolicy/purgatory/old_stochastic_rnn_policy.py diff --git a/vel/rl/policy/purgatory/q_distributional_policy.py b/vel/rl/xpolicy/purgatory/q_distributional_policy.py similarity index 100% rename from vel/rl/policy/purgatory/q_distributional_policy.py rename to vel/rl/xpolicy/purgatory/q_distributional_policy.py diff --git a/vel/rl/policy/purgatory/q_dueling_policy.py b/vel/rl/xpolicy/purgatory/q_dueling_policy.py similarity index 100% rename from vel/rl/policy/purgatory/q_dueling_policy.py rename to vel/rl/xpolicy/purgatory/q_dueling_policy.py diff --git a/vel/rl/policy/purgatory/q_model.py b/vel/rl/xpolicy/purgatory/q_model.py similarity index 70% rename from vel/rl/policy/purgatory/q_model.py rename to vel/rl/xpolicy/purgatory/q_model.py index d162a4de..2fbd4513 100644 --- a/vel/rl/policy/purgatory/q_model.py +++ b/vel/rl/xpolicy/purgatory/q_model.py @@ -3,34 +3,34 @@ from vel.api import LinearBackboneModel, ModelFactory, BackboneModel from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, RlPolicy, Evaluator +from vel.rl.api import Rollout, RlPolicy from vel.rl.module.q_head import QHead -class QModelEvaluator(Evaluator): - """ Evaluate simple q-model """ - def __init__(self, model: 'QModel', rollout: Rollout): - super().__init__(rollout) - self.model = model - - @Evaluator.provides('model:q') - def model_q(self): - """ Action values for all (discrete) actions """ - observations = self.get('rollout:observations') - return self.model(observations) - - @Evaluator.provides('model:action:q') - def model_action_q(self): - """ Action values for selected actions in the rollout """ - q = self.get('model:q') - actions = self.get('rollout:actions') - return q.gather(1, actions.unsqueeze(1)).squeeze(1) - - @Evaluator.provides('model:q_next') - def model_q_next(self): - """ Action values for all (discrete) actions """ - observations = self.get('rollout:observations_next') - 
return self.model(observations) +# class QModelEvaluator(Evaluator): +# """ Evaluate simple q-model """ +# def __init__(self, model: 'QModel', rollout: Rollout): +# super().__init__(rollout) +# self.model = model +# +# @Evaluator.provides('model:q') +# def model_q(self): +# """ Action values for all (discrete) actions """ +# observations = self.get('rollout:observations') +# return self.model(observations) +# +# @Evaluator.provides('model:action:q') +# def model_action_q(self): +# """ Action values for selected actions in the rollout """ +# q = self.get('model:q') +# actions = self.get('rollout:actions') +# return q.gather(1, actions.unsqueeze(1)).squeeze(1) +# +# @Evaluator.provides('model:q_next') +# def model_q_next(self): +# """ Action values for all (discrete) actions """ +# observations = self.get('rollout:observations_next') +# return self.model(observations) class QModel(RlPolicy): diff --git a/vel/rl/policy/purgatory/q_noisy_model.py b/vel/rl/xpolicy/purgatory/q_noisy_model.py similarity index 100% rename from vel/rl/policy/purgatory/q_noisy_model.py rename to vel/rl/xpolicy/purgatory/q_noisy_model.py diff --git a/vel/rl/policy/purgatory/q_rainbow_model.py b/vel/rl/xpolicy/purgatory/q_rainbow_model.py similarity index 100% rename from vel/rl/policy/purgatory/q_rainbow_model.py rename to vel/rl/xpolicy/purgatory/q_rainbow_model.py diff --git a/vel/rl/policy/purgatory/q_stochastic_policy_model.py b/vel/rl/xpolicy/purgatory/q_stochastic_policy_model.py similarity index 100% rename from vel/rl/policy/purgatory/q_stochastic_policy_model.py rename to vel/rl/xpolicy/purgatory/q_stochastic_policy_model.py diff --git a/vel/rl/xpolicy/semipurgatory/__init__.py b/vel/rl/xpolicy/semipurgatory/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/rl/policy/semipurgatory/a2c_rnn.py b/vel/rl/xpolicy/semipurgatory/a2c_rnn.py similarity index 100% rename from vel/rl/policy/semipurgatory/a2c_rnn.py rename to vel/rl/xpolicy/semipurgatory/a2c_rnn.py diff 
--git a/vel/rl/policy/semipurgatory/acer.py b/vel/rl/xpolicy/semipurgatory/acer.py similarity index 100% rename from vel/rl/policy/semipurgatory/acer.py rename to vel/rl/xpolicy/semipurgatory/acer.py diff --git a/vel/rl/policy/semipurgatory/ddpg.py b/vel/rl/xpolicy/semipurgatory/ddpg.py similarity index 100% rename from vel/rl/policy/semipurgatory/ddpg.py rename to vel/rl/xpolicy/semipurgatory/ddpg.py diff --git a/vel/rl/policy/semipurgatory/distributional_dqn.py b/vel/rl/xpolicy/semipurgatory/distributional_dqn.py similarity index 100% rename from vel/rl/policy/semipurgatory/distributional_dqn.py rename to vel/rl/xpolicy/semipurgatory/distributional_dqn.py diff --git a/vel/rl/policy/semipurgatory/ppo_rnn.py b/vel/rl/xpolicy/semipurgatory/ppo_rnn.py similarity index 100% rename from vel/rl/policy/semipurgatory/ppo_rnn.py rename to vel/rl/xpolicy/semipurgatory/ppo_rnn.py diff --git a/vel/rl/policy/stochastic_policy.py b/vel/rl/xpolicy/stochastic_policy.py similarity index 100% rename from vel/rl/policy/stochastic_policy.py rename to vel/rl/xpolicy/stochastic_policy.py diff --git a/vel/rl/policy/stochastic_policy_separate.py b/vel/rl/xpolicy/stochastic_policy_separate.py similarity index 100% rename from vel/rl/policy/stochastic_policy_separate.py rename to vel/rl/xpolicy/stochastic_policy_separate.py diff --git a/vel/rl/policy/stochastic_rnn_policy.py b/vel/rl/xpolicy/stochastic_rnn_policy.py similarity index 100% rename from vel/rl/policy/stochastic_rnn_policy.py rename to vel/rl/xpolicy/stochastic_rnn_policy.py diff --git a/vel/calc/process.py b/vel/util/process.py similarity index 100% rename from vel/calc/process.py rename to vel/util/process.py diff --git a/vel/calc/function.py b/vel/util/stats.py similarity index 100% rename from vel/calc/function.py rename to vel/util/stats.py From bc211ec75e62df9daa86881bbf1838f7bcb26e0d Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Tue, 1 Oct 2019 23:32:29 -0700 Subject: [PATCH 102/162] Restored TRPO. 
--- .../rl/atari/{purgatory => }/atari_trpo.yaml | 29 ++++----- vel/rl/layer/nature_cnn.py | 4 +- .../purgatory => }/nature_cnn_small.py | 50 ++++++++++----- vel/rl/policy/trpo.py | 64 ++++++++++--------- 4 files changed, 81 insertions(+), 66 deletions(-) rename examples-configs/rl/atari/{purgatory => }/atari_trpo.yaml (75%) rename vel/rl/layer/{premade/purgatory => }/nature_cnn_small.py (63%) diff --git a/examples-configs/rl/atari/purgatory/atari_trpo.yaml b/examples-configs/rl/atari/atari_trpo.yaml similarity index 75% rename from examples-configs/rl/atari/purgatory/atari_trpo.yaml rename to examples-configs/rl/atari/atari_trpo.yaml index df2446ab..56af94db 100644 --- a/examples-configs/rl/atari/purgatory/atari_trpo.yaml +++ b/examples-configs/rl/atari/atari_trpo.yaml @@ -12,7 +12,7 @@ vec_env: model: - name: vel.rl.algo.trpo + name: vel.rl.policy.trpo max_kl: 0.001 cg_iters: 10 @@ -25,20 +25,19 @@ model: gae_lambda: 1.00 # Generalized Advantage Estimator Lambda parameter - input_block: - name: vel.module.input.image_to_tensor - - policy_backbone: - name: vel.rl.backbone.nature_cnn_small - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - value_backbone: - name: vel.rl.backbone.nature_cnn_small - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history + policy_net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + size: [84, 84, 4] # Number of channels is frame history + - name: vel.rl.layer.nature_cnn_small + + value_net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + size: [84, 84, 4] # Number of channels is frame history + - name: vel.rl.layer.nature_cnn_small reinforcer: diff --git a/vel/rl/layer/nature_cnn.py b/vel/rl/layer/nature_cnn.py index b44b84d9..16cfed1b 100644 --- a/vel/rl/layer/nature_cnn.py +++ b/vel/rl/layer/nature_cnn.py @@ -5,7 +5,6 @@ Under MIT license. 
""" import numpy as np -import typing import torch.nn as nn import torch.nn.init as init @@ -13,8 +12,7 @@ import vel.util.network as net_util -from vel.api import ModelFactory, SizeHint, SizeHints - +from vel.api import SizeHint, SizeHints from vel.net.modular import Layer, LayerFactory diff --git a/vel/rl/layer/premade/purgatory/nature_cnn_small.py b/vel/rl/layer/nature_cnn_small.py similarity index 63% rename from vel/rl/layer/premade/purgatory/nature_cnn_small.py rename to vel/rl/layer/nature_cnn_small.py index 29359262..c9ac77bb 100644 --- a/vel/rl/layer/premade/purgatory/nature_cnn_small.py +++ b/vel/rl/layer/nature_cnn_small.py @@ -12,18 +12,19 @@ import vel.util.network as net_util -from vel.api import LinearBackboneModel, ModelFactory +from vel.api import SizeHint, SizeHints +from vel.net.modular import Layer, LayerFactory -class NatureCnnSmall(LinearBackboneModel): +class NatureCnnSmall(Layer): """ Neural network as defined in the paper 'Human-level control through deep reinforcement learning' Smaller version. 
""" - def __init__(self, input_width, input_height, input_channels, output_dim=128): - super().__init__() + def __init__(self, name: str, input_width, input_height, input_channels, output_dim=128): + super().__init__(name) - self._output_dim = output_dim + self.output_dim = output_dim self.conv1 = nn.Conv2d( in_channels=input_channels, @@ -54,11 +55,6 @@ def __init__(self, input_width, input_height, input_channels, output_dim=128): self.output_dim ) - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - return self._output_dim - def reset_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): @@ -70,7 +66,10 @@ def reset_weights(self): init.orthogonal_(m.weight, gain=np.sqrt(2)) init.constant_(m.bias, 0.0) - def forward(self, image): + def size_hints(self) -> SizeHints: + return SizeHints(SizeHint(None, self.output_dim)) + + def forward(self, image, state: dict = None, context: dict = None): result = image result = F.relu(self.conv1(result)) result = F.relu(self.conv2(result)) @@ -78,12 +77,29 @@ def forward(self, image): return F.relu(self.linear_layer(flattened)) -def create(input_width, input_height, input_channels=1): - """ Vel factory function """ - def instantiate(**_): - return NatureCnnSmall(input_width=input_width, input_height=input_height, input_channels=input_channels) +class NatureCnnSmallFactory(LayerFactory): + """ Nature Cnn Network Factory """ - return ModelFactory.generic(instantiate) + def __init__(self, output_dim: int = 128): + self.output_dim = output_dim + @property + def name_base(self) -> str: + """ Base of layer name """ + return "nature_cnn_small" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + (b, c, w, h) = direct_input.assert_single(4) + + return NatureCnnSmall( + name=name, + input_width=w, + input_height=h, + input_channels=c, + output_dim=self.output_dim + ) -NatureCnnSmallFactory = create + +def create(output_dim: int = 128): + """ Vel factory 
function """ + return NatureCnnSmallFactory(output_dim=output_dim) diff --git a/vel/rl/policy/trpo.py b/vel/rl/policy/trpo.py index 586d33c2..4d614cd8 100644 --- a/vel/rl/policy/trpo.py +++ b/vel/rl/policy/trpo.py @@ -7,8 +7,8 @@ import torch.nn.functional as F import torch.nn.utils -from vel.api import BatchInfo, VelOptimizer, BackboneModel, LinearBackboneModel, OptimizerFactory, ModelFactory -from vel.calc.function import explained_variance +from vel.api import BatchInfo, VelOptimizer, OptimizerFactory, ModelFactory, BackboneNetwork +from vel.util.stats import explained_variance from vel.metric.base import AveragingNamedMetric from vel.rl.api import Rollout, Trajectories, RlPolicy @@ -58,22 +58,12 @@ class TRPO(RlPolicy): """ Trust Region Policy Optimization - https://arxiv.org/abs/1502.05477 """ def __init__(self, - policy_backbone: LinearBackboneModel, value_backbone: LinearBackboneModel, + policy_net: BackboneNetwork, value_net: BackboneNetwork, action_space: gym.Space, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, discount_factor, gae_lambda, improvement_acceptance_ratio): super().__init__(discount_factor) - self.policy_backbone = policy_backbone - self.value_backbone = value_backbone - - self.action_head = StochasticActionHead( - action_space=action_space, - input_dim=self.policy_backbone.output_dim - ) - - self.value_head = ValueHead(input_dim=self.value_backbone.output_dim) - self.mak_kl = max_kl self.cg_iters = cg_iters self.line_search_iters = line_search_iters @@ -83,18 +73,30 @@ def __init__(self, self.gae_lambda = gae_lambda self.improvement_acceptance_ratio = improvement_acceptance_ratio + self.policy_net = policy_net + self.value_net = value_net + + self.action_head = StochasticActionHead( + action_space=action_space, + input_dim=self.policy_net.size_hints().assert_single(2).last() + ) + + self.value_head = ValueHead( + input_dim=self.value_net.size_hints().assert_single(2).last() + ) + def reset_weights(self): """ 
Initialize properly model weights """ - self.policy_backbone.reset_weights() - self.value_backbone.reset_weights() + self.policy_net.reset_weights() + self.value_net.reset_weights() self.action_head.reset_weights() self.value_head.reset_weights() def forward(self, observations): """ Calculate model outputs """ - policy_base_output = self.policy_backbone(observations) - value_base_output = self.value_backbone(observations) + policy_base_output = self.policy_net(observations) + value_base_output = self.value_net(observations) action_output = self.action_head(policy_base_output) value_output = self.value_head(value_base_output) @@ -103,13 +105,13 @@ def forward(self, observations): def value(self, observations, state=None): """ Calculate only value head for given state """ - base_output = self.value_backbone(observations) + base_output = self.value_net(observations) value_output = self.value_head(base_output) return value_output def policy(self, observations): """ Calculate only action head for given state """ - policy_base_output = self.policy_backbone(observations) + policy_base_output = self.policy_net(observations) policy_params = self.action_head(policy_base_output) return policy_params @@ -135,14 +137,14 @@ def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: def policy_parameters(self): """ Parameters of policy """ return it.chain( - self.policy_backbone.parameters(), + self.policy_net.parameters(), self.action_head.parameters() ) def value_parameters(self): """ Parameters of value function """ return it.chain( - self.value_backbone.parameters(), + self.value_net.parameters(), self.value_head.parameters() ) @@ -333,11 +335,11 @@ def metrics(self) -> list: class TRPOFactory(ModelFactory): """ Factory class for policy gradient models """ - def __init__(self, policy_backbone: ModelFactory, value_backbone: ModelFactory, + def __init__(self, policy_net: ModelFactory, value_net: ModelFactory, max_kl, cg_iters, line_search_iters, 
cg_damping, entropy_coefficient, vf_iters, discount_factor, gae_lambda, improvement_acceptance_ratio): - self.policy_backbone = policy_backbone - self.value_backbone = value_backbone + self.policy_net = policy_net + self.value_net = value_net self.entropy_coefficient = entropy_coefficient self.mak_kl = max_kl @@ -353,12 +355,12 @@ def instantiate(self, **extra_args): """ Instantiate the model """ action_space = extra_args.pop('action_space') - policy_backbone = self.policy_backbone.instantiate(**extra_args) - value_backbone = self.value_backbone.instantiate(**extra_args) + policy_net = self.policy_net.instantiate(**extra_args) + value_net = self.value_net.instantiate(**extra_args) return TRPO( - policy_backbone=policy_backbone, - value_backbone=value_backbone, + policy_net=policy_net, + value_net=value_net, action_space=action_space, max_kl=self.mak_kl, cg_iters=self.cg_iters, @@ -372,14 +374,14 @@ def instantiate(self, **extra_args): ) -def create(policy_backbone: ModelFactory, value_backbone: ModelFactory, +def create(policy_net: ModelFactory, value_net: ModelFactory, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, discount_factor, gae_lambda, improvement_acceptance_ratio): """ Vel factory function """ return TRPOFactory( - policy_backbone=policy_backbone, - value_backbone=value_backbone, + policy_net=policy_net, + value_net=value_net, max_kl=max_kl, cg_iters=cg_iters, line_search_iters=line_search_iters, From 8a17a973635bdacfe829d9c26d86f1d09129ea04 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Wed, 2 Oct 2019 10:38:42 -0700 Subject: [PATCH 103/162] Restored `atari_a2c_tf_rmsprop` example. 
--- .../{purgatory => }/atari_a2c_tf_rmsprop.yaml | 40 +++++++++---------- vel/optimizer/rmsprop_tf.py | 3 ++ 2 files changed, 22 insertions(+), 21 deletions(-) rename examples-configs/rl/atari/{purgatory => }/atari_a2c_tf_rmsprop.yaml (50%) diff --git a/examples-configs/rl/atari/purgatory/atari_a2c_tf_rmsprop.yaml b/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml similarity index 50% rename from examples-configs/rl/atari/purgatory/atari_a2c_tf_rmsprop.yaml rename to examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml index 2786a6a4..13de0fef 100644 --- a/examples-configs/rl/atari/purgatory/atari_a2c_tf_rmsprop.yaml +++ b/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml @@ -12,37 +12,35 @@ vec_env: model: - name: vel.rl.models.stochastic_policy_model + name: vel.rl.policy.a2c - input_block: - name: vel.modules.input.image_to_tensor + entropy_coefficient: 0.01 + value_coefficient: 0.5 + discount_factor: 0.99 - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + size: [84, 84, 4] # Number of channels is frame history + - name: vel.rl.layer.nature_cnn + - name: vel.net.layer.util.repeat_tensor + times: 2 # Need to repeat output twice, for action and value heads reinforcer: - name: vel.rl.reinforcers.on_policy_iteration_reinforcer - - algo: - name: vel.rl.algo.policy_gradient.a2c - entropy_coefficient: 0.01 - value_coefficient: 0.5 - max_grad_norm: 0.5 - discount_factor: 0.99 + name: vel.rl.reinforcer.on_policy_iteration_reinforcer env_roller: name: vel.rl.env_roller.step_env_roller number_of_steps: 5 # How many environment steps go into a single batch parallel_envs: 16 # How many environments to run in parallel + batch_size: 256 # How many samples can go into the model once optimizer: - name: vel.optimizers.rmsprop_tf + name: vel.optimizer.rmsprop_tf lr: 7.0e-4 alpha: 0.99 epsilon: 1.0e-6 
@@ -50,20 +48,20 @@ optimizer: commands: train: - name: vel.rl.commands.rl_train_command + name: vel.rl.command.rl_train_command total_frames: 1.1e7 batches_per_epoch: 100 record: - name: vel.rl.commands.record_movie_command + name: vel.rl.command.record_movie_command takes: 10 videoname: 'atari_vid_{:04}.avi' evaluate: - name: vel.rl.commands.evaluate_env_command + name: vel.rl.command.evaluate_env_command parallel_envs: 16 # How many environments to run in parallel takes: 20 visdom: - name: vel.commands.vis_store_command + name: vel.command.vis_store_command diff --git a/vel/optimizer/rmsprop_tf.py b/vel/optimizer/rmsprop_tf.py index 4e80a401..934d5090 100644 --- a/vel/optimizer/rmsprop_tf.py +++ b/vel/optimizer/rmsprop_tf.py @@ -1,3 +1,6 @@ +import torch +import typing + from torch.optim.optimizer import Optimizer import vel.util.module_util as mu From 1ccc0a4ef4890b6960f5eac7d5e3b511ca385191 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Wed, 2 Oct 2019 13:24:35 -0700 Subject: [PATCH 104/162] ACER works again. 
--- .../rl/atari/{purgatory => }/atari_acer.yaml | 51 ++--- vel/rl/api/policy.py | 2 +- vel/rl/env_roller/step_env_roller.py | 14 +- .../trajectory_replay_env_roller.py | 11 +- .../semipurgatory => module/head}/__init__.py | 0 .../{ => head}/deterministic_action_head.py | 0 .../{ => head}/deterministic_critic_head.py | 0 .../{ => head}/q_distributional_head.py | 0 .../q_distributional_noisy_dueling_head.py | 0 vel/rl/module/{ => head}/q_dueling_head.py | 0 vel/rl/module/{ => head}/q_head.py | 0 vel/rl/module/{ => head}/q_noisy_head.py | 0 .../{ => head}/stochastic_action_head.py | 56 ++---- vel/rl/module/{ => head}/value_head.py | 0 vel/rl/module/stochastic_policy.py | 42 +++++ vel/rl/policy/a2c.py | 34 +--- .../{xpolicy/semipurgatory => policy}/acer.py | 176 ++++++++++++++---- vel/rl/policy/ppo.py | 37 +--- vel/rl/policy/purgatory/__init__.py | 0 vel/rl/policy/{ => purgatory}/a2c_rnn.py | 0 .../purgatory}/ddpg.py | 0 .../purgatory}/distributional_dqn.py | 0 vel/rl/policy/{ => purgatory}/dqn.py | 0 vel/rl/policy/{ => purgatory}/ppo_rnn.py | 0 ...fered_mixed_policy_iteration_reinforcer.py | 57 +++--- .../on_policy_iteration_reinforcer.py | 10 +- vel/rl/xpolicy/semipurgatory/a2c_rnn.py | 92 --------- vel/rl/xpolicy/semipurgatory/ppo_rnn.py | 133 ------------- vel/rl/xpolicy/stochastic_policy.py | 70 ------- 29 files changed, 283 insertions(+), 502 deletions(-) rename examples-configs/rl/atari/{purgatory => }/atari_acer.yaml (61%) rename vel/rl/{xpolicy/semipurgatory => module/head}/__init__.py (100%) rename vel/rl/module/{ => head}/deterministic_action_head.py (100%) rename vel/rl/module/{ => head}/deterministic_critic_head.py (100%) rename vel/rl/module/{ => head}/q_distributional_head.py (100%) rename vel/rl/module/{ => head}/q_distributional_noisy_dueling_head.py (100%) rename vel/rl/module/{ => head}/q_dueling_head.py (100%) rename vel/rl/module/{ => head}/q_head.py (100%) rename vel/rl/module/{ => head}/q_noisy_head.py (100%) rename vel/rl/module/{ => 
head}/stochastic_action_head.py (71%) rename vel/rl/module/{ => head}/value_head.py (100%) create mode 100644 vel/rl/module/stochastic_policy.py rename vel/rl/{xpolicy/semipurgatory => policy}/acer.py (58%) create mode 100644 vel/rl/policy/purgatory/__init__.py rename vel/rl/policy/{ => purgatory}/a2c_rnn.py (100%) rename vel/rl/{xpolicy/semipurgatory => policy/purgatory}/ddpg.py (100%) rename vel/rl/{xpolicy/semipurgatory => policy/purgatory}/distributional_dqn.py (100%) rename vel/rl/policy/{ => purgatory}/dqn.py (100%) rename vel/rl/policy/{ => purgatory}/ppo_rnn.py (100%) delete mode 100644 vel/rl/xpolicy/semipurgatory/a2c_rnn.py delete mode 100644 vel/rl/xpolicy/semipurgatory/ppo_rnn.py delete mode 100644 vel/rl/xpolicy/stochastic_policy.py diff --git a/examples-configs/rl/atari/purgatory/atari_acer.yaml b/examples-configs/rl/atari/atari_acer.yaml similarity index 61% rename from examples-configs/rl/atari/purgatory/atari_acer.yaml rename to examples-configs/rl/atari/atari_acer.yaml index 52e47b84..256b09bc 100644 --- a/examples-configs/rl/atari/purgatory/atari_acer.yaml +++ b/examples-configs/rl/atari/atari_acer.yaml @@ -10,28 +10,39 @@ vec_env: name: vel.rl.vecenv.shared_mem frame_history: 4 # How many stacked frames go into a single observation + algo: + name: vel.rl.algo.policy_gradient.acer model: - name: vel.rl.models.q_stochastic_policy_model + name: vel.rl.policy.acer + + entropy_coefficient: 0.01 + q_coefficient: 0.5 + rho_cap: 10.0 + retrace_rho_cap: 1.0 + + discount_factor: 0.99 - input_block: - name: vel.modules.input.image_to_tensor + trust_region: true - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + size: [84, 84, 4] # Number of channels is frame history + - name: vel.rl.layer.nature_cnn + - name: vel.net.layer.util.repeat_tensor + times: 2 # Need to repeat output twice, 
for action and value heads reinforcer: - name: vel.rl.reinforcers.buffered_mixed_policy_iteration_reinforcer + name: vel.rl.reinforcer.buffered_mixed_policy_iteration_reinforcer env_roller: name: vel.rl.env_roller.trajectory_replay_env_roller replay_buffer: - name: vel.rl.buffers.circular_replay_buffer + name: vel.rl.buffer.circular_replay_buffer buffer_initial_size: 1_000 # How many samples we need in the buffer before we start using replay buffer buffer_capacity: 50_000 @@ -40,17 +51,6 @@ reinforcer: frame_stack_compensation: true frame_history: 4 # How many stacked frames go into a single observation - algo: - name: vel.rl.algo.policy_gradient.acer - entropy_coefficient: 0.01 - q_coefficient: 0.5 - rho_cap: 10.0 - retrace_rho_cap: 1.0 - - max_grad_norm: 10.0 - discount_factor: 0.99 - - trust_region: false parallel_envs: 12 # How many environments to run in parallel number_of_steps: 20 # How many environment steps go into a single batch @@ -58,25 +58,26 @@ reinforcer: optimizer: - name: vel.optimizers.rmsprop + name: vel.optimizer.rmsprop lr: 7.0e-4 alpha: 0.99 # epsilon: 1.0e-5 epsilon: 1.0e-3 + max_grad_norm: 10.0 commands: train: - name: vel.rl.commands.rl_train_command + name: vel.rl.command.rl_train_command total_frames: 1.1e7 batches_per_epoch: 30 record: - name: vel.rl.commands.record_movie_command + name: vel.rl.command.record_movie_command takes: 10 videoname: 'atari_vid_{:04}.avi' evaluate: - name: vel.rl.commands.evaluate_env_command + name: vel.rl.command.evaluate_env_command takes: 100 parallel_envs: 12 # How many environments to run in parallel diff --git a/vel/rl/api/policy.py b/vel/rl/api/policy.py index e5701d4d..3a73003e 100644 --- a/vel/rl/api/policy.py +++ b/vel/rl/api/policy.py @@ -12,7 +12,7 @@ def __init__(self, discount_factor: float): def process_rollout(self, rollout: Rollout) -> Rollout: """ Process rollout for optimization before any chunking/shuffling """ - raise NotImplementedError + return rollout def act(self, observation, 
state=None, deterministic=False) -> dict: """ diff --git a/vel/rl/env_roller/step_env_roller.py b/vel/rl/env_roller/step_env_roller.py index f749a895..2b959f6e 100644 --- a/vel/rl/env_roller/step_env_roller.py +++ b/vel/rl/env_roller/step_env_roller.py @@ -58,18 +58,22 @@ def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: episode_information.append(new_infos) - final_values = self.actor.value(self.last_observation.to(self.device)).cpu() - accumulated_tensors = accumulator.result() + # Perform last agent step, without advancing the state + final_obs = self.actor.act(self.last_observation.to(self.device), advance_state=False) + + rollout_tensors = {} + + for key, value in final_obs.items(): + rollout_tensors[f"final_{key}"] = value.cpu() + return Trajectories( num_steps=accumulated_tensors['observations'].size(0), num_envs=accumulated_tensors['observations'].size(1), environment_information=episode_information, transition_tensors=accumulated_tensors, - rollout_tensors={ - 'final_values': final_values - } + rollout_tensors=rollout_tensors ) diff --git a/vel/rl/env_roller/trajectory_replay_env_roller.py b/vel/rl/env_roller/trajectory_replay_env_roller.py index 1f413f4f..7e347edb 100644 --- a/vel/rl/env_roller/trajectory_replay_env_roller.py +++ b/vel/rl/env_roller/trajectory_replay_env_roller.py @@ -81,14 +81,19 @@ def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: accumulated_tensors = accumulator.result() + final_obs = self.actor.act(self.last_observation.to(self.device), advance_state=False) + + rollout_tensors = {} + + for key, value in final_obs.items(): + rollout_tensors[f"final_{key}"] = value.cpu() + return Trajectories( num_steps=accumulated_tensors['observations'].size(0), num_envs=accumulated_tensors['observations'].size(1), environment_information=episode_information, transition_tensors=accumulated_tensors, - rollout_tensors={ - 'final_values': self.actor.value(self.last_observation).cpu() - } + 
rollout_tensors=rollout_tensors ) def sample(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: diff --git a/vel/rl/xpolicy/semipurgatory/__init__.py b/vel/rl/module/head/__init__.py similarity index 100% rename from vel/rl/xpolicy/semipurgatory/__init__.py rename to vel/rl/module/head/__init__.py diff --git a/vel/rl/module/deterministic_action_head.py b/vel/rl/module/head/deterministic_action_head.py similarity index 100% rename from vel/rl/module/deterministic_action_head.py rename to vel/rl/module/head/deterministic_action_head.py diff --git a/vel/rl/module/deterministic_critic_head.py b/vel/rl/module/head/deterministic_critic_head.py similarity index 100% rename from vel/rl/module/deterministic_critic_head.py rename to vel/rl/module/head/deterministic_critic_head.py diff --git a/vel/rl/module/q_distributional_head.py b/vel/rl/module/head/q_distributional_head.py similarity index 100% rename from vel/rl/module/q_distributional_head.py rename to vel/rl/module/head/q_distributional_head.py diff --git a/vel/rl/module/q_distributional_noisy_dueling_head.py b/vel/rl/module/head/q_distributional_noisy_dueling_head.py similarity index 100% rename from vel/rl/module/q_distributional_noisy_dueling_head.py rename to vel/rl/module/head/q_distributional_noisy_dueling_head.py diff --git a/vel/rl/module/q_dueling_head.py b/vel/rl/module/head/q_dueling_head.py similarity index 100% rename from vel/rl/module/q_dueling_head.py rename to vel/rl/module/head/q_dueling_head.py diff --git a/vel/rl/module/q_head.py b/vel/rl/module/head/q_head.py similarity index 100% rename from vel/rl/module/q_head.py rename to vel/rl/module/head/q_head.py diff --git a/vel/rl/module/q_noisy_head.py b/vel/rl/module/head/q_noisy_head.py similarity index 100% rename from vel/rl/module/q_noisy_head.py rename to vel/rl/module/head/q_noisy_head.py diff --git a/vel/rl/module/stochastic_action_head.py b/vel/rl/module/head/stochastic_action_head.py similarity index 71% rename from 
vel/rl/module/stochastic_action_head.py rename to vel/rl/module/head/stochastic_action_head.py index 2d54cbab..d0ce774f 100644 --- a/vel/rl/module/stochastic_action_head.py +++ b/vel/rl/module/head/stochastic_action_head.py @@ -1,3 +1,4 @@ +import gym import numpy as np import torch @@ -110,7 +111,7 @@ def sample(self, logits, deterministic=False): if deterministic: return torch.argmax(logits, dim=-1) else: - # Gumbel-softmax trick + # Gumbel-Softmax trick u = torch.rand_like(logits) return torch.argmax(logits - torch.log(-torch.log(u)), dim=-1) @@ -134,47 +135,12 @@ def kl_divergence(self, logits_q, logits_p): return (torch.exp(logits_q) * (logits_q - logits_p)).sum(1, keepdim=True) -class StochasticActionHead(nn.Module): - """ - Network head for action determination. Returns probability distribution parametrization - """ - - def __init__(self, input_dim, action_space): - super().__init__() - - self.action_space = action_space - - if isinstance(action_space, spaces.Box): - assert len(action_space.shape) == 1 - self.head = DiagGaussianActionHead(input_dim, action_space.shape[0]) - elif isinstance(action_space, spaces.Discrete): - self.head = CategoricalActionHead(input_dim, action_space.n) - # elif isinstance(action_space, spaces.MultiDiscrete): - # return MultiCategoricalPdType(action_space.nvec) - # elif isinstance(action_space, spaces.MultiBinary): - # return BernoulliPdType(action_space.n) - else: - raise NotImplementedError - - def forward(self, input_data): - return self.head(input_data) - - def sample(self, policy_params, **kwargs): - """ Sample from a probability space of all actions """ - return self.head.sample(policy_params, **kwargs) - - def reset_weights(self): - """ Initialize weights to sane defaults """ - self.head.reset_weights() - - def entropy(self, policy_params): - """ Entropy calculation - sum probs * log(probs) """ - return self.head.entropy(policy_params) - - def kl_divergence(self, params_q, params_p): - """ Kullback–Leibler divergence 
between two sets of parameters """ - return self.head.kl_divergence(params_q, params_p) - - def logprob(self, action_sample, policy_params): - """ - log probabilty of selected actions """ - return self.head.logprob(action_sample, policy_params) +def make_stockastic_action_head(input_dim: int, action_space: gym.Space): + """ Instantiate stochastic action space relevant for the task """ + if isinstance(action_space, spaces.Box): + assert len(action_space.shape) == 1 + return DiagGaussianActionHead(input_dim, action_space.shape[0]) + elif isinstance(action_space, spaces.Discrete): + return CategoricalActionHead(input_dim, action_space.n) + else: + raise NotImplementedError diff --git a/vel/rl/module/value_head.py b/vel/rl/module/head/value_head.py similarity index 100% rename from vel/rl/module/value_head.py rename to vel/rl/module/head/value_head.py diff --git a/vel/rl/module/stochastic_policy.py b/vel/rl/module/stochastic_policy.py new file mode 100644 index 00000000..47a22e3f --- /dev/null +++ b/vel/rl/module/stochastic_policy.py @@ -0,0 +1,42 @@ +import gym + +from vel.api import Network, BackboneNetwork + +from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head +from vel.rl.module.head.value_head import ValueHead + + +class StochasticPolicy(Network): + """ + Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone + """ + + def __init__(self, net: BackboneNetwork, action_space: gym.Space): + super().__init__() + + self.net = net + + assert not self.net.is_stateful, "Backbone shouldn't have state" + + (action_size, value_size) = self.net.size_hints().assert_tuple(2) + + self.action_head = make_stockastic_action_head( + action_space=action_space, + input_dim=action_size.last(), + ) + + self.value_head = ValueHead( + input_dim=value_size.last() + ) + + def reset_weights(self): + """ Initialize properly model weights """ + self.net.reset_weights() + self.action_head.reset_weights() + 
self.value_head.reset_weights() + + def forward(self, observation): + """ Calculate model outputs """ + action_hidden, value_hidden = self.net(observation) + return self.action_head(action_hidden), self.value_head(value_hidden) + diff --git a/vel/rl/policy/a2c.py b/vel/rl/policy/a2c.py index 858cf5e4..cd03eead 100644 --- a/vel/rl/policy/a2c.py +++ b/vel/rl/policy/a2c.py @@ -8,8 +8,7 @@ from vel.rl.api import RlPolicy, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae -from vel.rl.module.stochastic_action_head import StochasticActionHead -from vel.rl.module.value_head import ValueHead +from vel.rl.module.stochastic_policy import StochasticPolicy class A2C(RlPolicy): @@ -23,40 +22,23 @@ def __init__(self, net: BackboneNetwork, action_space: gym.Space, self.value_coefficient = value_coefficient self.gae_lambda = gae_lambda - self.net = net - - assert not self.net.is_stateful, "For stateful policies, use A2CRnn" - - # Make sure network returns two results - (action_size, value_size) = self.net.size_hints().assert_tuple(2) - - self.action_head = StochasticActionHead( - action_space=action_space, - input_dim=action_size.last(), - ) - - self.value_head = ValueHead( - input_dim=value_size.last() - ) + self.policy = StochasticPolicy(net, action_space) def reset_weights(self): """ Initialize properly model weights """ - self.net.reset_weights() - self.action_head.reset_weights() - self.value_head.reset_weights() + self.policy.reset_weights() def forward(self, observation, state=None): """ Calculate model outputs """ - action_hidden, value_hidden = self.net(observation, state=state) - return self.action_head(action_hidden), self.value_head(value_hidden) + return self.policy(observation) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ action_pd_params, value_output = self(observation) - actions = self.action_head.sample(action_pd_params, deterministic=deterministic) + actions = 
self.policy.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action - logprobs = self.action_head.logprob(actions, action_pd_params) + logprobs = self.policy.action_head.logprob(actions, action_pd_params) return { 'actions': actions, @@ -96,8 +78,8 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: pd_params, model_values = self(observations) - log_probs = self.action_head.logprob(actions, pd_params) - entropy = self.action_head.entropy(pd_params) + log_probs = self.policy.action_head.logprob(actions, pd_params) + entropy = self.policy.action_head.entropy(pd_params) # Actual calculations. Pretty trivial policy_loss = -torch.mean(advantages * log_probs) diff --git a/vel/rl/xpolicy/semipurgatory/acer.py b/vel/rl/policy/acer.py similarity index 58% rename from vel/rl/xpolicy/semipurgatory/acer.py rename to vel/rl/policy/acer.py index 6d78a603..f4d62580 100644 --- a/vel/rl/xpolicy/semipurgatory/acer.py +++ b/vel/rl/policy/acer.py @@ -1,8 +1,12 @@ +import gym import torch import torch.nn.functional as F +from vel.api import BackboneNetwork, ModelFactory, BatchInfo, Network from vel.metric.base import AveragingNamedMetric -from vel.rl.api import Trajectories, OptimizerAlgoBase +from vel.rl.api import Trajectories, RlPolicy, Rollout +from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head +from vel.rl.module.head.q_head import QHead def select_indices(tensor, indices): @@ -10,18 +14,57 @@ def select_indices(tensor, indices): return tensor.gather(1, indices.unsqueeze(1)).squeeze() -class AcerPolicyGradient(OptimizerAlgoBase): +class QStochasticPolicy(Network): + """ + A policy model with an action-value critic head (instead of more common state-value critic head). 
+ Supports only discrete action spaces (ones that can be enumerated) + """ + + def __init__(self, net: BackboneNetwork, action_space: gym.Space): + super().__init__() + + assert isinstance(action_space, gym.spaces.Discrete) + + self.net = net + + (action_size, value_size) = self.net.size_hints().assert_tuple(2) + + self.action_head = make_stockastic_action_head( + input_dim=action_size.last(), + action_space=action_space + ) + + self.q_head = QHead( + input_dim=value_size.last(), + action_space=action_space + ) + + def reset_weights(self): + """ Initialize properly model weights """ + self.net.reset_weights() + self.action_head.reset_weights() + self.q_head.reset_weights() + + def forward(self, observations): + """ Calculate model outputs """ + action_hidden, q_hidden = self.net(observations) + policy_params = self.action_head(action_hidden) + + q = self.q_head(q_hidden) + + return policy_params, q + + +class ACER(RlPolicy): """ Actor-Critic with Experience Replay - policy gradient calculations """ - def __init__(self, model_factory, discount_factor, trust_region: bool = True, entropy_coefficient: float = 0.01, + def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space: gym.Space, + discount_factor: float, trust_region: bool = True, entropy_coefficient: float = 0.01, q_coefficient: float = 0.5, rho_cap: float = 10.0, retrace_rho_cap: float = 1.0, - max_grad_norm: float = None, average_model_alpha: float = 0.99, trust_region_delta: float = 1.0): - super().__init__(max_grad_norm) - - self.discount_factor = discount_factor + average_model_alpha: float = 0.99, trust_region_delta: float = 1.0): + super().__init__(discount_factor) self.trust_region = trust_region - self.model_factory = model_factory self.entropy_coefficient = entropy_coefficient self.q_coefficient = q_coefficient @@ -30,39 +73,67 @@ def __init__(self, model_factory, discount_factor, trust_region: bool = True, en self.retrace_rho_cap = retrace_rho_cap # Trust region settings - 
self.average_model = None self.average_model_alpha = average_model_alpha self.trust_region_delta = trust_region_delta - def initialize(self, training_info, model, environment, device): - """ Initialize policy gradient from reinforcer settings """ + self.policy = QStochasticPolicy(net, action_space) + if self.trust_region: - self.average_model = self.model_factory.instantiate(action_space=environment.action_space).to(device) - self.average_model.load_state_dict(model.state_dict()) + self.target_policy = QStochasticPolicy(net_factory.instantiate(), action_space) + else: + self.target_policy = None + + def reset_weights(self): + """ Initialize properly model weights """ + self.policy.reset_weights() + + if self.trust_region: + self.target_policy.load_state_dict(self.policy.state_dict()) + + def forward(self, observation, state=None): + """ Calculate model outputs """ + return self.policy(observation) + + def act(self, observation, state=None, deterministic=False): + """ Select actions based on model's output """ + logprobs, q = self(observation) + actions = self.policy.action_head.sample(logprobs, deterministic=deterministic) - def update_average_model(self, model): + # log likelihood of selected action + action_logprobs = self.policy.action_head.logprob(actions, logprobs) + values = (torch.exp(logprobs) * q).sum(dim=1) + + return { + 'actions': actions, + 'q': q, + 'values': values, + 'action:logprobs': action_logprobs, + 'logprobs': logprobs + } + + def update_target_policy(self): """ Update weights of the average model with new model observation """ - for model_param, average_param in zip(model.parameters(), self.average_model.parameters()): + for model_param, average_param in zip(self.policy.parameters(), self.target_policy.parameters()): # EWMA average model update average_param.data.mul_(self.average_model_alpha).add_(model_param.data * (1 - self.average_model_alpha)) - def calculate_gradient(self, batch_info, device, model, rollout): + def 
calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: """ Calculate loss of the supplied rollout """ assert isinstance(rollout, Trajectories), "ACER algorithm requires trajectory input" - local_epsilon = 1e-6 - - evaluator = model.evaluate(rollout) - - actions = evaluator.get('rollout:actions') - rollout_probabilities = torch.exp(evaluator.get('rollout:logprobs')) - # We calculate the trust-region update with respect to the average model if self.trust_region: - self.update_average_model(model) + self.update_target_policy() + + local_epsilon = 1e-6 + + # Part 0.0 - Rollout values + actions = rollout.batch_tensor('actions') + rollout_probabilities = torch.exp(rollout.batch_tensor('logprobs')) + observations = rollout.batch_tensor('observations') - logprobs = evaluator.get('model:logprobs') - q = evaluator.get('model:q') + # PART 0.1 - Model evaluation + logprobs, q = self(observations) # Selected action values action_logprobs = select_indices(logprobs, actions) @@ -99,7 +170,7 @@ def calculate_gradient(self, batch_info, device, model, rollout): explained_variance = 1 - torch.var(q_retraced - action_q) / torch.var(q_retraced) # Entropy of the policy distribution - policy_entropy = torch.mean(model.entropy(logprobs)) + policy_entropy = torch.mean(self.policy.action_head.entropy(logprobs)) policy_gradient_loss = -torch.mean(advantages * importance_sampling_coefficient * action_logprobs) # Policy gradient bias correction @@ -121,8 +192,7 @@ def calculate_gradient(self, batch_info, device, model, rollout): if self.trust_region: with torch.no_grad(): - average_evaluator = self.average_model.evaluate(rollout) - average_action_logits = average_evaluator.get('model:logprobs') + target_logprobs = self.target_policy(observations)[0] actor_loss = policy_loss - self.entropy_coefficient * policy_entropy q_loss = self.q_coefficient * q_function_loss @@ -134,7 +204,7 @@ def calculate_gradient(self, batch_info, device, model, rollout): # Analytically calculated 
derivative of KL divergence on logits # That makes it hardcoded for discrete action spaces - kl_divergence_grad_symbolic = - torch.exp(average_action_logits) / logprobs.size(0) + kl_divergence_grad_symbolic = - torch.exp(target_logprobs) / logprobs.size(0) k_dot_g = (actor_gradient * kl_divergence_grad_symbolic).sum(dim=-1) k_dot_k = (kl_divergence_grad_symbolic ** 2).sum(dim=-1) @@ -195,7 +265,6 @@ def metrics(self) -> list: AveragingNamedMetric("policy_gradient_bias_correction"), AveragingNamedMetric("explained_variance"), AveragingNamedMetric("advantage_norm"), - AveragingNamedMetric("grad_norm"), AveragingNamedMetric("model_prob_std"), AveragingNamedMetric("rollout_prob_std"), AveragingNamedMetric("avg_q_selected"), @@ -203,17 +272,52 @@ def metrics(self) -> list: ] -def create(model, trust_region, entropy_coefficient, q_coefficient, max_grad_norm, discount_factor, - rho_cap=10.0, retrace_rho_cap=1.0, average_model_alpha=0.99, trust_region_delta=1.0): +class ACERFactory(ModelFactory): + """ Factory class for ACER policies """ + def __init__(self, net_factory, trust_region: bool, entropy_coefficient: float, q_coefficient: float, + discount_factor: float, rho_cap: float = 10.0, retrace_rho_cap: float = 1.0, + average_model_alpha: float = 0.99, trust_region_delta: float = 1.0): + self.net_factory = net_factory + self.trust_region = trust_region + self.entropy_coefficient = entropy_coefficient + self.q_coefficient = q_coefficient + self.discount_factor = discount_factor + self.rho_cap = rho_cap + self.retrace_rho_cap = retrace_rho_cap + self.average_model_alpha = average_model_alpha + self.trust_region_delta = trust_region_delta + + def instantiate(self, **extra_args): + """ Instantiate the model """ + action_space = extra_args.pop('action_space') + net = self.net_factory.instantiate(**extra_args) + + return ACER( + net=net, + net_factory=self.net_factory, + action_space=action_space, + trust_region=self.trust_region, + entropy_coefficient=self.entropy_coefficient, 
+ q_coefficient=self.q_coefficient, + discount_factor=self.discount_factor, + rho_cap=self.rho_cap, + retrace_rho_cap=self.retrace_rho_cap, + average_model_alpha=self.average_model_alpha, + trust_region_delta=self.trust_region_delta, + ) + + +def create(net, trust_region: bool , entropy_coefficient: float, q_coefficient: float, discount_factor: float, + rho_cap: float = 10.0, retrace_rho_cap: float = 1.0, average_model_alpha: float = 0.99, + trust_region_delta: float = 1.0): """ Vel factory function """ - return AcerPolicyGradient( + return ACERFactory( + net_factory=net, trust_region=trust_region, - model_factory=model, entropy_coefficient=entropy_coefficient, q_coefficient=q_coefficient, rho_cap=rho_cap, retrace_rho_cap=retrace_rho_cap, - max_grad_norm=max_grad_norm, discount_factor=discount_factor, average_model_alpha=average_model_alpha, trust_region_delta=trust_region_delta diff --git a/vel/rl/policy/ppo.py b/vel/rl/policy/ppo.py index 58be0a22..6230020d 100644 --- a/vel/rl/policy/ppo.py +++ b/vel/rl/policy/ppo.py @@ -10,9 +10,7 @@ from vel.rl.api import RlPolicy, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae - -from vel.rl.module.stochastic_action_head import StochasticActionHead -from vel.rl.module.value_head import ValueHead +from vel.rl.module.stochastic_policy import StochasticPolicy class PPO(RlPolicy): @@ -32,40 +30,23 @@ def __init__(self, net: BackboneNetwork, action_space: gym.Space, else: self.cliprange = cliprange - self.net = net - - assert not self.net.is_stateful, "For stateful policies, use PPORnn" - - # Make sure network returns two results - (action_size, value_size) = self.net.size_hints().assert_tuple(2) - - self.action_head = StochasticActionHead( - action_space=action_space, - input_dim=action_size.last(), - ) - - self.value_head = ValueHead( - input_dim=value_size.last() - ) + self.policy = StochasticPolicy(net, action_space) def reset_weights(self): """ Initialize properly model weights """ - 
self.net.reset_weights() - self.action_head.reset_weights() - self.value_head.reset_weights() + self.policy.reset_weights() - def forward(self, observation): + def forward(self, observation, state=None): """ Calculate model outputs """ - action_hidden, value_hidden = self.net(observation) - return self.action_head(action_hidden), self.value_head(value_hidden) + return self.policy(observation) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ action_pd_params, value_output = self(observation) - actions = self.action_head.sample(action_pd_params, deterministic=deterministic) + actions = self.policy.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action - logprobs = self.action_head.logprob(actions, action_pd_params) + logprobs = self.policy.action_head.logprob(actions, action_pd_params) return { 'actions': actions, @@ -109,8 +90,8 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: # PART 0.1 - Model evaluation pd_params, model_values = self(observations) - model_action_logprobs = self.action_head.logprob(actions, pd_params) - entropy = self.action_head.entropy(pd_params) + model_action_logprobs = self.policy.action_head.logprob(actions, pd_params) + entropy = self.policy.action_head.entropy(pd_params) # Select the cliprange current_cliprange = self.cliprange.value(batch_info['progress']) diff --git a/vel/rl/policy/purgatory/__init__.py b/vel/rl/policy/purgatory/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/rl/policy/a2c_rnn.py b/vel/rl/policy/purgatory/a2c_rnn.py similarity index 100% rename from vel/rl/policy/a2c_rnn.py rename to vel/rl/policy/purgatory/a2c_rnn.py diff --git a/vel/rl/xpolicy/semipurgatory/ddpg.py b/vel/rl/policy/purgatory/ddpg.py similarity index 100% rename from vel/rl/xpolicy/semipurgatory/ddpg.py rename to vel/rl/policy/purgatory/ddpg.py diff --git 
a/vel/rl/xpolicy/semipurgatory/distributional_dqn.py b/vel/rl/policy/purgatory/distributional_dqn.py similarity index 100% rename from vel/rl/xpolicy/semipurgatory/distributional_dqn.py rename to vel/rl/policy/purgatory/distributional_dqn.py diff --git a/vel/rl/policy/dqn.py b/vel/rl/policy/purgatory/dqn.py similarity index 100% rename from vel/rl/policy/dqn.py rename to vel/rl/policy/purgatory/dqn.py diff --git a/vel/rl/policy/ppo_rnn.py b/vel/rl/policy/purgatory/ppo_rnn.py similarity index 100% rename from vel/rl/policy/ppo_rnn.py rename to vel/rl/policy/purgatory/ppo_rnn.py diff --git a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py index f80694dc..11764c84 100644 --- a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py @@ -7,8 +7,8 @@ from vel.api import TrainingInfo, EpochInfo, BatchInfo, Model, ModelFactory from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import ( - Reinforcer, ReinforcerFactory, VecEnvFactory, ReplayEnvRollerBase, AlgoBase, ReplayEnvRollerFactoryBase -) + Reinforcer, ReinforcerFactory, VecEnvFactory, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, + RlPolicy) from vel.rl.metrics import ( FPSMetric, EpisodeLengthMetric, EpisodeRewardMetricQuantile, EpisodeRewardMetric, FramesMetric ) @@ -33,15 +33,19 @@ class BufferedMixedPolicyIterationReinforcer(Reinforcer): """ def __init__(self, device: torch.device, settings: BufferedMixedPolicyIterationReinforcerSettings, env: VecEnv, - model: Model, env_roller: ReplayEnvRollerBase, algo: AlgoBase) -> None: + model: Model, env_roller: ReplayEnvRollerBase) -> None: self.device = device self.settings = settings self.environment = env - self._trained_model = model.to(self.device) + self._model: RlPolicy = model.to(self.device) self.env_roller = env_roller - self.algo = algo + + @property + def policy(self) -> RlPolicy: + 
""" Model trained by this reinforcer """ + return self._model def metrics(self) -> list: """ List of metrics to track for this learning process """ @@ -54,12 +58,7 @@ def metrics(self) -> list: EpisodeLengthMetric("episode_length") ] - return my_metrics + self.algo.metrics() + self.env_roller.metrics() - - @property - def policy(self) -> Model: - """ Model trained by this reinforcer """ - return self._trained_model + return my_metrics + self.policy.metrics() + self.env_roller.metrics() def initialize_training(self, training_info: TrainingInfo, model_state=None, hidden_state=None): """ Prepare models for training """ @@ -68,11 +67,7 @@ def initialize_training(self, training_info: TrainingInfo, model_state=None, hid else: self.policy.reset_weights() - self.algo.initialize( - training_info=training_info, model=self.policy, environment=self.environment, device=self.device - ) - - def train_epoch(self, epoch_info: EpochInfo, interactive=True): + def train_epoch(self, epoch_info: EpochInfo, interactive=True) -> None: """ Train model on an epoch of a fixed number of batch updates """ epoch_info.on_epoch_begin() @@ -91,7 +86,7 @@ def train_epoch(self, epoch_info: EpochInfo, interactive=True): epoch_info.result_accumulator.freeze_results() epoch_info.on_epoch_end() - def train_batch(self, batch_info: BatchInfo): + def train_batch(self, batch_info: BatchInfo) -> None: """ Single, most atomic 'step' of learning this reinforcer can perform """ batch_info['sub_batch_data'] = [] @@ -113,12 +108,13 @@ def on_policy_train_batch(self, batch_info: BatchInfo): """ Perform an 'on-policy' training step of evaluating an env and a single backpropagation step """ self.policy.train() - rollout = self.env_roller.rollout(batch_info, self.policy, self.settings.number_of_steps).to_device(self.device) + rollout = self.env_roller.rollout(batch_info, self.settings.number_of_steps).to_device(self.device) - batch_result = self.algo.optimize( + # Preprocessing of the rollout for this algorithm + 
rollout = self.policy.process_rollout(rollout) + + batch_result = self.policy.optimize( batch_info=batch_info, - device=self.device, - model=self.policy, rollout=rollout ) @@ -130,12 +126,10 @@ def off_policy_train_batch(self, batch_info: BatchInfo): """ Perform an 'off-policy' training step of sampling the replay buffer and gradient descent """ self.policy.train() - rollout = self.env_roller.sample(batch_info, self.policy, self.settings.number_of_steps).to_device(self.device) + rollout = self.env_roller.sample(batch_info, self.settings.number_of_steps).to_device(self.device) - batch_result = self.algo.optimize( + batch_result = self.policy.optimize( batch_info=batch_info, - device=self.device, - model=self.policy, rollout=rollout ) @@ -145,25 +139,23 @@ def off_policy_train_batch(self, batch_info: BatchInfo): class BufferedMixedPolicyIterationReinforcerFactory(ReinforcerFactory): """ Factory class for the PolicyGradientReplayBuffer factory """ def __init__(self, settings, env_factory: VecEnvFactory, model_factory: ModelFactory, - env_roller_factory: ReplayEnvRollerFactoryBase, algo: AlgoBase, parallel_envs: int, seed: int): + env_roller_factory: ReplayEnvRollerFactoryBase, parallel_envs: int, seed: int): self.settings = settings self.model_factory = model_factory self.env_factory = env_factory self.parallel_envs = parallel_envs self.env_roller_factory = env_roller_factory - self.algo = algo self.seed = seed def instantiate(self, device: torch.device) -> Reinforcer: env = self.env_factory.instantiate(parallel_envs=self.parallel_envs, seed=self.seed) - model = self.model_factory.instantiate(action_space=env.action_space) - env_roller = self.env_roller_factory.instantiate(env, device) - - return BufferedMixedPolicyIterationReinforcer(device, self.settings, env, model, env_roller, self.algo) + policy = self.model_factory.instantiate(action_space=env.action_space) + env_roller = self.env_roller_factory.instantiate(environment=env, policy=policy, device=device) + return 
BufferedMixedPolicyIterationReinforcer(device, self.settings, env, policy, env_roller) -def create(model_config, model, vec_env, algo, env_roller, +def create(model_config, model, vec_env, env_roller, parallel_envs, number_of_steps, experience_replay=1, stochastic_experience_replay=True): """ Vel factory function """ @@ -179,6 +171,5 @@ def create(model_config, model, vec_env, algo, env_roller, model_factory=model, parallel_envs=parallel_envs, env_roller_factory=env_roller, - algo=algo, seed=model_config.seed ) diff --git a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py index 93096a6b..03b53e28 100644 --- a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py @@ -43,6 +43,11 @@ def __init__(self, device: torch.device, settings: OnPolicyIterationReinforcerSe self._model: RlPolicy = policy.to(self.device) + @property + def policy(self) -> RlPolicy: + """ Model trained by this reinforcer """ + return self._model + def metrics(self) -> list: """ List of metrics to track for this learning process """ my_metrics = [ @@ -56,11 +61,6 @@ def metrics(self) -> list: return my_metrics + self.env_roller.metrics() + self.policy.metrics() - @property - def policy(self) -> RlPolicy: - """ Model trained by this reinforcer """ - return self._model - def initialize_training(self, training_info: TrainingInfo, model_state=None, hidden_state=None): """ Prepare models for training """ if model_state is not None: diff --git a/vel/rl/xpolicy/semipurgatory/a2c_rnn.py b/vel/rl/xpolicy/semipurgatory/a2c_rnn.py deleted file mode 100644 index fc38671a..00000000 --- a/vel/rl/xpolicy/semipurgatory/a2c_rnn.py +++ /dev/null @@ -1,92 +0,0 @@ -import torch -import torch.nn.functional as F - -from vel.metric.base import AveragingNamedMetric -from vel.calc.function import explained_variance -from vel.rl.api import OptimizerAlgoBase, Rollout, Trajectories -from vel.rl.discount_bootstrap 
import discount_bootstrap_gae - - -class A2CPolicyGradient(OptimizerAlgoBase): - """ Simplest policy gradient - calculate loss as an advantage of an actor versus value function """ - def __init__(self, entropy_coefficient, value_coefficient, discount_factor: float, gae_lambda=1.0): - super().__init__() - - self.entropy_coefficient = entropy_coefficient - self.value_coefficient = value_coefficient - self.gae_lambda = gae_lambda - self.discount_factor = discount_factor - - def process_rollout(self, batch_info, rollout: Rollout): - """ Process rollout for ALGO before any chunking/shuffling """ - assert isinstance(rollout, Trajectories), "A2C requires trajectory rollouts" - - advantages = discount_bootstrap_gae( - rewards_buffer=rollout.transition_tensors['rewards'], - dones_buffer=rollout.transition_tensors['dones'], - values_buffer=rollout.transition_tensors['values'], - final_values=rollout.rollout_tensors['final_values'], - discount_factor=self.discount_factor, - gae_lambda=self.gae_lambda, - number_of_steps=rollout.num_steps - ) - - returns = advantages + rollout.transition_tensors['values'] - - rollout.transition_tensors['advantages'] = advantages - rollout.transition_tensors['returns'] = returns - - return rollout - - def calculate_gradient(self, batch_info, device, model, rollout): - """ Calculate loss of the supplied rollout """ - evaluator = model.evaluate(rollout) - - # Use evaluator interface to get the what we are interested in from the model - advantages = evaluator.get('rollout:advantages') - returns = evaluator.get('rollout:returns') - rollout_values = evaluator.get('rollout:values') - - logprobs = evaluator.get('model:action:logprobs') - values = evaluator.get('model:values') - entropy = evaluator.get('model:entropy') - - # Actual calculations. 
Pretty trivial - policy_loss = -torch.mean(advantages * logprobs) - value_loss = 0.5 * F.mse_loss(values, returns) - policy_entropy = torch.mean(entropy) - - loss_value = ( - policy_loss - self.entropy_coefficient * policy_entropy + self.value_coefficient * value_loss - ) - - loss_value.backward() - - return { - 'policy_loss': policy_loss.item(), - 'value_loss': value_loss.item(), - 'policy_entropy': policy_entropy.item(), - 'advantage_norm': torch.norm(advantages).item(), - 'explained_variance': explained_variance(returns, rollout_values) - } - - def metrics(self) -> list: - """ List of metrics to track for this learning process """ - return [ - AveragingNamedMetric("value_loss"), - AveragingNamedMetric("policy_entropy"), - AveragingNamedMetric("policy_loss"), - AveragingNamedMetric("grad_norm"), - AveragingNamedMetric("advantage_norm"), - AveragingNamedMetric("explained_variance") - ] - - -def create(entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): - """ Vel factory function """ - return A2CPolicyGradient( - entropy_coefficient, - value_coefficient, - discount_factor, - gae_lambda - ) diff --git a/vel/rl/xpolicy/semipurgatory/ppo_rnn.py b/vel/rl/xpolicy/semipurgatory/ppo_rnn.py deleted file mode 100644 index 3ef76c72..00000000 --- a/vel/rl/xpolicy/semipurgatory/ppo_rnn.py +++ /dev/null @@ -1,133 +0,0 @@ -import torch - -import numbers - -from vel.calc.function import explained_variance -from vel.function.constant import ConstantSchedule -from vel.metric.base import AveragingNamedMetric -from vel.rl.api import OptimizerAlgoBase, Rollout, Trajectories -from vel.rl.discount_bootstrap import discount_bootstrap_gae - - -class PpoPolicyGradient(OptimizerAlgoBase): - """ Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 """ - def __init__(self, entropy_coefficient, value_coefficient, cliprange, max_grad_norm, discount_factor: float, - normalize_advantage: bool = True, gae_lambda: float = 1.0): - super().__init__(max_grad_norm) - - 
self.entropy_coefficient = entropy_coefficient - self.value_coefficient = value_coefficient - self.normalize_advantage = normalize_advantage - - if isinstance(cliprange, numbers.Number): - self.cliprange = ConstantSchedule(cliprange) - else: - self.cliprange = cliprange - - self.gae_lambda = gae_lambda - self.discount_factor = discount_factor - - def process_rollout(self, batch_info, rollout: Rollout): - """ Process rollout for ALGO before any chunking/shuffling """ - assert isinstance(rollout, Trajectories), "PPO requires trajectory rollouts" - - advantages = discount_bootstrap_gae( - rewards_buffer=rollout.transition_tensors['rewards'], - dones_buffer=rollout.transition_tensors['dones'], - values_buffer=rollout.transition_tensors['values'], - final_values=rollout.rollout_tensors['final_values'], - discount_factor=self.discount_factor, - gae_lambda=self.gae_lambda, - number_of_steps=rollout.num_steps - ) - - returns = advantages + rollout.transition_tensors['values'] - - rollout.transition_tensors['advantages'] = advantages - rollout.transition_tensors['returns'] = returns - - return rollout - - def calculate_gradient(self, batch_info, device, model, rollout): - """ Calculate loss of the supplied rollout """ - evaluator = model.evaluate(rollout) - - # Part 0.0 - Rollout values - advantages = evaluator.get('rollout:advantages') - rollout_values = evaluator.get('rollout:values') - rollout_action_logprobs = evaluator.get('rollout:action:logprobs') - returns = evaluator.get('rollout:returns') - - # PART 0.1 - Model evaluation - entropy = evaluator.get('model:entropy') - model_values = evaluator.get('model:values') - model_action_logprobs = evaluator.get('model:action:logprobs') - - # Select the cliprange - current_cliprange = self.cliprange.value(batch_info['progress']) - - # Normalize the advantages? 
- if self.normalize_advantage: - advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) - - # PART 1 - policy entropy - policy_entropy = torch.mean(entropy) - - # PART 2 - value function - value_output_clipped = rollout_values + torch.clamp( - model_values - rollout_values, -current_cliprange, current_cliprange - ) - value_loss_part1 = (model_values - returns).pow(2) - value_loss_part2 = (value_output_clipped - returns).pow(2) - value_loss = 0.5 * torch.mean(torch.max(value_loss_part1, value_loss_part2)) - - # PART 3 - policy gradient loss - ratio = torch.exp(model_action_logprobs - rollout_action_logprobs) - - pg_loss_part1 = -advantages * ratio - pg_loss_part2 = -advantages * torch.clamp(ratio, 1.0 - current_cliprange, 1.0 + current_cliprange) - policy_loss = torch.mean(torch.max(pg_loss_part1, pg_loss_part2)) - - loss_value = ( - policy_loss - self.entropy_coefficient * policy_entropy + self.value_coefficient * value_loss - ) - - loss_value.backward() - - with torch.no_grad(): - approx_kl_divergence = 0.5 * torch.mean((model_action_logprobs - rollout_action_logprobs).pow(2)) - clip_fraction = torch.mean((torch.abs(ratio - 1.0) > current_cliprange).to(dtype=torch.float)) - - return { - 'policy_loss': policy_loss.item(), - 'value_loss': value_loss.item(), - 'policy_entropy': policy_entropy.item(), - 'approx_kl_divergence': approx_kl_divergence.item(), - 'clip_fraction': clip_fraction.item(), - 'advantage_norm': torch.norm(advantages).item(), - 'explained_variance': explained_variance(returns, rollout_values) - } - - def metrics(self) -> list: - """ List of metrics to track for this learning process """ - return [ - AveragingNamedMetric("policy_loss"), - AveragingNamedMetric("value_loss"), - AveragingNamedMetric("policy_entropy"), - AveragingNamedMetric("approx_kl_divergence"), - AveragingNamedMetric("clip_fraction"), - AveragingNamedMetric("grad_norm"), - AveragingNamedMetric("advantage_norm"), - AveragingNamedMetric("explained_variance") - ] - 
- -def create(entropy_coefficient, value_coefficient, cliprange, max_grad_norm, discount_factor, - normalize_advantage=True, gae_lambda=1.0): - """ Vel factory function """ - return PpoPolicyGradient( - entropy_coefficient, value_coefficient, cliprange, max_grad_norm, - discount_factor=discount_factor, - normalize_advantage=normalize_advantage, - gae_lambda=gae_lambda - ) diff --git a/vel/rl/xpolicy/stochastic_policy.py b/vel/rl/xpolicy/stochastic_policy.py deleted file mode 100644 index 23db9d13..00000000 --- a/vel/rl/xpolicy/stochastic_policy.py +++ /dev/null @@ -1,70 +0,0 @@ -import gym -import typing - -from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.module.stochastic_action_head import StochasticActionHead -from vel.rl.module.value_head import ValueHead - - -class StochasticPolicy(BackboneModel): - """ - Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone - """ - - def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, action_space: gym.Space): - super().__init__() - - self.input_block = input_block - self.backbone = backbone - - assert not self.backbone.is_stateful, "Backbone shouldn't have state" - - self.action_head = StochasticActionHead( - action_space=action_space, - input_dim=self.backbone.output_dim - ) - - self.value_head = ValueHead( - input_dim=self.backbone.output_dim - ) - - def reset_weights(self): - """ Initialize properly model weights """ - self.input_block.reset_weights() - self.backbone.reset_weights() - self.action_head.reset_weights() - self.value_head.reset_weights() - - def forward(self, observation): - """ Calculate model outputs """ - input_data = self.input_block(observation) - - base_output = self.backbone(input_data) - - action_output = self.action_head(base_output) - value_output = self.value_head(base_output) - - return action_output, value_output - - -class 
StochasticPolicyFactory(ModelFactory): - """ Factory class for policy gradient models """ - def __init__(self, input_block: IdentityFactory, backbone: ModelFactory): - self.backbone = backbone - self.input_block = input_block - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - backbone = self.backbone.instantiate(**extra_args) - - return StochasticPolicy(input_block, backbone, extra_args['action_space']) - - -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return StochasticPolicyFactory(input_block=input_block, backbone=backbone) From cbb38f3b2fb32c75eeccaa1c3cc8741d5ee5a48c Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Wed, 2 Oct 2019 20:37:34 -0700 Subject: [PATCH 105/162] Revived the DDQN config. --- ..._ddqn_prioritized.yaml => atari_ddqn.yaml} | 59 +++---- .../rl/atari/{purgatory => }/atari_dqn.yaml | 31 ++-- .../purgatory/atari_acer_trust_region.yaml | 83 --------- vel/rl/api/policy.py | 4 - .../prioritized_circular_replay_buffer.py | 2 +- vel/rl/env/mujoco.py | 31 +--- vel/rl/env/wrappers/env_normalize.py | 56 ------ .../transition_replay_env_roller.py | 26 +-- .../purgatory => }/double_nature_cnn.py | 53 ++++-- vel/rl/layer/premade/purgatory/__init__.py | 0 .../layer/{premade => purgatory}/__init__.py | 0 .../purgatory/double_noisy_nature_cnn.py | 3 - vel/rl/layer/{premade => }/purgatory/mlp.py | 3 - .../layer/{premade => }/purgatory/mlp_rnn.py | 3 - .../{premade => }/purgatory/nature_cnn_rnn.py | 0 .../purgatory/noisy_nature_cnn.py | 0 vel/rl/layer/{premade => }/purgatory/rnn.py | 3 - vel/rl/module/head/q_dueling_head.py | 6 +- vel/rl/module/noise/eps_greedy.py | 34 ++-- vel/rl/module/noise/ou_noise.py | 12 +- vel/rl/module/q_policy.py | 50 ++++++ vel/rl/module/q_stochastic_policy.py | 46 +++++ vel/rl/policy/acer.py | 57 +----- 
vel/rl/policy/dqn.py | 165 ++++++++++++++++++ vel/rl/policy/purgatory/dqn.py | 102 ----------- ...fered_mixed_policy_iteration_reinforcer.py | 9 +- ...uffered_off_policy_iteration_reinforcer.py | 44 ++--- 27 files changed, 400 insertions(+), 482 deletions(-) rename examples-configs/rl/atari/{dqn/atari_dueling_ddqn_prioritized.yaml => atari_ddqn.yaml} (58%) rename examples-configs/rl/atari/{purgatory => }/atari_dqn.yaml (77%) delete mode 100644 examples-configs/rl/atari/purgatory/atari_acer_trust_region.yaml delete mode 100644 vel/rl/env/wrappers/env_normalize.py rename vel/rl/layer/{premade/purgatory => }/double_nature_cnn.py (66%) delete mode 100644 vel/rl/layer/premade/purgatory/__init__.py rename vel/rl/layer/{premade => purgatory}/__init__.py (100%) rename vel/rl/layer/{premade => }/purgatory/double_noisy_nature_cnn.py (98%) rename vel/rl/layer/{premade => }/purgatory/mlp.py (99%) rename vel/rl/layer/{premade => }/purgatory/mlp_rnn.py (98%) rename vel/rl/layer/{premade => }/purgatory/nature_cnn_rnn.py (100%) rename vel/rl/layer/{premade => }/purgatory/noisy_nature_cnn.py (100%) rename vel/rl/layer/{premade => }/purgatory/rnn.py (98%) create mode 100644 vel/rl/module/q_policy.py create mode 100644 vel/rl/module/q_stochastic_policy.py create mode 100644 vel/rl/policy/dqn.py delete mode 100644 vel/rl/policy/purgatory/dqn.py diff --git a/examples-configs/rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml b/examples-configs/rl/atari/atari_ddqn.yaml similarity index 58% rename from examples-configs/rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml rename to examples-configs/rl/atari/atari_ddqn.yaml index 3084f15a..ecbf6544 100644 --- a/examples-configs/rl/atari/dqn/atari_dueling_ddqn_prioritized.yaml +++ b/examples-configs/rl/atari/atari_ddqn.yaml @@ -12,26 +12,37 @@ vec_env: model: - name: vel.rl.models.q_dueling_model + name: vel.rl.policy.dqn - input_block: - name: vel.modules.input.image_to_tensor + double_dqn: true + dueling_dqn: true + target_update_frequency: 
10_000 # After how many batches to update the target network + max_grad_norm: 0.5 - backbone: - name: vel.rl.models.backbone.double_nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history + discount_factor: 0.99 + + epsilon: + name: vel.function.linear_and_constant + end_of_interpolation: 0.1 + initial_value: 1.0 + final_value: 0.1 + + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + size: [84, 84, 4] # Number of channels is frame history + - name: vel.rl.layer.double_nature_cnn reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer + name: vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer env_roller: name: vel.rl.env_roller.transition_replay_env_roller replay_buffer: - name: vel.rl.buffers.prioritized_circular_replay_buffer + name: vel.rl.buffer.prioritized_circular_replay_buffer buffer_initial_size: 30_000 # How many samples we need in the buffer before we start using replay buffer buffer_capacity: 250_000 @@ -42,30 +53,12 @@ reinforcer: priority_exponent: 0.6 priority_weight: - name: vel.schedules.linear + name: vel.function.linear initial_value: 0.4 final_value: 1.0 priority_epsilon: 1.0e-6 - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.dqn - - double_dqn: true - target_update_frequency: 10_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - rollout_steps: 4 # How many environment steps to perform per batch of training training_steps: 32 # How many environment steps (per env) to perform per training round @@ -73,7 +66,7 @@ reinforcer: optimizer: - name: vel.optimizers.rmsprop + name: vel.optimizer.rmsprop lr: 2.5e-4 alpha: 0.95 momentum: 0.95 @@ -82,15 +75,15 @@ optimizer: commands: train: - name: vel.rl.commands.rl_train_command + name: 
vel.rl.command.rl_train_command total_frames: 1.1e7 # 11M batches_per_epoch: 2500 record: - name: vel.rl.commands.record_movie_command + name: vel.rl.command.record_movie_command takes: 10 videoname: 'atari_vid_{:04}.avi' evaluate: - name: vel.rl.commands.evaluate_env_command + name: vel.rl.command.evaluate_env_command takes: 100 diff --git a/examples-configs/rl/atari/purgatory/atari_dqn.yaml b/examples-configs/rl/atari/atari_dqn.yaml similarity index 77% rename from examples-configs/rl/atari/purgatory/atari_dqn.yaml rename to examples-configs/rl/atari/atari_dqn.yaml index a811880a..9851ecba 100644 --- a/examples-configs/rl/atari/purgatory/atari_dqn.yaml +++ b/examples-configs/rl/atari/atari_dqn.yaml @@ -12,19 +12,23 @@ vec_env: model: - name: vel.rl.algo.dqn + name: vel.rl.policy.dqn target_update_frequency: 10_000 # After how many batches to update the target network discount_factor: 0.99 - backbone: - name: vel.module.sequence - modules: - - name: vel.modules.input.image_to_tensor - - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history + epsilon: + name: vel.function.linear_and_constant + end_of_interpolation: 0.1 + initial_value: 1.0 + final_value: 0.1 + + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + size: [84, 84, 4] # Number of channels is frame history + - name: vel.rl.layer.nature_cnn reinforcer: @@ -43,15 +47,6 @@ reinforcer: frame_stack_compensation: true frame_history: 4 # How many stacked frames go into a single observation - action_noise: - name: vel.rl.module.noise.eps_greedy - - epsilon: - name: vel.function.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training training_steps: 32 # How many environment steps (per env) to perform per training round parallel_envs: 1 # Roll out only one env in parallel, just like in 
DeepMind paper diff --git a/examples-configs/rl/atari/purgatory/atari_acer_trust_region.yaml b/examples-configs/rl/atari/purgatory/atari_acer_trust_region.yaml deleted file mode 100644 index 810a6a51..00000000 --- a/examples-configs/rl/atari/purgatory/atari_acer_trust_region.yaml +++ /dev/null @@ -1,83 +0,0 @@ -name: 'atari_acer_trust_region' - - -env: - name: vel.rl.env.classic_atari - game: !param game = 'BreakoutNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.shared_mem - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_stochastic_policy_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_mixed_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.trajectory_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 1_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 50_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - algo: - name: vel.rl.algo.policy_gradient.acer - entropy_coefficient: 0.01 - q_coefficient: 0.5 - rho_cap: 10.0 - retrace_rho_cap: 1.0 - - max_grad_norm: 10.0 - discount_factor: 0.99 - - trust_region: true - trust_region_delta: 1.0 - - parallel_envs: 12 # How many environments to run in parallel - number_of_steps: 20 # How many environment steps go into a single batch - experience_replay: 4 - - -optimizer: - name: vel.optimizers.rmsprop - lr: 7.0e-4 - alpha: 0.99 - # epsilon: 1.0e-5 - epsilon: 1.0e-3 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 - 
batches_per_epoch: 10 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'atari_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 - parallel_envs: 12 # How many environments to run in parallel diff --git a/vel/rl/api/policy.py b/vel/rl/api/policy.py index 3a73003e..4d1109c2 100644 --- a/vel/rl/api/policy.py +++ b/vel/rl/api/policy.py @@ -62,10 +62,6 @@ def reset_state(self, state, dones): #################################################################################################################### # Utility Methods - that provide default implementations but may be short circuited by some implementations - def value(self, observation, state=None): - """ Return value for given observation """ - return self.act(observation, state=state)['values'] - def action(self, observation, state=None, deterministic=False): """ Return policy action for given observation """ return self.act(observation, state=state, deterministic=deterministic)['actions'] diff --git a/vel/rl/buffer/prioritized_circular_replay_buffer.py b/vel/rl/buffer/prioritized_circular_replay_buffer.py index 85f6dedb..0081d73c 100644 --- a/vel/rl/buffer/prioritized_circular_replay_buffer.py +++ b/vel/rl/buffer/prioritized_circular_replay_buffer.py @@ -58,7 +58,7 @@ def _get_transitions(self, probs, indexes, tree_idxs, batch_info, forward_steps= weights = (capacity * probs) ** (-priority_weight) weights = weights / weights.max(axis=0, keepdims=True) - transition_arrays['weights'] = weights + transition_arrays['weights'] = weights.astype(np.float32) transition_tensors = {k: torch.from_numpy(v) for k, v in transition_arrays.items()} transitions = Trajectories( diff --git a/vel/rl/env/mujoco.py b/vel/rl/env/mujoco.py index 3c511c69..9b16b8a0 100644 --- a/vel/rl/env/mujoco.py +++ b/vel/rl/env/mujoco.py @@ -6,28 +6,22 @@ from vel.openai.baselines import logger from vel.openai.baselines.bench import Monitor from vel.rl.api import EnvFactory -from 
vel.rl.env.wrappers.env_normalize import EnvNormalize from vel.util.situational import process_environment_settings DEFAULT_SETTINGS = { 'default': { 'monitor': False, - 'allow_early_resets': False, - 'normalize_observations': False, - 'normalize_returns': False, + 'allow_early_resets': False }, 'record': { 'monitor': False, - 'allow_early_resets': True, - 'normalize_observations': False, - 'normalize_returns': False, + 'allow_early_resets': True } } -def env_maker(environment_id, seed, serial_id, monitor=False, allow_early_resets=False, normalize_observations=False, - normalize_returns=False, normalize_gamma=0.99): +def env_maker(environment_id, seed, serial_id, monitor=False, allow_early_resets=False): """ Create a relatively raw atari environment """ env = gym.make(environment_id) env.seed(seed + serial_id) @@ -40,30 +34,16 @@ def env_maker(environment_id, seed, serial_id, monitor=False, allow_early_resets env = Monitor(env, logdir, allow_early_resets=allow_early_resets) - if normalize_observations or normalize_returns: - env = EnvNormalize( - env, - normalize_observations=normalize_observations, - normalize_returns=normalize_returns, - gamma=normalize_gamma - ) - return env class MujocoEnv(EnvFactory): """ Atari game environment wrapped in the same way as Deep Mind and OpenAI baselines """ - def __init__(self, envname, normalize_observations=False, normalize_returns=False, settings=None, presets=None): + def __init__(self, envname, settings=None, presets=None): self.envname = envname settings = settings if settings is not None else {} - if normalize_observations: - settings['normalize_observations'] = True - - if normalize_returns: - settings['normalize_returns'] = True - self.settings = process_environment_settings(DEFAULT_SETTINGS, settings, presets) def specification(self) -> EnvSpec: @@ -80,11 +60,10 @@ def instantiate(self, seed=0, serial_id=0, preset='default', extra_args=None) -> return env_maker(self.envname, seed, serial_id, **settings) -def 
create(game, normalize_returns=False, settings=None, presets=None): +def create(game, settings=None, presets=None): """ Vel factory function """ return MujocoEnv( envname=game, - normalize_returns=normalize_returns, settings=settings, presets=presets ) diff --git a/vel/rl/env/wrappers/env_normalize.py b/vel/rl/env/wrappers/env_normalize.py deleted file mode 100644 index be21772f..00000000 --- a/vel/rl/env/wrappers/env_normalize.py +++ /dev/null @@ -1,56 +0,0 @@ -import gym -import numpy as np - -from vel.openai.baselines.common.running_mean_std import RunningMeanStd - - -class EnvNormalize(gym.Wrapper): - """ - Single environment normalization based on VecNormalize from OpenAI baselines - """ - def __init__(self, env, normalize_observations=True, normalize_returns=True, - clip_observations=10., clip_rewards=10., gamma=0.99, epsilon=1e-8): - super().__init__(env) - - self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if normalize_observations else None - self.ret_rms = RunningMeanStd(shape=()) if normalize_returns else None - self.clipob = clip_observations - self.cliprew = clip_rewards - self.ret = 0.0 - self.gamma = gamma - self.epsilon = epsilon - - def step(self, action): - """ - Apply sequence of actions to sequence of environments - actions -> (observations, rewards, news) - - where 'news' is a boolean vector indicating whether each element is new. 
- """ - obs, rews, news, infos = self.env.step(action) - - self.ret = self.ret * self.gamma + rews - - obs = self._filter_observation(obs) - - if self.ret_rms: - self.ret_rms.update(np.array([self.ret])) - rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew) - - return obs, rews, news, infos - - def _filter_observation(self, obs): - if self.ob_rms: - self.ob_rms.update(obs[None]) - obs = np.clip((obs - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + self.epsilon), -self.clipob, self.clipob) - - return obs.astype(np.float32) - else: - return obs - - def reset(self): - """ - Reset all environments - """ - obs = self.env.reset() - return self._filter_observation(obs) diff --git a/vel/rl/env_roller/transition_replay_env_roller.py b/vel/rl/env_roller/transition_replay_env_roller.py index 14cb282e..5cf7a738 100644 --- a/vel/rl/env_roller/transition_replay_env_roller.py +++ b/vel/rl/env_roller/transition_replay_env_roller.py @@ -22,14 +22,13 @@ class TransitionReplayEnvRoller(ReplayEnvRollerBase): def __init__(self, environment: VecEnv, policy: RlPolicy, device: torch.device, replay_buffer: ReplayBuffer, discount_factor: typing.Optional[float] = None, normalize_returns: bool = False, - forward_steps: int = 1, action_noise: typing.Optional[nn.Module] = None): + forward_steps: int = 1): self._environment = environment self.device = device self.replay_buffer = replay_buffer self.normalize_returns = normalize_returns self.forward_steps = forward_steps self.discount_factor = discount_factor - self.action_noise = action_noise.to(self.device) if action_noise is not None else None self.actor = PolicyActor(self.environment.num_envs, policy, device) assert not self.actor.is_stateful, "Does not support stateful policies" @@ -66,9 +65,6 @@ def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: for step_idx in range(number_of_steps): step = self.actor.act(self.last_observation) - if self.action_noise is not None: - 
step['actions'] = self.action_noise(step['actions'], batch_info=batch_info) - replay_extra_information = {} accumulator.add('observations', self.last_observation_cpu) @@ -102,9 +98,6 @@ def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: dones_tensor = torch.from_numpy(new_dones.astype(np.float32)).clone() accumulator.add('dones', dones_tensor) - if self.action_noise is not None: - self.action_noise.reset_training_state(dones_tensor, batch_info=batch_info) - self.accumulated_returns = self.accumulated_returns * (1.0 - new_dones.astype(np.float32)) self.last_observation_cpu = torch.from_numpy(new_obs).clone() @@ -161,22 +154,15 @@ class TransitionReplayEnvRollerFactory(ReplayEnvRollerFactoryBase): """ Factory for the ReplayEnvRoller """ def __init__(self, replay_buffer_factory: ReplayBufferFactory, discount_factor: typing.Optional[float] = None, - normalize_returns: bool = False, forward_steps: int = 1, - action_noise: typing.Optional[ModelFactory] = None): + normalize_returns: bool = False, forward_steps: int = 1): self.replay_buffer_factory = replay_buffer_factory self.normalize_returns = normalize_returns self.forward_steps = forward_steps self.discount_factor = discount_factor - self.action_noise_factory = action_noise def instantiate(self, environment, policy, device): replay_buffer = self.replay_buffer_factory.instantiate(environment) - if self.action_noise_factory is None: - action_noise = None - else: - action_noise = self.action_noise_factory.instantiate(environment=environment) - return TransitionReplayEnvRoller( environment=environment, policy=policy, @@ -184,18 +170,16 @@ def instantiate(self, environment, policy, device): replay_buffer=replay_buffer, discount_factor=self.discount_factor, normalize_returns=self.normalize_returns, - forward_steps=self.forward_steps, - action_noise=action_noise + forward_steps=self.forward_steps ) def create(replay_buffer, discount_factor: typing.Optional[float] = None, normalize_returns: bool = 
False, - forward_steps: int = 1, action_noise: typing.Optional[ModelFactory] = None): + forward_steps: int = 1): """ Vel factory function """ return TransitionReplayEnvRollerFactory( replay_buffer_factory=replay_buffer, discount_factor=discount_factor, forward_steps=forward_steps, - normalize_returns=normalize_returns, - action_noise=action_noise + normalize_returns=normalize_returns ) diff --git a/vel/rl/layer/premade/purgatory/double_nature_cnn.py b/vel/rl/layer/double_nature_cnn.py similarity index 66% rename from vel/rl/layer/premade/purgatory/double_nature_cnn.py rename to vel/rl/layer/double_nature_cnn.py index ed64afcd..54599e9e 100644 --- a/vel/rl/layer/premade/purgatory/double_nature_cnn.py +++ b/vel/rl/layer/double_nature_cnn.py @@ -12,18 +12,19 @@ import vel.util.network as net_util -from vel.api import LinearBackboneModel, ModelFactory +from vel.api import SizeHints, SizeHint +from vel.net.layer_base import Layer, LayerFactory -class DoubleNatureCnn(LinearBackboneModel): +class DoubleNatureCnn(Layer): """ Neural network as defined in the paper 'Human-level control through deep reinforcement learning' but with two separate heads. 
""" - def __init__(self, input_width, input_height, input_channels, output_dim=512): - super().__init__() + def __init__(self, name: str, input_width, input_height, input_channels, output_dim=512): + super().__init__(name) - self._output_dim = output_dim + self.output_dim = output_dim self.conv1 = nn.Conv2d( in_channels=input_channels, @@ -68,11 +69,6 @@ def __init__(self, input_width, input_height, input_channels, output_dim=512): self.output_dim ) - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - return self._output_dim - def reset_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): @@ -84,7 +80,13 @@ def reset_weights(self): init.orthogonal_(m.weight, gain=np.sqrt(2)) init.constant_(m.bias, 0.0) - def forward(self, image): + def size_hints(self) -> SizeHints: + return SizeHints(( + SizeHint(None, self.output_dim), + SizeHint(None, self.output_dim) + )) + + def forward(self, image, state: dict = None, context: dict = None): result = image result = F.relu(self.conv1(result)) result = F.relu(self.conv2(result)) @@ -97,12 +99,29 @@ def forward(self, image): return output_one, output_two -def create(input_width, input_height, input_channels=1): - """ Vel factory function """ - def instantiate(**_): - return DoubleNatureCnn(input_width=input_width, input_height=input_height, input_channels=input_channels) +class DoubleNatureCnnFactory(LayerFactory): + """ Nature Cnn Network Factory """ - return ModelFactory.generic(instantiate) + def __init__(self, output_dim: int = 512): + self.output_dim = output_dim + @property + def name_base(self) -> str: + """ Base of layer name """ + return "nature_cnn" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + (b, c, w, h) = direct_input.assert_single(4) + + return DoubleNatureCnn( + name=name, + input_width=w, + input_height=h, + input_channels=c, + output_dim=self.output_dim + ) -DoubleNatureCnnFactory = create + +def create(output_dim: int = 
512): + """ Vel factory function """ + return DoubleNatureCnnFactory(output_dim=output_dim) diff --git a/vel/rl/layer/premade/purgatory/__init__.py b/vel/rl/layer/premade/purgatory/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/vel/rl/layer/premade/__init__.py b/vel/rl/layer/purgatory/__init__.py similarity index 100% rename from vel/rl/layer/premade/__init__.py rename to vel/rl/layer/purgatory/__init__.py diff --git a/vel/rl/layer/premade/purgatory/double_noisy_nature_cnn.py b/vel/rl/layer/purgatory/double_noisy_nature_cnn.py similarity index 98% rename from vel/rl/layer/premade/purgatory/double_noisy_nature_cnn.py rename to vel/rl/layer/purgatory/double_noisy_nature_cnn.py index a55fc8ed..7c31e719 100644 --- a/vel/rl/layer/premade/purgatory/double_noisy_nature_cnn.py +++ b/vel/rl/layer/purgatory/double_noisy_nature_cnn.py @@ -116,6 +116,3 @@ def instantiate(**_): ) return ModelFactory.generic(instantiate) - - -DoubleNoisyNatureCnnFactory = create diff --git a/vel/rl/layer/premade/purgatory/mlp.py b/vel/rl/layer/purgatory/mlp.py similarity index 99% rename from vel/rl/layer/premade/purgatory/mlp.py rename to vel/rl/layer/purgatory/mlp.py index f4e03ae3..65560553 100644 --- a/vel/rl/layer/premade/purgatory/mlp.py +++ b/vel/rl/layer/purgatory/mlp.py @@ -68,6 +68,3 @@ def instantiate(**_): ) return ModelFactory.generic(instantiate) - - -MLPFactory = create diff --git a/vel/rl/layer/premade/purgatory/mlp_rnn.py b/vel/rl/layer/purgatory/mlp_rnn.py similarity index 98% rename from vel/rl/layer/premade/purgatory/mlp_rnn.py rename to vel/rl/layer/purgatory/mlp_rnn.py index d5229d7f..1e871ffb 100644 --- a/vel/rl/layer/premade/purgatory/mlp_rnn.py +++ b/vel/rl/layer/purgatory/mlp_rnn.py @@ -57,6 +57,3 @@ def instantiate(**_): ) return ModelFactory.generic(instantiate) - - -MlpRnnFactory = create diff --git a/vel/rl/layer/premade/purgatory/nature_cnn_rnn.py b/vel/rl/layer/purgatory/nature_cnn_rnn.py similarity index 100% rename from 
vel/rl/layer/premade/purgatory/nature_cnn_rnn.py rename to vel/rl/layer/purgatory/nature_cnn_rnn.py diff --git a/vel/rl/layer/premade/purgatory/noisy_nature_cnn.py b/vel/rl/layer/purgatory/noisy_nature_cnn.py similarity index 100% rename from vel/rl/layer/premade/purgatory/noisy_nature_cnn.py rename to vel/rl/layer/purgatory/noisy_nature_cnn.py diff --git a/vel/rl/layer/premade/purgatory/rnn.py b/vel/rl/layer/purgatory/rnn.py similarity index 98% rename from vel/rl/layer/premade/purgatory/rnn.py rename to vel/rl/layer/purgatory/rnn.py index 973345d4..5a6e9625 100644 --- a/vel/rl/layer/premade/purgatory/rnn.py +++ b/vel/rl/layer/purgatory/rnn.py @@ -45,6 +45,3 @@ def instantiate(**_): rnn_type=rnn_type ) return ModelFactory.generic(instantiate) - - -RNNFactory = create diff --git a/vel/rl/module/head/q_dueling_head.py b/vel/rl/module/head/q_dueling_head.py index 78a9d6ea..600e3564 100644 --- a/vel/rl/module/head/q_dueling_head.py +++ b/vel/rl/module/head/q_dueling_head.py @@ -6,14 +6,14 @@ class QDuelingHead(nn.Module): """ Network head calculating Q-function value for each (discrete) action using two separate inputs. 
""" - def __init__(self, input_dim, action_space): + def __init__(self, val_input_dim, adv_input_dim, action_space): super().__init__() # Q-function requires a discrete action space assert isinstance(action_space, spaces.Discrete) - self.linear_layer_advantage = nn.Linear(input_dim, action_space.n) - self.linear_layer_value = nn.Linear(input_dim, 1) + self.linear_layer_value = nn.Linear(val_input_dim, 1) + self.linear_layer_advantage = nn.Linear(adv_input_dim, action_space.n) self.action_space = action_space def reset_weights(self): diff --git a/vel/rl/module/noise/eps_greedy.py b/vel/rl/module/noise/eps_greedy.py index 0f3346eb..becf13e1 100644 --- a/vel/rl/module/noise/eps_greedy.py +++ b/vel/rl/module/noise/eps_greedy.py @@ -1,39 +1,31 @@ +import gym import typing import torch -import torch.nn as nn -from vel.api import Schedule +from vel.api import Schedule, Network from vel.internal.generic_factory import GenericFactory from vel.function.constant import ConstantSchedule -class EpsGreedy(nn.Module): +class EpsGreedy(Network): """ Epsilon-greedy action selection """ - def __init__(self, epsilon: typing.Union[Schedule, float], environment): + def __init__(self, action_space: gym.Space): super().__init__() - if isinstance(epsilon, Schedule): - self.epsilon_schedule = epsilon - else: - self.epsilon_schedule = ConstantSchedule(epsilon) - - self.action_space = environment.action_space + self.action_space = action_space - def forward(self, actions, batch_info=None): - if batch_info is None: - # Just take final value if there is no batch info - epsilon = self.epsilon_schedule.value(1.0) + def forward(self, actions, epsilon, deterministic=False): + if deterministic: + return actions else: - epsilon = self.epsilon_schedule.value(batch_info['progress']) - - random_samples = torch.randint_like(actions, self.action_space.n) - selector = torch.rand_like(random_samples, dtype=torch.float32) + random_samples = torch.randint_like(actions, self.action_space.n) + selector = 
torch.rand_like(random_samples, dtype=torch.float32) - # Actions with noise applied - noisy_actions = torch.where(selector > epsilon, actions, random_samples) + # Actions with noise applied + noisy_actions = torch.where(selector > epsilon, actions, random_samples) - return noisy_actions + return noisy_actions def reset_training_state(self, dones, batch_info): """ A hook for a model to react when during training episode is finished """ diff --git a/vel/rl/module/noise/ou_noise.py b/vel/rl/module/noise/ou_noise.py index d54a67e9..edda86f8 100644 --- a/vel/rl/module/noise/ou_noise.py +++ b/vel/rl/module/noise/ou_noise.py @@ -1,19 +1,21 @@ -import torch +import gym import numpy as np +import torch import torch.nn as nn -from vel.calc.process import OrnsteinUhlenbeckNoiseProcess +from vel.api import Network +from vel.util.process import OrnsteinUhlenbeckNoiseProcess from vel.internal.generic_factory import GenericFactory -class OuNoise(nn.Module): +class OuNoise(Network): """ Ornstein–Uhlenbeck noise process for action noise """ - def __init__(self, std_dev, environment): + def __init__(self, std_dev: float, action_space: gym.Space): super().__init__() self.std_dev = std_dev - self.action_space = environment.action_space + self.action_space = action_space self.processes = [] self.register_buffer('low_tensor', torch.from_numpy(self.action_space.low).unsqueeze(0)) diff --git a/vel/rl/module/q_policy.py b/vel/rl/module/q_policy.py new file mode 100644 index 00000000..7e6e8925 --- /dev/null +++ b/vel/rl/module/q_policy.py @@ -0,0 +1,50 @@ +import gym + +from vel.api import Network, BackboneNetwork + +from vel.rl.module.head.q_head import QHead +from vel.rl.module.head.q_dueling_head import QDuelingHead + + +class QPolicy(Network): + """ + Simple deterministic greedy action-value model. 
+ Supports only discrete action spaces (ones that can be enumerated) + """ + def __init__(self, net: BackboneNetwork, action_space: gym.Space, dueling_dqn=False): + super().__init__() + + self.dueling_dqn = dueling_dqn + self.action_space = action_space + + self.net = net + + if self.dueling_dqn: + (value_size, adv_size) = self.net.size_hints().assert_tuple(2) + + self.q_head = QDuelingHead( + val_input_dim=value_size.last(), + adv_input_dim=adv_size.last(), + action_space=action_space + ) + else: + self.q_head = QHead( + input_dim=self.net.size_hints().assert_single(2).last(), + action_space=action_space + ) + + def reset_weights(self): + """ Initialize weights to reasonable defaults """ + self.net.reset_weights() + self.q_head.reset_weights() + + def forward(self, observations): + """ Model forward pass """ + if self.dueling_dqn: + val_output, adv_output = self.net(observations) + q_values = self.q_head(val_output, adv_output) + else: + base_output = self.net(observations) + q_values = self.q_head(base_output) + + return q_values diff --git a/vel/rl/module/q_stochastic_policy.py b/vel/rl/module/q_stochastic_policy.py new file mode 100644 index 00000000..d98b1ac3 --- /dev/null +++ b/vel/rl/module/q_stochastic_policy.py @@ -0,0 +1,46 @@ +import gym + +from vel.api import BackboneNetwork, Network +from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head +from vel.rl.module.head.q_head import QHead + + +class QStochasticPolicy(Network): + """ + A policy model with an action-value critic head (instead of more common state-value critic head). 
+ Supports only discrete action spaces (ones that can be enumerated) + """ + + def __init__(self, net: BackboneNetwork, action_space: gym.Space): + super().__init__() + + assert isinstance(action_space, gym.spaces.Discrete) + + self.net = net + + (action_size, value_size) = self.net.size_hints().assert_tuple(2) + + self.action_head = make_stockastic_action_head( + input_dim=action_size.last(), + action_space=action_space + ) + + self.q_head = QHead( + input_dim=value_size.last(), + action_space=action_space + ) + + def reset_weights(self): + """ Initialize properly model weights """ + self.net.reset_weights() + self.action_head.reset_weights() + self.q_head.reset_weights() + + def forward(self, observations): + """ Calculate model outputs """ + action_hidden, q_hidden = self.net(observations) + policy_params = self.action_head(action_hidden) + + q = self.q_head(q_hidden) + + return policy_params, q diff --git a/vel/rl/policy/acer.py b/vel/rl/policy/acer.py index f4d62580..c320a25f 100644 --- a/vel/rl/policy/acer.py +++ b/vel/rl/policy/acer.py @@ -2,11 +2,10 @@ import torch import torch.nn.functional as F -from vel.api import BackboneNetwork, ModelFactory, BatchInfo, Network +from vel.api import BackboneNetwork, ModelFactory, BatchInfo from vel.metric.base import AveragingNamedMetric from vel.rl.api import Trajectories, RlPolicy, Rollout -from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head -from vel.rl.module.head.q_head import QHead +from vel.rl.module.q_stochastic_policy import QStochasticPolicy def select_indices(tensor, indices): @@ -14,47 +13,6 @@ def select_indices(tensor, indices): return tensor.gather(1, indices.unsqueeze(1)).squeeze() -class QStochasticPolicy(Network): - """ - A policy model with an action-value critic head (instead of more common state-value critic head). 
- Supports only discrete action spaces (ones that can be enumerated) - """ - - def __init__(self, net: BackboneNetwork, action_space: gym.Space): - super().__init__() - - assert isinstance(action_space, gym.spaces.Discrete) - - self.net = net - - (action_size, value_size) = self.net.size_hints().assert_tuple(2) - - self.action_head = make_stockastic_action_head( - input_dim=action_size.last(), - action_space=action_space - ) - - self.q_head = QHead( - input_dim=value_size.last(), - action_space=action_space - ) - - def reset_weights(self): - """ Initialize properly model weights """ - self.net.reset_weights() - self.action_head.reset_weights() - self.q_head.reset_weights() - - def forward(self, observations): - """ Calculate model outputs """ - action_hidden, q_hidden = self.net(observations) - policy_params = self.action_head(action_hidden) - - q = self.q_head(q_hidden) - - return policy_params, q - - class ACER(RlPolicy): """ Actor-Critic with Experience Replay - policy gradient calculations """ @@ -117,14 +75,16 @@ def update_target_policy(self): # EWMA average model update average_param.data.mul_(self.average_model_alpha).add_(model_param.data * (1 - self.average_model_alpha)) - def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: - """ Calculate loss of the supplied rollout """ - assert isinstance(rollout, Trajectories), "ACER algorithm requires trajectory input" - + def post_optimization_step(self, batch_info: BatchInfo, rollout: Rollout): + """ Optional operations to perform after optimization """ # We calculate the trust-region update with respect to the average model if self.trust_region: self.update_target_policy() + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ Calculate loss of the supplied rollout """ + assert isinstance(rollout, Trajectories), "ACER algorithm requires trajectory input" + local_epsilon = 1e-6 # Part 0.0 - Rollout values @@ -192,6 +152,7 @@ def calculate_gradient(self, 
batch_info: BatchInfo, rollout: Rollout) -> dict: if self.trust_region: with torch.no_grad(): + self.target_policy.eval() target_logprobs = self.target_policy(observations)[0] actor_loss = policy_loss - self.entropy_coefficient * policy_entropy diff --git a/vel/rl/policy/dqn.py b/vel/rl/policy/dqn.py new file mode 100644 index 00000000..e2460fe7 --- /dev/null +++ b/vel/rl/policy/dqn.py @@ -0,0 +1,165 @@ +import numbers + +import typing +import gym +import torch +import torch.nn.functional as F +import torch.nn.utils + +from vel.api import ModelFactory, BackboneNetwork, BatchInfo, Schedule +from vel.function.constant import ConstantSchedule +from vel.metric import AveragingNamedMetric +from vel.rl.api import RlPolicy, Rollout +from vel.rl.module.q_policy import QPolicy +from vel.rl.module.noise.eps_greedy import EpsGreedy + + +class DQN(RlPolicy): + """ Deep Q-Learning algorithm """ + + def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space: gym.Space, + epsilon: typing.Union[float, Schedule], discount_factor: float, double_dqn: bool, + dueling_dqn: bool, target_update_frequency: int): + super().__init__(discount_factor) + + self.model = QPolicy(net=net, action_space=action_space, dueling_dqn=dueling_dqn) + + self.double_dqn = double_dqn + self.target_update_frequency = target_update_frequency + + if isinstance(epsilon, numbers.Number): + self.epsilon_schedule = ConstantSchedule(epsilon) + else: + self.epsilon_schedule = epsilon + + self.epsilon_value = self.epsilon_schedule.value(0.0) + + self.action_noise = EpsGreedy(action_space=action_space) + + self.target_model = QPolicy(net=net_factory.instantiate(), action_space=action_space, dueling_dqn=dueling_dqn) + + def reset_weights(self): + """ Initialize properly model weights """ + self.model.reset_weights() + self.target_model.load_state_dict(self.model.state_dict()) + + def forward(self, observation, state=None): + """ Calculate model outputs """ + return self.model(observation) + + def 
act(self, observation, state=None, deterministic=False): + """ Select actions based on model's output """ + q_values = self.model(observation) + actions = self.model.q_head.sample(q_values) + noisy_actions = self.action_noise(actions, epsilon=self.epsilon_value, deterministic=deterministic) + + return { + 'actions': noisy_actions, + 'q': q_values + } + + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ Calculate loss of the supplied rollout """ + observations = rollout.batch_tensor('observations') + observations_next = rollout.batch_tensor('observations_next') + + actions = rollout.batch_tensor('actions') + dones_tensor = rollout.batch_tensor('dones') + rewards_tensor = rollout.batch_tensor('rewards') + + assert dones_tensor.dtype == torch.float32 + + q = self.model(observations) + + with torch.no_grad(): + self.target_model.eval() + target_q = self.target_model(observations_next) + + if self.double_dqn: + # DOUBLE DQN + model_q_next = self.model(observations_next) + # Select largest 'target' value based on action that 'model' selects + values = target_q.gather(1, model_q_next.argmax(dim=1, keepdim=True)).squeeze(1) + else: + # REGULAR DQN + # [0] is because in pytorch .max(...) 
returns tuple (max values, argmax) + values = target_q.max(dim=1)[0] + + forward_steps = rollout.extra_data.get('forward_steps', 1) + estimated_return = rewards_tensor + (self.discount_factor ** forward_steps) * values * (1 - dones_tensor) + + q_selected = q.gather(1, actions.unsqueeze(1)).squeeze(1) + + if rollout.has_tensor('weights'): + weights = rollout.batch_tensor('weights') + else: + weights = torch.ones_like(rewards_tensor) + + original_losses = F.smooth_l1_loss(q_selected, estimated_return, reduction='none') + + loss_value = torch.mean(weights * original_losses) + loss_value.backward() + + return { + 'loss': loss_value.item(), + # We need it to update priorities in the replay buffer: + 'errors': original_losses.detach().cpu().numpy(), + 'average_q_selected': torch.mean(q_selected).item(), + 'average_q_target': torch.mean(estimated_return).item() + } + + def post_optimization_step(self, batch_info, rollout): + """ Steps to take after optimization has been done""" + if batch_info.aggregate_batch_number % self.target_update_frequency == 0: + self.target_model.load_state_dict(self.model.state_dict()) + + self.epsilon_value = self.epsilon_schedule.value(batch_info['progress']) + + def metrics(self) -> list: + """ List of metrics to track for this learning process """ + return [ + AveragingNamedMetric("loss", scope="model"), + AveragingNamedMetric("average_q_selected", scope="model"), + AveragingNamedMetric("average_q_target", scope="model") + ] + + +class DQNFactory(ModelFactory): + def __init__(self, net_factory: ModelFactory, epsilon: typing.Union[float, Schedule], discount_factor: float, + target_update_frequency: int, double_dqn: bool = False, dueling_dqn: bool = False): + self.net_factory = net_factory + self.epsilon = epsilon + self.discount_factor = discount_factor + self.target_update_frequency = target_update_frequency + self.double_dqn = double_dqn + self.dueling_dqn = dueling_dqn + + def instantiate(self, **extra_args): + """ Instantiate the model 
""" + action_space = extra_args.pop('action_space') + net = self.net_factory.instantiate(**extra_args) + + return DQN( + net=net, + net_factory=self.net_factory, + action_space=action_space, + epsilon=self.epsilon, + discount_factor=self.discount_factor, + double_dqn=self.double_dqn, + dueling_dqn=self.dueling_dqn, + target_update_frequency=self.target_update_frequency + ) + + +def create(net: ModelFactory, epsilon: typing.Union[float, Schedule], discount_factor: float, + target_update_frequency: int, double_dqn: bool = False, dueling_dqn: bool = False): + """ Vel factory function """ + + return DQNFactory( + net_factory=net, + epsilon=epsilon, + discount_factor=discount_factor, + double_dqn=double_dqn, + dueling_dqn=dueling_dqn, + target_update_frequency=target_update_frequency, + ) diff --git a/vel/rl/policy/purgatory/dqn.py b/vel/rl/policy/purgatory/dqn.py deleted file mode 100644 index c6ea4933..00000000 --- a/vel/rl/policy/purgatory/dqn.py +++ /dev/null @@ -1,102 +0,0 @@ -import torch -import torch.nn.functional as F -import torch.nn.utils - -from vel.api import ModelFactory, BackboneModel -from vel.metric import AveragingNamedMetric -from vel.rl.api import RlPolicy - - -class DeepQLearning(RlPolicy): - """ Deep Q-Learning algorithm """ - - def __init__(self, backbone: BackboneModel, - discount_factor: float, double_dqn: bool, - target_update_frequency: int): - super().__init__(discount_factor) - - self.backbone = backbone - - self.double_dqn = double_dqn - self.target_update_frequency = target_update_frequency - - self.target_model = None - - def initialize(self, training_info, model, environment, device): - """ Initialize policy gradient from reinforcer settings """ - self.target_model = self.model_factory.instantiate(action_space=environment.action_space).to(device) - self.target_model.load_state_dict(model.state_dict()) - self.target_model.eval() - - def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: - """ Calculate loss of the 
supplied rollout """ - evaluator = model.evaluate(rollout) - - dones_tensor = evaluator.get('rollout:dones') - rewards_tensor = evaluator.get('rollout:rewards') - - assert dones_tensor.dtype == torch.float32 - - with torch.no_grad(): - target_evaluator = self.target_model.evaluate(rollout) - - if self.double_dqn: - # DOUBLE DQN - target_q = target_evaluator.get('model:q_next') - model_q = evaluator.get('model:q_next') - # Select largest 'target' value based on action that 'model' selects - values = target_q.gather(1, model_q.argmax(dim=1, keepdim=True)).squeeze(1) - else: - # REGULAR DQN - # [0] is because in pytorch .max(...) returns tuple (max values, argmax) - values = target_evaluator.get('model:q_next').max(dim=1)[0] - - forward_steps = rollout.extra_data.get('forward_steps', 1) - estimated_return = rewards_tensor + (self.discount_factor ** forward_steps) * values * (1 - dones_tensor) - - q_selected = evaluator.get('model:action:q') - - if evaluator.is_provided('rollout:weights'): - weights = evaluator.get('rollout:weights') - else: - weights = torch.ones_like(rewards_tensor) - - original_losses = F.smooth_l1_loss(q_selected, estimated_return, reduction='none') - - loss_value = torch.mean(weights * original_losses) - loss_value.backward() - - return { - 'loss': loss_value.item(), - # We need it to update priorities in the replay buffer: - 'errors': original_losses.detach().cpu().numpy(), - 'average_q_selected': torch.mean(q_selected).item(), - 'average_q_target': torch.mean(estimated_return).item() - } - - def post_optimization_step(self, batch_info, rollout): - """ Steps to take after optimization has been done""" - if batch_info.aggregate_batch_number % self.target_update_frequency == 0: - self.target_model.load_state_dict(self.state_dict()) - self.target_model.eval() - - def metrics(self) -> list: - """ List of metrics to track for this learning process """ - return [ - AveragingNamedMetric("loss", scope="model"), - 
AveragingNamedMetric("average_q_selected", scope="model"), - AveragingNamedMetric("average_q_target", scope="model") - ] - - -def create(backbone: ModelFactory, - discount_factor: float, target_update_frequency: int, - double_dqn: bool = False): - """ Vel factory function """ - - return DeepQLearning( - backbone=backbone, - discount_factor=discount_factor, - double_dqn=double_dqn, - target_update_frequency=target_update_frequency, - ) diff --git a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py index 11764c84..2a6b42e4 100644 --- a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py @@ -33,12 +33,12 @@ class BufferedMixedPolicyIterationReinforcer(Reinforcer): """ def __init__(self, device: torch.device, settings: BufferedMixedPolicyIterationReinforcerSettings, env: VecEnv, - model: Model, env_roller: ReplayEnvRollerBase) -> None: + policy: RlPolicy, env_roller: ReplayEnvRollerBase) -> None: self.device = device self.settings = settings self.environment = env - self._model: RlPolicy = model.to(self.device) + self._model: RlPolicy = policy.to(self.device) self.env_roller = env_roller @@ -110,7 +110,7 @@ def on_policy_train_batch(self, batch_info: BatchInfo): rollout = self.env_roller.rollout(batch_info, self.settings.number_of_steps).to_device(self.device) - # Preprocessing of the rollout for this algorithm + # Preprocessing of the rollout for this policy rollout = self.policy.process_rollout(rollout) batch_result = self.policy.optimize( @@ -155,8 +155,7 @@ def instantiate(self, device: torch.device) -> Reinforcer: return BufferedMixedPolicyIterationReinforcer(device, self.settings, env, policy, env_roller) -def create(model_config, model, vec_env, env_roller, - parallel_envs, number_of_steps, +def create(model_config, model, vec_env, env_roller, parallel_envs, number_of_steps, experience_replay=1, 
stochastic_experience_replay=True): """ Vel factory function """ settings = BufferedMixedPolicyIterationReinforcerSettings( diff --git a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py index be04f2d7..b5baaad5 100644 --- a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py @@ -7,8 +7,8 @@ from vel.api import TrainingInfo, EpochInfo, BatchInfo, Model, ModelFactory from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import ( - Reinforcer, ReinforcerFactory, ReplayEnvRollerBase, AlgoBase, VecEnvFactory, ReplayEnvRollerFactoryBase -) + Reinforcer, ReinforcerFactory, ReplayEnvRollerBase, VecEnvFactory, ReplayEnvRollerFactoryBase, + RlPolicy) from vel.rl.metrics import ( FPSMetric, EpisodeLengthMetric, EpisodeRewardMetricQuantile, EpisodeRewardMetric, FramesMetric, ) @@ -32,13 +32,12 @@ class BufferedOffPolicyIterationReinforcer(Reinforcer): Afterwards, it samples experience batches from this buffer to train the policy. 
""" def __init__(self, device: torch.device, settings: BufferedOffPolicyIterationReinforcerSettings, - environment: VecEnv, model: Model, algo: AlgoBase, env_roller: ReplayEnvRollerBase): + environment: VecEnv, policy: RlPolicy, env_roller: ReplayEnvRollerBase): self.device = device self.settings = settings self.environment = environment - self._trained_model = model.to(self.device) - self.algo = algo + self._policy = policy.to(self.device) self.env_roller = env_roller @@ -53,11 +52,11 @@ def metrics(self) -> list: EpisodeLengthMetric("episode_length") ] - return my_metrics + self.algo.metrics() + self.env_roller.metrics() + return my_metrics + self.policy.metrics() + self.env_roller.metrics() @property def policy(self) -> Model: - return self._trained_model + return self._policy def initialize_training(self, training_info: TrainingInfo, model_state=None, hidden_state=None): """ Prepare models for training """ @@ -66,10 +65,6 @@ def initialize_training(self, training_info: TrainingInfo, model_state=None, hid else: self.policy.reset_weights() - self.algo.initialize( - training_info=training_info, model=self.policy, environment=self.environment, device=self.device - ) - def train_epoch(self, epoch_info: EpochInfo, interactive=True) -> None: """ Train model for a single epoch """ epoch_info.on_epoch_begin() @@ -96,14 +91,14 @@ def train_batch(self, batch_info: BatchInfo) -> None: For this reinforforcer, that involves: 1. Roll out environment and store out experience in the buffer - 2. Sample the buffer and train the algo on sample batch + 2. Sample the buffer and train the policy on sample batch """ # For each reinforcer batch: # 1. Roll out environment and store out experience in the buffer self.roll_out_and_store(batch_info) - # 2. Sample the buffer and train the algo on sample batch + # 2. 
Sample the buffer and train the policy on sample batch self.train_on_replay_memory(batch_info) def roll_out_and_store(self, batch_info): @@ -111,7 +106,7 @@ def roll_out_and_store(self, batch_info): self.policy.train() if self.env_roller.is_ready_for_sampling(): - rollout = self.env_roller.rollout(batch_info, self.policy, self.settings.rollout_steps) + rollout = self.env_roller.rollout(batch_info, self.settings.rollout_steps) rollout = rollout.to_device(self.device) # Store some information about the rollout, no training phase @@ -123,7 +118,7 @@ def roll_out_and_store(self, batch_info): with tqdm.tqdm(desc="Populating memory", total=self.env_roller.initial_memory_size_hint()) as pbar: while not self.env_roller.is_ready_for_sampling(): - rollout = self.env_roller.rollout(batch_info, self.policy, self.settings.rollout_steps) + rollout = self.env_roller.rollout(batch_info, self.settings.rollout_steps) rollout = rollout.to_device(self.device) new_frames = rollout.frames() @@ -144,12 +139,10 @@ def train_on_replay_memory(self, batch_info): batch_info['sub_batch_data'] = [] for i in range(self.settings.training_rounds): - sampled_rollout = self.env_roller.sample(batch_info, self.policy, self.settings.training_steps) + sampled_rollout = self.env_roller.sample(batch_info, self.settings.training_steps) - batch_result = self.algo.optimize( + batch_result = self.policy.optimize( batch_info=batch_info, - device=self.device, - model=self.policy, rollout=sampled_rollout.to_device(self.device) ) @@ -164,32 +157,30 @@ class BufferedOffPolicyIterationReinforcerFactory(ReinforcerFactory): """ Factory class for the DQN reinforcer """ def __init__(self, settings, env_factory: VecEnvFactory, model_factory: ModelFactory, - algo: AlgoBase, env_roller_factory: ReplayEnvRollerFactoryBase, parallel_envs: int, seed: int): + env_roller_factory: ReplayEnvRollerFactoryBase, parallel_envs: int, seed: int): self.settings = settings self.env_factory = env_factory self.model_factory = 
model_factory - self.algo = algo self.env_roller_factory = env_roller_factory self.parallel_envs = parallel_envs self.seed = seed def instantiate(self, device: torch.device) -> BufferedOffPolicyIterationReinforcer: env = self.env_factory.instantiate(parallel_envs=self.parallel_envs, seed=self.seed) - env_roller = self.env_roller_factory.instantiate(env, device) - model = self.model_factory.instantiate(action_space=env.action_space) + policy = self.model_factory.instantiate(action_space=env.action_space) + env_roller = self.env_roller_factory.instantiate(environment=env, policy=policy, device=device) return BufferedOffPolicyIterationReinforcer( device=device, settings=self.settings, environment=env, - model=model, - algo=self.algo, + policy=policy, env_roller=env_roller ) -def create(model_config, vec_env, model, algo, env_roller, parallel_envs: int, +def create(model_config, vec_env, model, env_roller, parallel_envs: int, rollout_steps: int, training_steps: int, training_rounds: int = 1): """ Vel factory function """ settings = BufferedOffPolicyIterationReinforcerSettings( @@ -202,7 +193,6 @@ def create(model_config, vec_env, model, algo, env_roller, parallel_envs: int, settings=settings, env_factory=vec_env, model_factory=model, - algo=algo, env_roller_factory=env_roller, parallel_envs=parallel_envs, seed=model_config.seed From aa2905e4177db8fe565e722a0a241ad36220320d Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Wed, 2 Oct 2019 21:57:43 -0700 Subject: [PATCH 106/162] Revived the Rainbow. 
--- examples-configs/rl/atari/atari_ddqn.yaml | 2 +- .../atari_rainbow.yaml | 51 ++-- examples-configs/rl/atari/dqn/atari_ddqn.yaml | 78 ------ .../atari/dqn/atari_dqn_distributional.yaml | 90 ------- .../rl/atari/dqn/atari_dqn_raw.yaml | 86 ------- .../rl/atari/dqn/atari_dueling_ddqn.yaml | 79 ------ .../atari_rp_dqn_distributional.yaml | 93 ------- .../atari_rp_dqn_noisynet.yaml | 86 ------- .../dqn_rainbow_param/atari_rp_dqn_nstep.yaml | 91 ------- .../dqn_rainbow_param/atari_rp_dqn_raw.yaml | 88 ------- .../rl/atari/purgatory/atari_ddqn.yaml | 0 .../rl/atari/purgatory/atari_rainbow.yaml | 0 vel/rl/env_roller/step_env_roller.py | 1 + .../trajectory_replay_env_roller.py | 1 + .../transition_replay_env_roller.py | 31 +-- vel/rl/layer/double_nature_cnn.py | 2 +- .../double_noisy_nature_cnn.py | 60 +++-- .../q_distributional_noisy_dueling_head.py | 7 +- vel/rl/module/noisy_linear.py | 2 +- vel/rl/module/rainbow_policy.py | 67 +++++ vel/rl/policy/acer.py | 1 - vel/rl/policy/dqn.py | 1 - vel/rl/policy/purgatory/distributional_dqn.py | 191 -------------- vel/rl/policy/rainbow.py | 240 ++++++++++++++++++ ...fered_mixed_policy_iteration_reinforcer.py | 6 +- ...uffered_off_policy_iteration_reinforcer.py | 2 - .../on_policy_iteration_reinforcer.py | 5 +- vel/rl/util/actor.py | 10 + vel/rl/xpolicy/purgatory/q_rainbow_model.py | 110 -------- 29 files changed, 405 insertions(+), 1076 deletions(-) rename examples-configs/rl/atari/{dqn_rainbow_param => }/atari_rainbow.yaml (64%) delete mode 100644 examples-configs/rl/atari/dqn/atari_ddqn.yaml delete mode 100644 examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml delete mode 100644 examples-configs/rl/atari/dqn/atari_dqn_raw.yaml delete mode 100644 examples-configs/rl/atari/dqn/atari_dueling_ddqn.yaml delete mode 100644 examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml delete mode 100644 examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml delete mode 100644 
examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml delete mode 100644 examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml delete mode 100644 examples-configs/rl/atari/purgatory/atari_ddqn.yaml delete mode 100644 examples-configs/rl/atari/purgatory/atari_rainbow.yaml rename vel/rl/layer/{purgatory => }/double_noisy_nature_cnn.py (64%) create mode 100644 vel/rl/module/rainbow_policy.py delete mode 100644 vel/rl/policy/purgatory/distributional_dqn.py create mode 100644 vel/rl/policy/rainbow.py delete mode 100644 vel/rl/xpolicy/purgatory/q_rainbow_model.py diff --git a/examples-configs/rl/atari/atari_ddqn.yaml b/examples-configs/rl/atari/atari_ddqn.yaml index ecbf6544..d4b7430b 100644 --- a/examples-configs/rl/atari/atari_ddqn.yaml +++ b/examples-configs/rl/atari/atari_ddqn.yaml @@ -17,7 +17,6 @@ model: double_dqn: true dueling_dqn: true target_update_frequency: 10_000 # After how many batches to update the target network - max_grad_norm: 0.5 discount_factor: 0.99 @@ -71,6 +70,7 @@ optimizer: alpha: 0.95 momentum: 0.95 epsilon: 1.0e-1 + max_grad_norm: 0.5 commands: diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atari_rainbow.yaml b/examples-configs/rl/atari/atari_rainbow.yaml similarity index 64% rename from examples-configs/rl/atari/dqn_rainbow_param/atari_rainbow.yaml rename to examples-configs/rl/atari/atari_rainbow.yaml index 9e8a92cb..a11c3afc 100644 --- a/examples-configs/rl/atari/dqn_rainbow_param/atari_rainbow.yaml +++ b/examples-configs/rl/atari/atari_rainbow.yaml @@ -14,7 +14,11 @@ vec_env: model: - name: vel.rl.models.q_rainbow_model + name: vel.rl.policy.rainbow + + target_update_frequency: 32_000 # After how many batches to update the target network + + discount_factor: 0.99 atoms: 51 # 51 bins for Distributional DQN vmin: -10.0 @@ -23,31 +27,28 @@ model: initial_std_dev: 0.5 factorized_noise: true - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: 
vel.rl.models.backbone.double_noisy_nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - initial_std_dev: 0.5 - factorized_noise: true + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + size: [84, 84, 4] # Number of channels is frame history + - name: vel.rl.layer.double_noisy_nature_cnn + # TODO(this should ideally be brough from level up) + initial_std_dev: 0.5 + factorized_noise: true reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer + name: vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer env_roller: name: vel.rl.env_roller.transition_replay_env_roller # N-Step Q-Learning forward_steps: 3 - discount_factor: 0.99 replay_buffer: - name: vel.rl.buffers.prioritized_circular_replay_buffer + name: vel.rl.buffer.prioritized_circular_replay_buffer buffer_initial_size: 80_000 # How many samples we need in the buffer before we start using replay buffer buffer_capacity: 1_000_000 @@ -58,45 +59,37 @@ reinforcer: priority_exponent: 0.5 priority_weight: - name: vel.schedules.linear + name: vel.function.linear initial_value: 0.4 final_value: 1.0 priority_epsilon: 1.0e-6 - algo: - name: vel.rl.algo.distributional_dqn - double_dqn: true - - target_update_frequency: 32_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training training_steps: 32 # How many environment steps (per env) to perform per training round parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper optimizer: - name: vel.optimizers.adam + name: vel.optimizer.adam lr: 6.25e-05 epsilon: 1.5e-4 + max_grad_norm: 0.5 commands: train: - name: vel.rl.commands.rl_train_command + name: vel.rl.command.rl_train_command total_frames: 1.1e7 # 11M batches_per_epoch: 2500 record: - name: vel.rl.commands.record_movie_command + name: 
vel.rl.command.record_movie_command takes: 10 videoname: 'atari_rainbow_vid_{:04}.avi' fps: 15 evaluate: - name: vel.rl.commands.evaluate_env_command + name: vel.rl.command.evaluate_env_command parallel_envs: 12 takes: 20 diff --git a/examples-configs/rl/atari/dqn/atari_ddqn.yaml b/examples-configs/rl/atari/dqn/atari_ddqn.yaml deleted file mode 100644 index 667ce429..00000000 --- a/examples-configs/rl/atari/dqn/atari_ddqn.yaml +++ /dev/null @@ -1,78 +0,0 @@ -name: 'atari_ddqn' - - -env: - name: vel.rl.env.classic_atari - game: !param game = 'BreakoutNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 30_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 250_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.dqn - - double_dqn: true - target_update_frequency: 10_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How 
many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: vel.optimizers.rmsprop - lr: 2.5e-4 - alpha: 0.95 - momentum: 0.95 - epsilon: 1.0e-1 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 # 11M - batches_per_epoch: 2500 diff --git a/examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml b/examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml deleted file mode 100644 index dd5b62f3..00000000 --- a/examples-configs/rl/atari/dqn/atari_dqn_distributional.yaml +++ /dev/null @@ -1,90 +0,0 @@ -name: 'atari_dqn_distributional' - - -env: - name: vel.rl.env.classic_atari - game: !param game = 'BreakoutNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.distributional_q_model - - atoms: 51 # 51 bins for Distributional DQN - vmin: -10.0 - vmax: 10.0 - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 30_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 250_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - 
name: vel.rl.algo.distributional_dqn - - target_update_frequency: 10_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: vel.optimizers.rmsprop - lr: 2.5e-4 - alpha: 0.95 - momentum: 0.95 - epsilon: 1.0e-1 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 # 11M - batches_per_epoch: 2500 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'atari_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 diff --git a/examples-configs/rl/atari/dqn/atari_dqn_raw.yaml b/examples-configs/rl/atari/dqn/atari_dqn_raw.yaml deleted file mode 100644 index a32427bd..00000000 --- a/examples-configs/rl/atari/dqn/atari_dqn_raw.yaml +++ /dev/null @@ -1,86 +0,0 @@ -name: 'atari_dqn_raw' - - -env: - name: vel.rl.env.classic_atari - game: !param game = 'BreakoutNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 30_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 250_000 - - # Because env has a framestack already built-in, save memory by encoding 
only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.dqn - - target_update_frequency: 10_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: vel.optimizers.rmsprop - lr: 2.5e-4 - alpha: 0.95 - momentum: 0.95 - epsilon: 1.0e-1 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 # 11M - batches_per_epoch: 2500 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'atari_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 diff --git a/examples-configs/rl/atari/dqn/atari_dueling_ddqn.yaml b/examples-configs/rl/atari/dqn/atari_dueling_ddqn.yaml deleted file mode 100644 index a5a225a9..00000000 --- a/examples-configs/rl/atari/dqn/atari_dueling_ddqn.yaml +++ /dev/null @@ -1,79 +0,0 @@ -name: 'atari_dueling_ddqn' - - -env: - name: vel.rl.env.classic_atari - game: !param game = 'BreakoutNoFrameskip-v4' - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_dueling_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.double_nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: 
vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 30_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 250_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.dqn - - double_dqn: true - target_update_frequency: 10_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - - -optimizer: - name: vel.optimizers.rmsprop - lr: 2.5e-4 - alpha: 0.95 - momentum: 0.95 - epsilon: 1.0e-1 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.1e7 # 11M - batches_per_epoch: 2500 diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml deleted file mode 100644 index 20fabbd6..00000000 --- a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_distributional.yaml +++ /dev/null @@ -1,93 +0,0 @@ -name: 'atari_dqn_distributional' - - -env: - name: vel.rl.env.classic_atari - game: !param game = 'BreakoutNoFrameskip-v4' - settings: - max_episode_frames: 108_000 - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames 
go into a single observation - - -model: - name: vel.rl.models.q_distributional_model - - atoms: 51 # 51 bins for Distributional DQN - vmin: -10.0 - vmax: 10.0 - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 80_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 1_000_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.distributional_dqn - - target_update_frequency: 10_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: vel.optimizers.adam - lr: 6.25e-05 - epsilon: 1.5e-4 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 5.0e7 # 50M - batches_per_epoch: 2500 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - fps: 15 - videoname: 'atari_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 12 - takes: 20 - diff --git 
a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml deleted file mode 100644 index 822e3085..00000000 --- a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_noisynet.yaml +++ /dev/null @@ -1,86 +0,0 @@ -name: 'atari_rp_dqn_noisynet' - - -env: - name: vel.rl.env.classic_atari - game: !param game = 'BreakoutNoFrameskip-v4' - settings: - max_episode_frames: 108_000 - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.noisy_q_model - - initial_std_dev: 0.5 - factorized_noise: true - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.noisy_nature_cnn - - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - initial_std_dev: 0.5 - factorized_noise: true - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 80_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 1_000_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - algo: - name: vel.rl.algo.dqn - - target_update_frequency: 32_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: 
vel.optimizers.adam - lr: 6.25e-05 - epsilon: 1.5e-4 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 5.0e7 # 50M - batches_per_epoch: 2500 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'atari_vid_{:04}.avi' - fps: 15 - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 12 - takes: 20 diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml deleted file mode 100644 index af118e3d..00000000 --- a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_nstep.yaml +++ /dev/null @@ -1,91 +0,0 @@ -name: 'atari_rp_dqn_nstep' - - -env: - name: vel.rl.env.classic_atari - game: !param game = 'BreakoutNoFrameskip-v4' - settings: - max_episode_frames: 108_000 - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - # N-Step Q-Learning - forward_steps: 3 - discount_factor: 0.99 - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 80_000 # How many samples we need in the buffer before we start using replay buffer - buffer_capacity: 1_000_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - 
initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.dqn - - target_update_frequency: 32_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: vel.optimizers.adam - lr: 6.25e-05 - epsilon: 1.5e-4 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 5.0e7 # 50M - batches_per_epoch: 2500 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'atari_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 12 - takes: 100 diff --git a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml b/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml deleted file mode 100644 index 8e7272b8..00000000 --- a/examples-configs/rl/atari/dqn_rainbow_param/atari_rp_dqn_raw.yaml +++ /dev/null @@ -1,88 +0,0 @@ -name: 'atari_rp_dqn_raw' - - -env: - name: vel.rl.env.classic_atari - game: !param game = 'BreakoutNoFrameskip-v4' - settings: - max_episode_frames: 108_000 - - -vec_env: - name: vel.rl.vecenv.dummy - frame_history: 4 # How many stacked frames go into a single observation - - -model: - name: vel.rl.models.q_model - - input_block: - name: vel.modules.input.image_to_tensor - - backbone: - name: vel.rl.models.backbone.nature_cnn - input_width: 84 - input_height: 84 - input_channels: 4 # The same as frame_history - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_initial_size: 80_000 # How many samples we need in the buffer before we start using 
replay buffer - buffer_capacity: 1_000_000 - - # Because env has a framestack already built-in, save memory by encoding only last frames in the replay buffer - frame_stack_compensation: true - frame_history: 4 # How many stacked frames go into a single observation - - action_noise: - name: vel.rl.modules.noise.eps_greedy - - epsilon: - name: vel.schedules.linear_and_constant - end_of_interpolation: 0.1 - initial_value: 1.0 - final_value: 0.1 - - algo: - name: vel.rl.algo.dqn - - target_update_frequency: 32_000 # After how many batches to update the target network - max_grad_norm: 0.5 - - discount_factor: 0.99 - - rollout_steps: 4 # How many environment steps (per env) to perform per batch of training - training_steps: 32 # How many environment steps (per env) to perform per training round - parallel_envs: 1 # Roll out only one env in parallel, just like in DeepMind paper - - -optimizer: - name: vel.optimizers.adam - lr: 6.25e-05 - epsilon: 1.5e-4 - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 5.0e7 # 50M - batches_per_epoch: 2500 - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'atari_vid_{:04}.avi' - fps: 15 - - evaluate: - name: vel.rl.commands.evaluate_env_command - parallel_envs: 12 - takes: 20 diff --git a/examples-configs/rl/atari/purgatory/atari_ddqn.yaml b/examples-configs/rl/atari/purgatory/atari_ddqn.yaml deleted file mode 100644 index e69de29b..00000000 diff --git a/examples-configs/rl/atari/purgatory/atari_rainbow.yaml b/examples-configs/rl/atari/purgatory/atari_rainbow.yaml deleted file mode 100644 index e69de29b..00000000 diff --git a/vel/rl/env_roller/step_env_roller.py b/vel/rl/env_roller/step_env_roller.py index 2b959f6e..bb70e1c5 100644 --- a/vel/rl/env_roller/step_env_roller.py +++ b/vel/rl/env_roller/step_env_roller.py @@ -30,6 +30,7 @@ def environment(self): @torch.no_grad() def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Calculate env rollout """ 
+ self.actor.train() accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information diff --git a/vel/rl/env_roller/trajectory_replay_env_roller.py b/vel/rl/env_roller/trajectory_replay_env_roller.py index 7e347edb..f294f4e0 100644 --- a/vel/rl/env_roller/trajectory_replay_env_roller.py +++ b/vel/rl/env_roller/trajectory_replay_env_roller.py @@ -37,6 +37,7 @@ def environment(self): @torch.no_grad() def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Calculate env rollout """ + self.actor.train() accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information diff --git a/vel/rl/env_roller/transition_replay_env_roller.py b/vel/rl/env_roller/transition_replay_env_roller.py index 5cf7a738..dc25b676 100644 --- a/vel/rl/env_roller/transition_replay_env_roller.py +++ b/vel/rl/env_roller/transition_replay_env_roller.py @@ -1,9 +1,8 @@ import torch -import torch.nn as nn import typing import numpy as np -from vel.api import BatchInfo, ModelFactory +from vel.api import BatchInfo from vel.openai.baselines.common.vec_env import VecEnv from vel.openai.baselines.common.running_mean_std import RunningMeanStd from vel.rl.api import ( @@ -21,26 +20,16 @@ class TransitionReplayEnvRoller(ReplayEnvRollerBase): """ def __init__(self, environment: VecEnv, policy: RlPolicy, device: torch.device, replay_buffer: ReplayBuffer, - discount_factor: typing.Optional[float] = None, normalize_returns: bool = False, - forward_steps: int = 1): + normalize_returns: bool = False, forward_steps: int = 1): self._environment = environment self.device = device self.replay_buffer = replay_buffer self.normalize_returns = normalize_returns self.forward_steps = forward_steps - self.discount_factor = discount_factor self.actor = PolicyActor(self.environment.num_envs, policy, device) assert not self.actor.is_stateful, "Does not support stateful policies" - if self.normalize_returns: - assert 
self.discount_factor is not None, \ - "TransitionReplayEnvRoller must have a discount factor defined if normalize_returns is turned on" - - if self.forward_steps > 1: - assert self.discount_factor is not None, \ - "TransitionReplayEnvRoller must have a discount factor defined if forward_steps is larger than one" - self.ret_rms = RunningMeanStd(shape=()) if normalize_returns else None # Initial observation @@ -59,6 +48,8 @@ def environment(self): @torch.no_grad() def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Calculate env rollout """ + self.actor.train() + accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information @@ -90,7 +81,7 @@ def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: ) if self.ret_rms is not None: - self.accumulated_returns = new_rewards + self.discount_factor * self.accumulated_returns + self.accumulated_returns = new_rewards + self.actor.discount_factor * self.accumulated_returns self.ret_rms.update(self.accumulated_returns) # Done is flagged true when the episode has ended AND the frame we see is already a first frame from the @@ -125,7 +116,7 @@ def sample(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: if self.forward_steps > 1: transitions = self.replay_buffer.sample_forward_transitions( batch_size=number_of_steps, batch_info=batch_info, forward_steps=self.forward_steps, - discount_factor=self.discount_factor + discount_factor=self.actor.discount_factor ) else: transitions = self.replay_buffer.sample_transitions(batch_size=number_of_steps, batch_info=batch_info) @@ -153,12 +144,11 @@ def update(self, rollout, batch_info): class TransitionReplayEnvRollerFactory(ReplayEnvRollerFactoryBase): """ Factory for the ReplayEnvRoller """ - def __init__(self, replay_buffer_factory: ReplayBufferFactory, discount_factor: typing.Optional[float] = None, - normalize_returns: bool = False, forward_steps: int = 1): + def __init__(self, 
replay_buffer_factory: ReplayBufferFactory, normalize_returns: bool = False, + forward_steps: int = 1): self.replay_buffer_factory = replay_buffer_factory self.normalize_returns = normalize_returns self.forward_steps = forward_steps - self.discount_factor = discount_factor def instantiate(self, environment, policy, device): replay_buffer = self.replay_buffer_factory.instantiate(environment) @@ -168,18 +158,15 @@ def instantiate(self, environment, policy, device): policy=policy, device=device, replay_buffer=replay_buffer, - discount_factor=self.discount_factor, normalize_returns=self.normalize_returns, forward_steps=self.forward_steps ) -def create(replay_buffer, discount_factor: typing.Optional[float] = None, normalize_returns: bool = False, - forward_steps: int = 1): +def create(replay_buffer, normalize_returns: bool = False, forward_steps: int = 1): """ Vel factory function """ return TransitionReplayEnvRollerFactory( replay_buffer_factory=replay_buffer, - discount_factor=discount_factor, forward_steps=forward_steps, normalize_returns=normalize_returns ) diff --git a/vel/rl/layer/double_nature_cnn.py b/vel/rl/layer/double_nature_cnn.py index 54599e9e..2e269783 100644 --- a/vel/rl/layer/double_nature_cnn.py +++ b/vel/rl/layer/double_nature_cnn.py @@ -108,7 +108,7 @@ def __init__(self, output_dim: int = 512): @property def name_base(self) -> str: """ Base of layer name """ - return "nature_cnn" + return "double_nature_cnn" def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: (b, c, w, h) = direct_input.assert_single(4) diff --git a/vel/rl/layer/purgatory/double_noisy_nature_cnn.py b/vel/rl/layer/double_noisy_nature_cnn.py similarity index 64% rename from vel/rl/layer/purgatory/double_noisy_nature_cnn.py rename to vel/rl/layer/double_noisy_nature_cnn.py index 7c31e719..25299baf 100644 --- a/vel/rl/layer/purgatory/double_noisy_nature_cnn.py +++ b/vel/rl/layer/double_noisy_nature_cnn.py @@ -12,20 +12,22 @@ import vel.util.network as 
net_util -from vel.api import LinearBackboneModel, ModelFactory +from vel.api import SizeHints, SizeHint + +from vel.net.layer_base import Layer, LayerFactory from vel.rl.module.noisy_linear import NoisyLinear -class DoubleNoisyNatureCnn(LinearBackboneModel): +class DoubleNoisyNatureCnn(Layer): """ Neural network as defined in the paper 'Human-level control through deep reinforcement learning' but with two separate heads and "noisy" linear layer. """ - def __init__(self, input_width, input_height, input_channels, output_dim=512, initial_std_dev=0.4, + def __init__(self, name: str, input_width, input_height, input_channels, output_dim=512, initial_std_dev=0.4, factorized_noise=True): - super().__init__() + super().__init__(name) - self._output_dim = output_dim + self.output_dim = output_dim self.conv1 = nn.Conv2d( in_channels=input_channels, @@ -76,10 +78,11 @@ def __init__(self, input_width, input_height, input_channels, output_dim=512, in factorized_noise=factorized_noise ) - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - return self._output_dim + def size_hints(self) -> SizeHints: + return SizeHints(( + SizeHint(None, self.output_dim), + SizeHint(None, self.output_dim) + )) def reset_weights(self): for m in self.modules(): @@ -94,7 +97,7 @@ def reset_weights(self): elif isinstance(m, NoisyLinear): m.reset_weights() - def forward(self, image): + def forward(self, image, state: dict = None, context: dict = None): result = image result = F.relu(self.conv1(result)) result = F.relu(self.conv2(result)) @@ -107,12 +110,37 @@ def forward(self, image): return output_one, output_two -def create(input_width, input_height, input_channels=1, output_dim=512, initial_std_dev=0.4, factorized_noise=True): - """ Vel factory function """ - def instantiate(**_): +class DoubleNoisyNatureCnnFactory(LayerFactory): + """ Nature Cnn Network Factory """ + + def __init__(self, initial_std_dev: float = 0.4, factorized_noise: bool = True, output_dim: int 
= 512): + self.initial_std_dev = initial_std_dev + self.factorized_noise = factorized_noise + self.output_dim = output_dim + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "double_noisy_nature_cnn" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + (b, c, w, h) = direct_input.assert_single(4) + return DoubleNoisyNatureCnn( - input_width=input_width, input_height=input_height, input_channels=input_channels, - output_dim=output_dim, initial_std_dev=initial_std_dev, factorized_noise=factorized_noise + name=name, + input_width=w, + input_height=h, + input_channels=c, + output_dim=self.output_dim, + initial_std_dev=self.initial_std_dev, + factorized_noise=self.factorized_noise ) - return ModelFactory.generic(instantiate) + +def create(initial_std_dev: float = 0.4, factorized_noise: bool = True, output_dim: int = 512): + """ Vel factory function """ + return DoubleNoisyNatureCnnFactory( + output_dim=output_dim, + initial_std_dev=initial_std_dev, + factorized_noise=factorized_noise + ) diff --git a/vel/rl/module/head/q_distributional_noisy_dueling_head.py b/vel/rl/module/head/q_distributional_noisy_dueling_head.py index 3e0f2794..59a22ac8 100644 --- a/vel/rl/module/head/q_distributional_noisy_dueling_head.py +++ b/vel/rl/module/head/q_distributional_noisy_dueling_head.py @@ -10,7 +10,7 @@ class QDistributionalNoisyDuelingHead(nn.Module): """ Network head calculating Q-function value for each (discrete) action. 
""" - def __init__(self, input_dim, action_space, vmin: float, vmax: float, atoms: int = 1, + def __init__(self, val_input_dim, adv_input_dim, action_space, vmin: float, vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): super().__init__() @@ -28,11 +28,12 @@ def __init__(self, input_dim, action_space, vmin: float, vmax: float, atoms: int self.atom_delta = (self.vmax - self.vmin) / (self.atoms - 1) self.linear_layer_advantage = NoisyLinear( - input_dim, self.action_size * self.atoms, initial_std_dev=initial_std_dev, factorized_noise=factorized_noise + adv_input_dim, self.action_size * self.atoms, initial_std_dev=initial_std_dev, + factorized_noise=factorized_noise ) self.linear_layer_value = NoisyLinear( - input_dim, self.atoms, initial_std_dev=initial_std_dev, factorized_noise=factorized_noise + val_input_dim, self.atoms, initial_std_dev=initial_std_dev, factorized_noise=factorized_noise ) self.register_buffer('support_atoms', torch.linspace(self.vmin, self.vmax, self.atoms)) diff --git a/vel/rl/module/noisy_linear.py b/vel/rl/module/noisy_linear.py index 1fdea082..2b94c90c 100644 --- a/vel/rl/module/noisy_linear.py +++ b/vel/rl/module/noisy_linear.py @@ -82,5 +82,5 @@ def extra_repr(self): """ return ( f'{self.in_features}, {self.out_features}, initial_std_dev={self.initial_std_dev}, ' - 'factorized_noise={self.factorized_noise} ' + f'factorized_noise={self.factorized_noise} ' ) diff --git a/vel/rl/module/rainbow_policy.py b/vel/rl/module/rainbow_policy.py new file mode 100644 index 00000000..a61ef126 --- /dev/null +++ b/vel/rl/module/rainbow_policy.py @@ -0,0 +1,67 @@ +import gym +import torch + +from vel.api import Network, BackboneNetwork +from vel.rl.module.head.q_distributional_noisy_dueling_head import QDistributionalNoisyDuelingHead + + +class RainbowPolicy(Network): + """ + A deterministic greedy action-value model. 
+ Includes following commonly known modifications: + - Distributional Q-Learning + - Dueling architecture + - Noisy Nets + """ + + def __init__(self, net: BackboneNetwork, action_space: gym.Space, vmin: float, vmax: float, + atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): + super().__init__() + + self.net = net + + self.action_space = action_space + + (value_size, adv_size) = self.net.size_hints().assert_tuple(2) + + self.q_head = QDistributionalNoisyDuelingHead( + val_input_dim=value_size.last(), + adv_input_dim=adv_size.last(), + action_space=action_space, + vmin=vmin, vmax=vmax, atoms=atoms, + initial_std_dev=initial_std_dev, factorized_noise=factorized_noise + ) + + @property + def atom_delta(self) -> float: + return self.q_head.atom_delta + + @property + def support_atoms(self) -> torch.Tensor: + return self.q_head.support_atoms + + def reset_weights(self): + """ Initialize weights to reasonable defaults """ + self.net.reset_weights() + self.q_head.reset_weights() + + def forward(self, observations): + """ Model forward pass """ + advantage_features, value_features = self.net(observations) + log_histogram = self.q_head(advantage_features, value_features) + return log_histogram + + def histogram_info(self): + """ Return extra information about histogram """ + return self.q_head.histogram_info() + + # def step(self, observations): + # """ Sample action from an action space for given state """ + # log_histogram = self(observations) + # actions = self.q_head.sample(log_histogram) + # + # return { + # 'actions': actions, + # 'log_histogram': log_histogram + # } + diff --git a/vel/rl/policy/acer.py b/vel/rl/policy/acer.py index c320a25f..049c6842 100644 --- a/vel/rl/policy/acer.py +++ b/vel/rl/policy/acer.py @@ -152,7 +152,6 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: if self.trust_region: with torch.no_grad(): - self.target_policy.eval() target_logprobs = self.target_policy(observations)[0] 
actor_loss = policy_loss - self.entropy_coefficient * policy_entropy diff --git a/vel/rl/policy/dqn.py b/vel/rl/policy/dqn.py index e2460fe7..9ef180ba 100644 --- a/vel/rl/policy/dqn.py +++ b/vel/rl/policy/dqn.py @@ -72,7 +72,6 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: q = self.model(observations) with torch.no_grad(): - self.target_model.eval() target_q = self.target_model(observations_next) if self.double_dqn: diff --git a/vel/rl/policy/purgatory/distributional_dqn.py b/vel/rl/policy/purgatory/distributional_dqn.py deleted file mode 100644 index adbee949..00000000 --- a/vel/rl/policy/purgatory/distributional_dqn.py +++ /dev/null @@ -1,191 +0,0 @@ -import torch -import torch.nn.utils - -from vel.api import ModelFactory -from vel.metric import AveragingNamedMetric -from vel.rl.api import OptimizerAlgoBase - - -class DistributionalDeepQLearning(OptimizerAlgoBase): - """ Deep Q-Learning algorithm """ - - def __init__(self, model_factory: ModelFactory, discount_factor: float, double_dqn: bool, - target_update_frequency: int): - super().__init__(max_grad_norm) - - self.model_factory = model_factory - self.discount_factor = discount_factor - - self.double_dqn = double_dqn - self.target_update_frequency = target_update_frequency - - self.target_model = None - - self.vmin = None - self.vmax = None - self.num_atoms = None - self.support_atoms = None - self.atom_delta = None - - def initialize(self, training_info, model, environment, device): - """ Initialize policy gradient from reinforcer settings """ - self.target_model = self.model_factory.instantiate(action_space=environment.action_space).to(device) - self.target_model.load_state_dict(model.state_dict()) - self.target_model.eval() - - histogram_info = model.histogram_info() - - self.vmin = histogram_info['vmin'] - self.vmax = histogram_info['vmax'] - - self.num_atoms = histogram_info['num_atoms'] - - self.support_atoms = histogram_info['support_atoms'] - self.atom_delta = 
histogram_info['atom_delta'] - - def calculate_gradient(self, batch_info, device, model, rollout): - """ Calculate loss of the supplied rollout """ - evaluator = model.evaluate(rollout) - batch_size = rollout.frames() - - dones_tensor = evaluator.get('rollout:dones') - rewards_tensor = evaluator.get('rollout:rewards') - - assert dones_tensor.dtype == torch.float32 - - with torch.no_grad(): - target_evaluator = self.target_model.evaluate(rollout) - - if self.double_dqn: - # DOUBLE DQN - # Histogram gets returned as logits initially, we need to exp it before projection - target_value_histogram_for_all_actions = target_evaluator.get('model:q_dist_next').exp() - model_value_histogram_for_all_actions = evaluator.get('model:q_dist_next').exp() - - atoms_aligned = self.support_atoms.view(1, 1, self.num_atoms) - - selected_action_indices = ( - (atoms_aligned * model_value_histogram_for_all_actions).sum(dim=-1).argmax(dim=1) - ) - - # Select largest 'target' value based on action that 'model' selects - next_value_histograms = ( - target_value_histogram_for_all_actions[range(batch_size), selected_action_indices] - ) - else: - # REGULAR DQN - # Histogram gets returned as logits initially, we need to exp it before projection - target_value_histogram_for_all_actions = target_evaluator.get('model:q_dist_next').exp() - - atoms_aligned = self.support_atoms.view(1, 1, self.num_atoms) - - selected_action_indices = ( - (atoms_aligned * target_value_histogram_for_all_actions).sum(dim=-1).argmax(dim=1) - ) - - next_value_histograms = ( - target_value_histogram_for_all_actions[range(batch_size), selected_action_indices] - ) - - # HISTOGRAM PROJECTION CODE - forward_steps = rollout.extra_data.get('forward_steps', 1) - - atoms_projected = ( - rewards_tensor.unsqueeze(1) + - (self.discount_factor ** forward_steps) * - (1 - dones_tensor).unsqueeze(1) * self.support_atoms.unsqueeze(0) - ) - - atoms_projected = atoms_projected.clamp(min=self.vmin, max=self.vmax) - projection_indices = 
(atoms_projected - self.vmin) / self.atom_delta - - index_floor = projection_indices.floor().long() - index_ceil = projection_indices.ceil().long() - - # Fix corner case when index_floor == index_ceil - index_floor[(index_ceil > 0) * (index_floor == index_ceil)] -= 1 - index_ceil[(index_floor < (self.num_atoms - 1)) * (index_floor == index_ceil)] += 1 - - value_histogram_projected = torch.zeros_like(next_value_histograms) - - # Following part will be a bit convoluted, in an effort to fully vectorize projection operation - - # Special offset index tensor - offsets = ( - torch.arange(0, batch_size * self.num_atoms, self.num_atoms) - .unsqueeze(1) - .expand(batch_size, self.num_atoms) - .contiguous().view(-1).to(device) - ) - - # Linearize all the buffers - value_histogram_projected = value_histogram_projected.view(-1) - index_ceil = index_ceil.view(-1) - index_floor = index_floor.view(-1) - projection_indices = projection_indices.view(-1) - - value_histogram_projected.index_add_( - 0, - index_floor+offsets, - (next_value_histograms.view(-1) * (index_ceil.float() - projection_indices)) - ) - - value_histogram_projected.index_add_( - 0, - index_ceil+offsets, - (next_value_histograms.view(-1) * (projection_indices - index_floor.float())) - ) - - value_histogram_projected = value_histogram_projected.reshape(next_value_histograms.shape) - - q_log_histogram_selected = evaluator.get('model:action:q_dist') - - # Cross-entropy loss as usual - original_losses = -(value_histogram_projected * q_log_histogram_selected).sum(dim=1) - - if evaluator.is_provided('rollout:weights'): - weights = evaluator.get('rollout:weights') - else: - weights = torch.ones_like(rewards_tensor) - - loss_value = torch.mean(weights * original_losses) - loss_value.backward() - - with torch.no_grad(): - mean_q_model = (self.support_atoms.unsqueeze(0) * torch.exp(q_log_histogram_selected)).sum(dim=1).mean() - mean_q_target = (self.support_atoms.unsqueeze(0) * value_histogram_projected).sum(dim=1).mean() - 
- return { - 'loss': loss_value.item(), - # We need it to update priorities in the replay buffer: - 'errors': original_losses.detach().cpu().numpy(), - 'average_q_selected': mean_q_model.item(), - 'average_q_target': mean_q_target.item() - } - - def post_optimization_step(self, batch_info, device, model, rollout): - """ Steps to take after optimization has been done""" - if batch_info.aggregate_batch_number % self.target_update_frequency == 0: - self.target_model.load_state_dict(model.state_dict()) - self.target_model.eval() - - def metrics(self) -> list: - """ List of metrics to track for this learning process """ - return [ - AveragingNamedMetric("loss"), - AveragingNamedMetric("average_q_selected"), - AveragingNamedMetric("average_q_target"), - AveragingNamedMetric("grad_norm"), - ] - - -def create(model: ModelFactory, discount_factor: float, target_update_frequency: int, - max_grad_norm: float, double_dqn: bool = False): - """ Vel factory function """ - return DistributionalDeepQLearning( - model_factory=model, - discount_factor=discount_factor, - double_dqn=double_dqn, - target_update_frequency=target_update_frequency, - max_grad_norm=max_grad_norm - ) diff --git a/vel/rl/policy/rainbow.py b/vel/rl/policy/rainbow.py new file mode 100644 index 00000000..846f032c --- /dev/null +++ b/vel/rl/policy/rainbow.py @@ -0,0 +1,240 @@ +import gym +import torch +import torch.nn.utils + +from vel.api import ModelFactory, BackboneNetwork, BatchInfo +from vel.metric import AveragingNamedMetric +from vel.rl.api import RlPolicy, Rollout +from vel.rl.module.rainbow_policy import RainbowPolicy + + +class Rainbow(RlPolicy): + """ Deep Q-Learning algorithm """ + + # def __init__(self, model_factory: ModelFactory, discount_factor: float, double_dqn: bool, + + def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space: gym.Space, + discount_factor: float, target_update_frequency: int, + vmin: float, vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, 
factorized_noise: bool = True): + super().__init__(discount_factor) + + self.model = RainbowPolicy( + net=net, + action_space=action_space, + vmin=vmin, + vmax=vmax, + atoms=atoms, + initial_std_dev=initial_std_dev, + factorized_noise=factorized_noise + ) + + self.target_model = RainbowPolicy( + net=net_factory.instantiate(), + action_space=action_space, + vmin=vmin, + vmax=vmax, + atoms=atoms, + initial_std_dev=initial_std_dev, + factorized_noise=factorized_noise + ) + + self.discount_factor = discount_factor + self.target_update_frequency = target_update_frequency + + self.vmin = vmin + self.vmax = vmax + self.num_atoms = atoms + + # self.support_atoms = self.model.q + # self.atom_delta = histogram_info['atom_delta'] + self.register_buffer('support_atoms', self.model.support_atoms.clone()) + self.atom_delta = self.model.atom_delta + + def reset_weights(self): + """ Initialize properly model weights """ + self.model.reset_weights() + self.target_model.load_state_dict(self.model.state_dict()) + + def forward(self, observation, state=None): + """ Calculate model outputs """ + return self.model(observation) + + def act(self, observation, state=None, deterministic=False): + """ Select actions based on model's output """ + self.train(mode=not deterministic) + + q_values = self.model(observation) + actions = self.model.q_head.sample(q_values) + + return { + 'actions': actions, + 'q': q_values + } + + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ Calculate loss of the supplied rollout """ + batch_size = rollout.frames() + + observations = rollout.batch_tensor('observations') + observations_next = rollout.batch_tensor('observations_next') + + actions = rollout.batch_tensor('actions') + dones_tensor = rollout.batch_tensor('dones') + rewards_tensor = rollout.batch_tensor('rewards') + + assert dones_tensor.dtype == torch.float32 + + q = self.model(observations) + + with torch.no_grad(): + # DOUBLE DQN + # Histogram gets returned as 
logits initially, we need to exp it before projection + target_value_histogram_for_all_actions = self.target_model(observations_next).exp() + model_value_histogram_for_all_actions = self.model(observations_next).exp() + + atoms_aligned = self.support_atoms.view(1, 1, self.num_atoms) + + selected_action_indices = ( + (atoms_aligned * model_value_histogram_for_all_actions).sum(dim=-1).argmax(dim=1) + ) + + # Select largest 'target' value based on action that 'model' selects + next_value_histograms = ( + target_value_histogram_for_all_actions[range(batch_size), selected_action_indices] + ) + + # HISTOGRAM PROJECTION CODE + forward_steps = rollout.extra_data.get('forward_steps', 1) + + atoms_projected = ( + rewards_tensor.unsqueeze(1) + + (self.discount_factor ** forward_steps) * + (1 - dones_tensor).unsqueeze(1) * self.support_atoms.unsqueeze(0) + ) + + atoms_projected = atoms_projected.clamp(min=self.vmin, max=self.vmax) + projection_indices = (atoms_projected - self.vmin) / self.atom_delta + + index_floor = projection_indices.floor().long() + index_ceil = projection_indices.ceil().long() + + # Fix corner case when index_floor == index_ceil + index_floor[(index_ceil > 0) * (index_floor == index_ceil)] -= 1 + index_ceil[(index_floor < (self.num_atoms - 1)) * (index_floor == index_ceil)] += 1 + + value_histogram_projected = torch.zeros_like(next_value_histograms) + + # Following part will be a bit convoluted, in an effort to fully vectorize projection operation + + # Special offset index tensor + offsets = ( + torch.arange(0, batch_size * self.num_atoms, self.num_atoms) + .unsqueeze(1) + .expand(batch_size, self.num_atoms) + .contiguous().view(-1).to(value_histogram_projected.device) + ) + + # Linearize all the buffers + value_histogram_projected = value_histogram_projected.view(-1) + index_ceil = index_ceil.view(-1) + index_floor = index_floor.view(-1) + projection_indices = projection_indices.view(-1) + + value_histogram_projected.index_add_( + 0, + 
index_floor+offsets, + (next_value_histograms.view(-1) * (index_ceil.float() - projection_indices)) + ) + + value_histogram_projected.index_add_( + 0, + index_ceil+offsets, + (next_value_histograms.view(-1) * (projection_indices - index_floor.float())) + ) + + value_histogram_projected = value_histogram_projected.reshape(next_value_histograms.shape) + + q_log_histogram_selected = q[range(q.size(0)), actions] + + # Cross-entropy loss as usual + original_losses = -(value_histogram_projected * q_log_histogram_selected).sum(dim=1) + + if rollout.has_tensor('weights'): + weights = rollout.batch_tensor('weights') + else: + weights = torch.ones_like(rewards_tensor) + + loss_value = torch.mean(weights * original_losses) + loss_value.backward() + + with torch.no_grad(): + mean_q_model = (self.support_atoms.unsqueeze(0) * torch.exp(q_log_histogram_selected)).sum(dim=1).mean() + mean_q_target = (self.support_atoms.unsqueeze(0) * value_histogram_projected).sum(dim=1).mean() + + return { + 'loss': loss_value.item(), + # We need it to update priorities in the replay buffer: + 'errors': original_losses.detach().cpu().numpy(), + 'average_q_selected': mean_q_model.item(), + 'average_q_target': mean_q_target.item() + } + + def post_optimization_step(self, batch_info, rollout): + """ Steps to take after optimization has been done""" + if batch_info.aggregate_batch_number % self.target_update_frequency == 0: + self.target_model.load_state_dict(self.model.state_dict()) + + def metrics(self) -> list: + """ List of metrics to track for this learning process """ + return [ + AveragingNamedMetric("loss"), + AveragingNamedMetric("average_q_selected"), + AveragingNamedMetric("average_q_target") + ] + + +class RainbowFactory(ModelFactory): + def __init__(self, net_factory: ModelFactory, discount_factor: float, target_update_frequency: int, + vmin: float, vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): + self.net_factory = net_factory + 
self.discount_factor = discount_factor + self.target_update_frequency = target_update_frequency + self.vmin = vmin + self.vmax = vmax + self.atoms = atoms + self.initial_std_dev = initial_std_dev + self.factorized_noise = factorized_noise + + def instantiate(self, **extra_args): + """ Instantiate the model """ + action_space = extra_args.pop('action_space') + # TODO(jerry): Push noisy net parameters down the stack here + net = self.net_factory.instantiate(**extra_args) + + return Rainbow( + net=net, + net_factory=self.net_factory, + action_space=action_space, + discount_factor=self.discount_factor, + target_update_frequency=self.target_update_frequency, + vmin=self.vmin, + vmax=self.vmax, + atoms=self.atoms, + initial_std_dev=self.initial_std_dev, + factorized_noise=self.factorized_noise + ) + + +def create(net: ModelFactory, discount_factor: float, target_update_frequency: int, + vmin: float, vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): + """ Vel factory function """ + return RainbowFactory( + net_factory=net, + discount_factor=discount_factor, + target_update_frequency=target_update_frequency, + vmin=vmin, + vmax=vmax, + atoms=atoms, + initial_std_dev=initial_std_dev, + factorized_noise=factorized_noise + ) diff --git a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py index 2a6b42e4..d53225a9 100644 --- a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py @@ -106,13 +106,12 @@ def train_batch(self, batch_info: BatchInfo) -> None: def on_policy_train_batch(self, batch_info: BatchInfo): """ Perform an 'on-policy' training step of evaluating an env and a single backpropagation step """ - self.policy.train() - rollout = self.env_roller.rollout(batch_info, self.settings.number_of_steps).to_device(self.device) # Preprocessing of the rollout for this policy 
rollout = self.policy.process_rollout(rollout) + self.policy.train() batch_result = self.policy.optimize( batch_info=batch_info, rollout=rollout @@ -124,10 +123,9 @@ def on_policy_train_batch(self, batch_info: BatchInfo): def off_policy_train_batch(self, batch_info: BatchInfo): """ Perform an 'off-policy' training step of sampling the replay buffer and gradient descent """ - self.policy.train() - rollout = self.env_roller.sample(batch_info, self.settings.number_of_steps).to_device(self.device) + self.policy.train() batch_result = self.policy.optimize( batch_info=batch_info, rollout=rollout diff --git a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py index b5baaad5..d55b3bf0 100644 --- a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py @@ -103,8 +103,6 @@ def train_batch(self, batch_info: BatchInfo) -> None: def roll_out_and_store(self, batch_info): """ Roll out environment and store result in the replay buffer """ - self.policy.train() - if self.env_roller.is_ready_for_sampling(): rollout = self.env_roller.rollout(batch_info, self.settings.rollout_steps) rollout = rollout.to_device(self.device) diff --git a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py index 03b53e28..d2d0a50d 100644 --- a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py @@ -96,9 +96,6 @@ def train_batch(self, batch_info: BatchInfo) -> None: 1. Roll out the environmnent using current policy 2. 
Use that rollout to train the policy """ - # Calculate environment rollout on the evaluation version of the model - self.policy.train() - rollout = self.env_roller.rollout(batch_info, self.settings.number_of_steps) # Preprocessing of the rollout for this algorithm @@ -117,6 +114,8 @@ def train_batch(self, batch_info: BatchInfo) -> None: else: experience_replay_count = self.settings.experience_replay + self.policy.train() + # Repeat the experience N times for i in range(experience_replay_count): # We may potentially need to split rollout into multiple batches diff --git a/vel/rl/util/actor.py b/vel/rl/util/actor.py index 76fa9d94..55b3950b 100644 --- a/vel/rl/util/actor.py +++ b/vel/rl/util/actor.py @@ -13,6 +13,10 @@ def __init__(self, num_envs: int, policy: RlPolicy, device: torch.device): self.device = device self.state = to_device(self.policy.zero_state(num_envs), self.device) + @property + def discount_factor(self) -> float: + return self.policy.discount_factor + def act(self, observation, advance_state=True, deterministic=False): """ Return result of a policy on a given input """ result = self.policy.act(observation, state=self.state, deterministic=deterministic) @@ -39,3 +43,9 @@ def value(self, observation): def is_stateful(self) -> bool: """ If the model has a state that needs to be fed between individual observations """ return self.policy.is_stateful + + def eval(self): + self.policy.eval() + + def train(self): + self.policy.train() diff --git a/vel/rl/xpolicy/purgatory/q_rainbow_model.py b/vel/rl/xpolicy/purgatory/q_rainbow_model.py deleted file mode 100644 index d9b9dfbf..00000000 --- a/vel/rl/xpolicy/purgatory/q_rainbow_model.py +++ /dev/null @@ -1,110 +0,0 @@ -import gym -import typing - -from vel.api import LinearBackboneModel, Model, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Evaluator -from vel.rl.model.q_distributional_model import QDistributionalModelEvaluator -from 
vel.rl.module.q_distributional_noisy_dueling_head import QDistributionalNoisyDuelingHead - - -class QRainbowModel(Model): - """ - A deterministic greedy action-value model. - Includes following commonly known modifications: - - Distributional Q-Learning - - Dueling architecture - - Noisy Nets - """ - - def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, action_space: gym.Space, vmin: float, - vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): - super().__init__() - - self.action_space = action_space - - self.input_block = input_block - self.backbone = backbone - - self.q_head = QDistributionalNoisyDuelingHead( - input_dim=backbone.output_dim, - action_space=action_space, - vmin=vmin, vmax=vmax, atoms=atoms, - initial_std_dev=initial_std_dev, factorized_noise=factorized_noise - ) - - def reset_weights(self): - """ Initialize weights to reasonable defaults """ - self.input_block.reset_weights() - self.backbone.reset_weights() - self.q_head.reset_weights() - - def forward(self, observations): - """ Model forward pass """ - input_data = self.input_block(observations) - advantage_features, value_features = self.backbone(input_data) - log_histogram = self.q_head(advantage_features, value_features) - return log_histogram - - def histogram_info(self): - """ Return extra information about histogram """ - return self.q_head.histogram_info() - - def step(self, observations): - """ Sample action from an action space for given state """ - log_histogram = self(observations) - actions = self.q_head.sample(log_histogram) - - return { - 'actions': actions, - 'log_histogram': log_histogram - } - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return QDistributionalModelEvaluator(self, rollout) - - -class QDistributionalModelFactory(ModelFactory): - """ Factory class for q-learning models """ - def __init__(self, input_block: ModelFactory, backbone: ModelFactory, vmin: 
float, vmax: float, atoms: int, - initial_std_dev: float = 0.4, factorized_noise: bool = True): - self.input_block = input_block - self.backbone = backbone - self.vmin = vmin - self.vmax = vmax - self.atoms = atoms - self.initial_std_dev = initial_std_dev - self.factorized_noise = factorized_noise - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - backbone = self.backbone.instantiate(**extra_args) - - return QRainbowModel( - input_block=input_block, - backbone=backbone, - action_space=extra_args['action_space'], - vmin=self.vmin, - vmax=self.vmax, - atoms=self.atoms, - initial_std_dev=self.initial_std_dev, - factorized_noise=self.factorized_noise - ) - - -def create(backbone: ModelFactory, vmin: float, vmax: float, atoms: int, initial_std_dev: float = 0.4, - factorized_noise: bool = True, input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return QDistributionalModelFactory( - input_block=input_block, backbone=backbone, - vmin=vmin, - vmax=vmax, - atoms=atoms, - initial_std_dev=initial_std_dev, - factorized_noise=factorized_noise - ) From 936b2b92f2c301eebf83ff80646b6d123e8c0a3a Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 09:14:08 -0700 Subject: [PATCH 107/162] A2C yaml works now. 
--- examples-configs/rl/mujoco/mujoco_a2c.yaml | 28 +++++------ vel/module/input/normalize_observations.py | 17 +------ vel/net/layer/input/image_to_tensor.py | 4 +- vel/net/layer/input/normalize.py | 42 ++++++++++++++++ vel/{rl/layer/purgatory => net/layer}/mlp.py | 53 ++++++++++++-------- vel/net/layer/util/repeat_tensor.py | 4 +- vel/net/modular.py | 7 ++- vel/rl/vecenv/dummy.py | 11 ++-- 8 files changed, 102 insertions(+), 64 deletions(-) create mode 100644 vel/net/layer/input/normalize.py rename vel/{rl/layer/purgatory => net/layer}/mlp.py (51%) diff --git a/examples-configs/rl/mujoco/mujoco_a2c.yaml b/examples-configs/rl/mujoco/mujoco_a2c.yaml index 266f7353..2e33f23b 100644 --- a/examples-configs/rl/mujoco/mujoco_a2c.yaml +++ b/examples-configs/rl/mujoco/mujoco_a2c.yaml @@ -4,33 +4,31 @@ name: 'mujoco_a2c' env: name: vel.rl.env.mujoco game: !param game = 'Reacher-v2' - normalize_returns: true vec_env: name: vel.rl.vecenv.dummy + normalize_returns: true model: - name: vel.rl.algo.a2c + name: vel.rl.policy.a2c entropy_coefficient: 0.0 value_coefficient: 0.5 gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter discount_factor: 0.99 # Discount factor for the rewards - policy: - name: vel.rl.policy.stochastic_policy - - input_block: - name: vel.module.input.normalize_observations - input_shape: 11 - - backbone: - name: vel.rl.backbone.mlp - input_length: 11 - hidden_layers: [64, 64] - activation: 'tanh' + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.normalize + shape: 11 + - name: vel.net.layer.mlp + hidden_layers: [64, 64] + activation: 'tanh' + - name: vel.net.layer.util.repeat_tensor + times: 2 # Need to repeat output twice, for action and value heads reinforcer: @@ -61,5 +59,3 @@ commands: name: vel.rl.command.record_movie_command takes: 10 videoname: 'reacher_vid_{:04}.avi' - sample_args: - argmax_sampling: true diff --git a/vel/module/input/normalize_observations.py b/vel/module/input/normalize_observations.py index 
d3013238..52dc8de9 100644 --- a/vel/module/input/normalize_observations.py +++ b/vel/module/input/normalize_observations.py @@ -1,10 +1,9 @@ import torch -import numbers -from vel.api import BackboneModel, ModelFactory +from vel.api import Network -class NormalizeObservations(BackboneModel): +class NormalizeObservations(Network): """ Normalize a vector of observations """ def __init__(self, input_shape, epsilon=1e-6): @@ -46,15 +45,3 @@ def forward(self, input_vector): return (input_vector - self.running_mean.unsqueeze(0)) / torch.sqrt(self.running_var.unsqueeze(0)) - -def create(input_shape): - """ Vel factory function """ - if isinstance(input_shape, numbers.Number): - input_shape = (input_shape,) - elif not isinstance(input_shape, tuple): - input_shape = tuple(input_shape) - - def instantiate(**_): - return NormalizeObservations(input_shape) - - return ModelFactory.generic(instantiate) diff --git a/vel/net/layer/input/image_to_tensor.py b/vel/net/layer/input/image_to_tensor.py index cd034320..3019e933 100644 --- a/vel/net/layer/input/image_to_tensor.py +++ b/vel/net/layer/input/image_to_tensor.py @@ -1,8 +1,6 @@ -import typing - from vel.api import SizeHints, SizeHint -from vel.net.modular import LayerFactory, Layer from vel.module.input.image_to_tensor import image_to_tensor +from vel.net.layer_base import LayerFactory, Layer class ImageToTensorLayer(Layer): diff --git a/vel/net/layer/input/normalize.py b/vel/net/layer/input/normalize.py new file mode 100644 index 00000000..e2ac6a03 --- /dev/null +++ b/vel/net/layer/input/normalize.py @@ -0,0 +1,42 @@ +import collections.abc as abc + +from vel.api import SizeHints, SizeHint +from vel.module.input.normalize_observations import NormalizeObservations +from vel.net.layer_base import LayerFactory, Layer + + +class NormalizeLayer(Layer): + def __init__(self, name: str, shape): + super().__init__(name) + if not isinstance(shape, abc.Sequence): + self.shape = (shape,) + else: + self.shape = shape + + self.normalize = 
NormalizeObservations(input_shape=shape) + + def forward(self, direct, state: dict = None, context: dict = None): + return self.normalize(direct) + + def size_hints(self) -> SizeHints: + return SizeHints(SizeHint(*([None] + list(self.shape)))) + + +class NormalizeLayerFactory(LayerFactory): + def __init__(self, shape=None): + self.shape = shape + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "image_to_tensor" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + """ Create a given layer object """ + # Potential improvement here is to use either direct input or size parameter + return NormalizeLayer(name=name, shape=self.shape) + + +def create(shape=None): + """ Vel factory function """ + return NormalizeLayerFactory(shape=shape) diff --git a/vel/rl/layer/purgatory/mlp.py b/vel/net/layer/mlp.py similarity index 51% rename from vel/rl/layer/purgatory/mlp.py rename to vel/net/layer/mlp.py index 65560553..1be0d57b 100644 --- a/vel/rl/layer/purgatory/mlp.py +++ b/vel/net/layer/mlp.py @@ -11,15 +11,16 @@ import torch.nn.init as init import vel.util.network as net_util +from vel.api import SizeHints, SizeHint -from vel.api import LinearBackboneModel, ModelFactory +from vel.net.layer_base import LayerFactory, Layer -class MLP(LinearBackboneModel): +class MLP(Layer): """ Simple Multi-Layer-Perceptron network """ - def __init__(self, input_length: int, hidden_layers: typing.List[int], activation: str = 'tanh', + def __init__(self, name: str, input_length: int, hidden_layers: typing.List[int], activation: str = 'tanh', normalization: typing.Optional[str] = None): - super().__init__() + super().__init__(name) self.input_length = input_length self.hidden_layers = hidden_layers @@ -40,11 +41,6 @@ def __init__(self, input_length: int, hidden_layers: typing.List[int], activatio self.model = nn.Sequential(*layer_objects) self.hidden_units = hidden_layers[-1] if hidden_layers else input_length - @property - def 
output_dim(self) -> int: - """ Final dimension of model output """ - return self.hidden_units - def reset_weights(self): for m in self.modules(): if isinstance(m, nn.Linear): @@ -52,19 +48,36 @@ def reset_weights(self): init.orthogonal_(m.weight, gain=np.sqrt(2)) init.constant_(m.bias, 0.0) - def forward(self, input_data): - input_data = input_data.float() - return self.model(input_data) + def forward(self, direct, state: dict = None, context: dict = None): + return self.model(direct.float()) + def size_hints(self) -> SizeHints: + return SizeHints(SizeHint(None, self.hidden_units)) -def create(input_length, hidden_layers, activation='tanh', normalization=None): - """ Vel factory function """ - def instantiate(**_): + +class MLPFactory(LayerFactory): + def __init__(self, hidden_layers: typing.List[int], activation: str = 'tanh', + normalization: typing.Optional[str] = None): + self.hidden_layers = hidden_layers + self.activation = activation + self.normalization = normalization + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "mlp" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + """ Create a given layer object """ return MLP( - input_length=input_length, - hidden_layers=hidden_layers, - activation=activation, - normalization=normalization + name=name, + input_length=direct_input.assert_single().last(), + hidden_layers=self.hidden_layers, + activation=self.activation, + normalization=self.normalization ) - return ModelFactory.generic(instantiate) + +def create(hidden_layers, activation='tanh', normalization=None): + """ Vel factory function """ + return MLPFactory(hidden_layers=hidden_layers, activation=activation, normalization=normalization) diff --git a/vel/net/layer/util/repeat_tensor.py b/vel/net/layer/util/repeat_tensor.py index 58ea5dc1..32ca7ede 100644 --- a/vel/net/layer/util/repeat_tensor.py +++ b/vel/net/layer/util/repeat_tensor.py @@ -1,7 +1,5 @@ -import typing - from vel.api import 
SizeHints, SizeHint -from vel.net.modular import LayerFactory, Layer +from vel.net.layer_base import LayerFactory, Layer class RepeatTensor(Layer): diff --git a/vel/net/modular.py b/vel/net/modular.py index e3147c3f..774689fd 100644 --- a/vel/net/modular.py +++ b/vel/net/modular.py @@ -1,10 +1,9 @@ -import typing import collections -import torch.nn as nn -from vel.api import Network, BackboneNetwork, ModelFactory, SizeHints, SizeHint +import torch.nn as nn -from .layer_base import Layer, LayerFactory +from vel.api import BackboneNetwork, ModelFactory, SizeHints +from .layer_base import LayerFactory def instantiate_layers(layers: [LayerFactory]) -> nn.Module: diff --git a/vel/rl/vecenv/dummy.py b/vel/rl/vecenv/dummy.py index b37f6e27..29b405e0 100644 --- a/vel/rl/vecenv/dummy.py +++ b/vel/rl/vecenv/dummy.py @@ -2,6 +2,7 @@ from vel.openai.baselines.common.atari_wrappers import FrameStack from vel.openai.baselines.common.vec_env.dummy_vec_env import DummyVecEnv from vel.openai.baselines.common.vec_env.vec_frame_stack import VecFrameStack +from vel.openai.baselines.common.vec_env.vec_normalize import VecNormalize from vel.rl.api import VecEnvFactory @@ -9,9 +10,10 @@ class DummyVecEnvWrapper(VecEnvFactory): """ Wraps a single-threaded environment into a one-element vector environment """ - def __init__(self, env, frame_history=None): + def __init__(self, env, frame_history=None, normalize_returns=False): self.env = env self.frame_history = frame_history + self.normalize_returns = normalize_returns def instantiate(self, parallel_envs, seed=0, preset='default') -> VecEnv: """ Create vectorized environments """ @@ -20,6 +22,9 @@ def instantiate(self, parallel_envs, seed=0, preset='default') -> VecEnv: if self.frame_history is not None: envs = VecFrameStack(envs, self.frame_history) + if self.normalize_returns: + envs = VecNormalize(envs, ob=False, ret=True) + return envs def instantiate_single(self, seed=0, preset='default'): @@ -36,6 +41,6 @@ def 
_creation_function(self, idx, seed, preset): return lambda: self.env.instantiate(seed=seed, serial_id=idx, preset=preset) -def create(env, frame_history=None): +def create(env, frame_history=None, normalize_returns=False): """ Vel factory function """ - return DummyVecEnvWrapper(env, frame_history=frame_history) + return DummyVecEnvWrapper(env, frame_history=frame_history, normalize_returns=normalize_returns) From c3b8c991193ca2efb9f1571dd0ae5b4543d9fbf3 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 09:29:25 -0700 Subject: [PATCH 108/162] Revived MuJoCo A2C --- vel/net/layer/input/normalize.py | 2 ++ vel/rl/env_roller/step_env_roller.py | 4 ++-- vel/rl/env_roller/trajectory_replay_env_roller.py | 4 ++-- vel/rl/env_roller/transition_replay_env_roller.py | 4 ++-- vel/rl/layer/double_noisy_nature_cnn.py | 9 +++++++-- .../head/q_distributional_noisy_dueling_head.py | 11 ++++++++--- vel/rl/module/noisy_linear.py | 4 ++-- vel/rl/module/rainbow_policy.py | 15 ++------------- vel/rl/policy/acer.py | 7 +++++++ vel/rl/policy/dqn.py | 5 +++++ vel/rl/policy/rainbow.py | 11 +++++++---- .../buffered_mixed_policy_iteration_reinforcer.py | 4 ++-- .../buffered_off_policy_iteration_reinforcer.py | 5 ++--- 13 files changed, 50 insertions(+), 35 deletions(-) diff --git a/vel/net/layer/input/normalize.py b/vel/net/layer/input/normalize.py index e2ac6a03..f8a8dcb5 100644 --- a/vel/net/layer/input/normalize.py +++ b/vel/net/layer/input/normalize.py @@ -6,6 +6,8 @@ class NormalizeLayer(Layer): + """ Layer that normalizes the inputs """ + def __init__(self, name: str, shape): super().__init__(name) if not isinstance(shape, abc.Sequence): diff --git a/vel/rl/env_roller/step_env_roller.py b/vel/rl/env_roller/step_env_roller.py index bb70e1c5..b3b701bf 100644 --- a/vel/rl/env_roller/step_env_roller.py +++ b/vel/rl/env_roller/step_env_roller.py @@ -30,12 +30,12 @@ def environment(self): @torch.no_grad() def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> 
Rollout: """ Calculate env rollout """ - self.actor.train() + self.actor.eval() accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information for step_idx in range(number_of_steps): - step = self.actor.act(self.last_observation.to(self.device)) + step = self.actor.act(self.last_observation.to(self.device), deterministic=False) # Add step to the tensor accumulator for name, tensor in step.items(): diff --git a/vel/rl/env_roller/trajectory_replay_env_roller.py b/vel/rl/env_roller/trajectory_replay_env_roller.py index f294f4e0..259a7497 100644 --- a/vel/rl/env_roller/trajectory_replay_env_roller.py +++ b/vel/rl/env_roller/trajectory_replay_env_roller.py @@ -37,12 +37,12 @@ def environment(self): @torch.no_grad() def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Calculate env rollout """ - self.actor.train() + self.actor.eval() accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information for step_idx in range(number_of_steps): - step = self.actor.act(self.last_observation) + step = self.actor.act(self.last_observation, deterministic=False) replay_extra_information = {} diff --git a/vel/rl/env_roller/transition_replay_env_roller.py b/vel/rl/env_roller/transition_replay_env_roller.py index dc25b676..1bf96acc 100644 --- a/vel/rl/env_roller/transition_replay_env_roller.py +++ b/vel/rl/env_roller/transition_replay_env_roller.py @@ -48,13 +48,13 @@ def environment(self): @torch.no_grad() def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: """ Calculate env rollout """ - self.actor.train() + self.actor.eval() accumulator = TensorAccumulator() episode_information = [] # List of dictionaries with episode information for step_idx in range(number_of_steps): - step = self.actor.act(self.last_observation) + step = self.actor.act(self.last_observation, deterministic=False) replay_extra_information = {} diff --git 
a/vel/rl/layer/double_noisy_nature_cnn.py b/vel/rl/layer/double_noisy_nature_cnn.py index 25299baf..acade064 100644 --- a/vel/rl/layer/double_noisy_nature_cnn.py +++ b/vel/rl/layer/double_noisy_nature_cnn.py @@ -98,14 +98,19 @@ def reset_weights(self): m.reset_weights() def forward(self, image, state: dict = None, context: dict = None): + if context is not None: + deterministic = context.get('deterministic', False) + else: + deterministic = False + result = image result = F.relu(self.conv1(result)) result = F.relu(self.conv2(result)) result = F.relu(self.conv3(result)) flattened = result.view(result.size(0), -1) - output_one = F.relu(self.linear_layer_one(flattened)) - output_two = F.relu(self.linear_layer_two(flattened)) + output_one = F.relu(self.linear_layer_one(flattened, deterministic=deterministic)) + output_two = F.relu(self.linear_layer_two(flattened, deterministic=deterministic)) return output_one, output_two diff --git a/vel/rl/module/head/q_distributional_noisy_dueling_head.py b/vel/rl/module/head/q_distributional_noisy_dueling_head.py index 59a22ac8..8cc39aa9 100644 --- a/vel/rl/module/head/q_distributional_noisy_dueling_head.py +++ b/vel/rl/module/head/q_distributional_noisy_dueling_head.py @@ -52,9 +52,14 @@ def reset_weights(self): self.linear_layer_advantage.reset_weights() self.linear_layer_value.reset_weights() - def forward(self, advantage_features, value_features): - adv = self.linear_layer_advantage(advantage_features).view(-1, self.action_size, self.atoms) - val = self.linear_layer_value(value_features).view(-1, 1, self.atoms) + def forward(self, advantage_features, value_features, deterministic=False): + adv = self.linear_layer_advantage( + advantage_features, deterministic=deterministic + ).view(-1, self.action_size, self.atoms) + + val = self.linear_layer_value( + value_features, deterministic=deterministic + ).view(-1, 1, self.atoms) # I'm quite unsure if this is the right way to combine these, but this is what paper seems to be suggesting 
# and I don't know any better way. diff --git a/vel/rl/module/noisy_linear.py b/vel/rl/module/noisy_linear.py index 2b94c90c..9a56fa3a 100644 --- a/vel/rl/module/noisy_linear.py +++ b/vel/rl/module/noisy_linear.py @@ -54,8 +54,8 @@ def reset_weights(self): self.weight_sigma.data.fill_(self.initial_std_dev / math.sqrt(self.in_features)) self.bias_sigma.data.fill_(self.initial_std_dev / math.sqrt(self.out_features)) - def forward(self, input_data): - if self.training: + def forward(self, input_data, deterministic=False): + if not deterministic: if self.factorized_noise: weight_epsilon, bias_epsilon = factorized_gaussian_noise( self.in_features, self.out_features, device=input_data.device diff --git a/vel/rl/module/rainbow_policy.py b/vel/rl/module/rainbow_policy.py index a61ef126..8e709758 100644 --- a/vel/rl/module/rainbow_policy.py +++ b/vel/rl/module/rainbow_policy.py @@ -45,23 +45,12 @@ def reset_weights(self): self.net.reset_weights() self.q_head.reset_weights() - def forward(self, observations): + def forward(self, observations, deterministic=False): """ Model forward pass """ - advantage_features, value_features = self.net(observations) + advantage_features, value_features = self.net(observations, context={'deterministic': deterministic}) log_histogram = self.q_head(advantage_features, value_features) return log_histogram def histogram_info(self): """ Return extra information about histogram """ return self.q_head.histogram_info() - - # def step(self, observations): - # """ Sample action from an action space for given state """ - # log_histogram = self(observations) - # actions = self.q_head.sample(log_histogram) - # - # return { - # 'actions': actions, - # 'log_histogram': log_histogram - # } - diff --git a/vel/rl/policy/acer.py b/vel/rl/policy/acer.py index 049c6842..f85136a7 100644 --- a/vel/rl/policy/acer.py +++ b/vel/rl/policy/acer.py @@ -41,6 +41,13 @@ def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space else: 
self.target_policy = None + def train(self, mode=True): + """ Override train to make sure target model is always in eval mode """ + self.policy.train(mode) + + if self.trust_region: + self.target_policy.train(False) + def reset_weights(self): """ Initialize properly model weights """ self.policy.reset_weights() diff --git a/vel/rl/policy/dqn.py b/vel/rl/policy/dqn.py index 9ef180ba..b1b4ac16 100644 --- a/vel/rl/policy/dqn.py +++ b/vel/rl/policy/dqn.py @@ -38,6 +38,11 @@ def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space self.target_model = QPolicy(net=net_factory.instantiate(), action_space=action_space, dueling_dqn=dueling_dqn) + def train(self, mode=True): + """ Override train to make sure target model is always in eval mode """ + self.model.train(mode) + self.target_model.train(False) + def reset_weights(self): """ Initialize properly model weights """ self.model.reset_weights() diff --git a/vel/rl/policy/rainbow.py b/vel/rl/policy/rainbow.py index 846f032c..57b34fbf 100644 --- a/vel/rl/policy/rainbow.py +++ b/vel/rl/policy/rainbow.py @@ -11,11 +11,9 @@ class Rainbow(RlPolicy): """ Deep Q-Learning algorithm """ - # def __init__(self, model_factory: ModelFactory, discount_factor: float, double_dqn: bool, - def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space: gym.Space, - discount_factor: float, target_update_frequency: int, - vmin: float, vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): + discount_factor: float, target_update_frequency: int, vmin: float, vmax: float, atoms: int = 1, + initial_std_dev: float = 0.4, factorized_noise: bool = True): super().__init__(discount_factor) self.model = RainbowPolicy( @@ -50,6 +48,11 @@ def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space self.register_buffer('support_atoms', self.model.support_atoms.clone()) self.atom_delta = self.model.atom_delta + def train(self, mode=True): + """ Override train 
to make sure target model is always in eval mode """ + self.model.train(mode) + self.target_model.train(False) + def reset_weights(self): """ Initialize properly model weights """ self.model.reset_weights() diff --git a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py index d53225a9..f48183be 100644 --- a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py @@ -1,10 +1,10 @@ +import sys import attr import numpy as np -import sys import torch import tqdm -from vel.api import TrainingInfo, EpochInfo, BatchInfo, Model, ModelFactory +from vel.api import TrainingInfo, EpochInfo, BatchInfo, ModelFactory from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import ( Reinforcer, ReinforcerFactory, VecEnvFactory, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, diff --git a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py index d55b3bf0..c42751dc 100644 --- a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py @@ -1,8 +1,7 @@ -import attr import sys -import tqdm - +import attr import torch +import tqdm from vel.api import TrainingInfo, EpochInfo, BatchInfo, Model, ModelFactory from vel.openai.baselines.common.vec_env import VecEnv From 0ce852fed3c47fec61de43501d33a16960c1b01e Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 10:47:57 -0700 Subject: [PATCH 109/162] Revied MuJoCo PPO. 
--- examples-configs/rl/mujoco/mujoco_ppo.yaml | 37 +++++++-------- vel/api/size_hint.py | 4 ++ vel/net/layer/arch/__init__.py | 0 vel/net/layer/arch/parallel.py | 55 ++++++++++++++++++++++ 4 files changed, 76 insertions(+), 20 deletions(-) create mode 100644 vel/net/layer/arch/__init__.py create mode 100644 vel/net/layer/arch/parallel.py diff --git a/examples-configs/rl/mujoco/mujoco_ppo.yaml b/examples-configs/rl/mujoco/mujoco_ppo.yaml index a1cc2113..780cc882 100644 --- a/examples-configs/rl/mujoco/mujoco_ppo.yaml +++ b/examples-configs/rl/mujoco/mujoco_ppo.yaml @@ -4,15 +4,15 @@ name: 'mujoco_ppo' env: name: vel.rl.env.mujoco game: !param game = 'Reacher-v2' - normalize_returns: true vec_env: name: vel.rl.vecenv.dummy + normalize_returns: true model: - name: vel.rl.algo.ppo + name: vel.rl.policy.ppo entropy_coefficient: 0.0 value_coefficient: 0.5 @@ -22,24 +22,21 @@ model: discount_factor: 0.99 # Discount factor for the rewards gae_lambda: 0.95 # Generalized Advantage Estimator Lambda parameter - policy: - name: vel.rl.policy.stochastic_policy_separate - - input_block: - name: vel.module.input.normalize_observations - input_shape: 11 - - policy_backbone: - name: vel.rl.backbone.mlp - input_length: 11 - hidden_layers: [64, 64] - activation: 'tanh' - - value_backbone: - name: vel.rl.backbone.mlp - input_length: 11 - hidden_layers: [64, 64] - activation: 'tanh' + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.normalize + shape: 11 + - name: vel.net.layer.util.repeat_tensor + times: 2 # Need to repeat output twice, to consume by the 'parallel' layers + - name: vel.net.layer.arch.parallel + layers: + - name: vel.net.layer.mlp + hidden_layers: [64, 64] + activation: 'tanh' + - name: vel.net.layer.mlp + hidden_layers: [64, 64] + activation: 'tanh' reinforcer: diff --git a/vel/api/size_hint.py b/vel/api/size_hint.py index d6a3879b..063810ac 100644 --- a/vel/api/size_hint.py +++ b/vel/api/size_hint.py @@ -68,5 +68,9 @@ def assert_single(self, 
length: typing.Optional[int] = None) -> SizeHint: return self.size_hints + def unwrap(self): + """ Return the underlying data """ + return self.size_hints + def __repr__(self): return repr(self.size_hints) diff --git a/vel/net/layer/arch/__init__.py b/vel/net/layer/arch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/net/layer/arch/parallel.py b/vel/net/layer/arch/parallel.py new file mode 100644 index 00000000..de592f2a --- /dev/null +++ b/vel/net/layer/arch/parallel.py @@ -0,0 +1,55 @@ +import torch.nn as nn + +from vel.api import SizeHints +from vel.net.layer_base import LayerFactory, Layer + + +class ParallelLayer(Layer): + """ Network that consists of parallel "towers" """ + + def __init__(self, name: str, layers: [Layer]): + super().__init__(name) + + self.layers = nn.ModuleList(layers) + self._size_hints = SizeHints(tuple(layer.size_hints().unwrap() for layer in self.layers)) + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + return self._size_hints + + def forward(self, direct, state: dict = None, context: dict = None): + """ Forward propagation of a single layer """ + results = [layer(x, state, context) for layer, x in zip(self.layers, direct)] + return tuple(results) + + +class ParallelLayerFactory(LayerFactory): + """ Factory for Parallel layer """ + + def __init__(self, layers: [LayerFactory]): + self.layers = layers + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "parallel" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + hints = direct_input.assert_tuple(len(self.layers)) + + layers = [] + + for idx, (size_hint, layer_factory) in enumerate(zip(hints, self.layers)): + counter = idx + 1 + local_name = "{}_{:04d}".format(layer_factory.name_base, counter) + global_name = f"{name}/{local_name}" + + layer = layer_factory.instantiate(name=global_name, direct_input=SizeHints(size_hint), context=context) + layers.append(layer) + + 
return ParallelLayer(name, layers) + + +def create(layers: [LayerFactory]): + """ Vel factory function """ + return ParallelLayerFactory(layers=layers) From 3c47758ca36434ba8f8c5fa82b502ca751cf742f Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 11:58:10 -0700 Subject: [PATCH 110/162] Sizing network input according to the environment size. --- examples-configs/rl/atari/atari_a2c.yaml | 1 - .../rl/atari/atari_a2c_tf_rmsprop.yaml | 1 - examples-configs/rl/atari/atari_acer.yaml | 1 - examples-configs/rl/atari/atari_ddqn.yaml | 1 - examples-configs/rl/atari/atari_dqn.yaml | 1 - examples-configs/rl/atari/atari_ppo.yaml | 1 - examples-configs/rl/atari/atari_rainbow.yaml | 3 +- examples-configs/rl/atari/atari_trpo.yaml | 1 - examples-configs/rl/mujoco/mujoco_a2c.yaml | 1 - examples-configs/rl/mujoco/mujoco_ppo.yaml | 1 - examples-configs/rl/mujoco/mujoco_trpo.yaml | 37 +++++---- vel/api/size_hint.py | 4 + vel/net/layer/input/image_to_tensor.py | 24 +++--- vel/net/layer/input/normalize.py | 7 +- vel/net/modular.py | 10 ++- vel/rl/env_roller/step_env_roller.py | 2 +- .../trajectory_replay_env_roller.py | 2 +- .../transition_replay_env_roller.py | 2 +- vel/rl/layer/nature_cnn.py | 3 +- vel/rl/policy/a2c.py | 13 +++- vel/rl/policy/acer.py | 19 ++++- vel/rl/policy/dqn.py | 16 ++-- vel/rl/policy/ppo.py | 13 +++- vel/rl/policy/rainbow.py | 14 +++- vel/rl/policy/trpo.py | 78 +++++++++++++------ ...fered_mixed_policy_iteration_reinforcer.py | 2 +- ...uffered_off_policy_iteration_reinforcer.py | 2 +- .../on_policy_iteration_reinforcer.py | 2 +- vel/util/situational.py | 17 ++++ vel/util/tensor_accumulator.py | 16 ---- vel/util/tensor_util.py | 14 ++++ 31 files changed, 196 insertions(+), 113 deletions(-) delete mode 100644 vel/util/tensor_accumulator.py diff --git a/examples-configs/rl/atari/atari_a2c.yaml b/examples-configs/rl/atari/atari_a2c.yaml index cbe9dc46..fa5fdcab 100644 --- a/examples-configs/rl/atari/atari_a2c.yaml +++ 
b/examples-configs/rl/atari/atari_a2c.yaml @@ -22,7 +22,6 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.image_to_tensor - size: [84, 84, 4] # Number of channels is frame history - name: vel.rl.layer.nature_cnn - name: vel.net.layer.util.repeat_tensor times: 2 # Need to repeat output twice, for action and value heads diff --git a/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml b/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml index 13de0fef..fc398e5a 100644 --- a/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml +++ b/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml @@ -22,7 +22,6 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.image_to_tensor - size: [84, 84, 4] # Number of channels is frame history - name: vel.rl.layer.nature_cnn - name: vel.net.layer.util.repeat_tensor times: 2 # Need to repeat output twice, for action and value heads diff --git a/examples-configs/rl/atari/atari_acer.yaml b/examples-configs/rl/atari/atari_acer.yaml index 256b09bc..e0bff147 100644 --- a/examples-configs/rl/atari/atari_acer.yaml +++ b/examples-configs/rl/atari/atari_acer.yaml @@ -29,7 +29,6 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.image_to_tensor - size: [84, 84, 4] # Number of channels is frame history - name: vel.rl.layer.nature_cnn - name: vel.net.layer.util.repeat_tensor times: 2 # Need to repeat output twice, for action and value heads diff --git a/examples-configs/rl/atari/atari_ddqn.yaml b/examples-configs/rl/atari/atari_ddqn.yaml index d4b7430b..9decd793 100644 --- a/examples-configs/rl/atari/atari_ddqn.yaml +++ b/examples-configs/rl/atari/atari_ddqn.yaml @@ -30,7 +30,6 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.image_to_tensor - size: [84, 84, 4] # Number of channels is frame history - name: vel.rl.layer.double_nature_cnn diff --git a/examples-configs/rl/atari/atari_dqn.yaml b/examples-configs/rl/atari/atari_dqn.yaml index 9851ecba..c2fd5cde 100644 --- 
a/examples-configs/rl/atari/atari_dqn.yaml +++ b/examples-configs/rl/atari/atari_dqn.yaml @@ -27,7 +27,6 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.image_to_tensor - size: [84, 84, 4] # Number of channels is frame history - name: vel.rl.layer.nature_cnn diff --git a/examples-configs/rl/atari/atari_ppo.yaml b/examples-configs/rl/atari/atari_ppo.yaml index 12d043e0..882926a9 100644 --- a/examples-configs/rl/atari/atari_ppo.yaml +++ b/examples-configs/rl/atari/atari_ppo.yaml @@ -29,7 +29,6 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.image_to_tensor - size: [84, 84, 4] # Number of channels is frame history - name: vel.rl.layer.nature_cnn - name: vel.net.layer.util.repeat_tensor times: 2 # Need to repeat output twice, for action and value heads diff --git a/examples-configs/rl/atari/atari_rainbow.yaml b/examples-configs/rl/atari/atari_rainbow.yaml index a11c3afc..78525aea 100644 --- a/examples-configs/rl/atari/atari_rainbow.yaml +++ b/examples-configs/rl/atari/atari_rainbow.yaml @@ -31,9 +31,8 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.image_to_tensor - size: [84, 84, 4] # Number of channels is frame history - name: vel.rl.layer.double_noisy_nature_cnn - # TODO(this should ideally be brough from level up) + # TODO(this should ideally be brought from level up) initial_std_dev: 0.5 factorized_noise: true diff --git a/examples-configs/rl/atari/atari_trpo.yaml b/examples-configs/rl/atari/atari_trpo.yaml index 56af94db..adc7850a 100644 --- a/examples-configs/rl/atari/atari_trpo.yaml +++ b/examples-configs/rl/atari/atari_trpo.yaml @@ -29,7 +29,6 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.image_to_tensor - size: [84, 84, 4] # Number of channels is frame history - name: vel.rl.layer.nature_cnn_small value_net: diff --git a/examples-configs/rl/mujoco/mujoco_a2c.yaml b/examples-configs/rl/mujoco/mujoco_a2c.yaml index 2e33f23b..5c49c725 100644 --- 
a/examples-configs/rl/mujoco/mujoco_a2c.yaml +++ b/examples-configs/rl/mujoco/mujoco_a2c.yaml @@ -23,7 +23,6 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.normalize - shape: 11 - name: vel.net.layer.mlp hidden_layers: [64, 64] activation: 'tanh' diff --git a/examples-configs/rl/mujoco/mujoco_ppo.yaml b/examples-configs/rl/mujoco/mujoco_ppo.yaml index 780cc882..df6db42c 100644 --- a/examples-configs/rl/mujoco/mujoco_ppo.yaml +++ b/examples-configs/rl/mujoco/mujoco_ppo.yaml @@ -26,7 +26,6 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.normalize - shape: 11 - name: vel.net.layer.util.repeat_tensor times: 2 # Need to repeat output twice, to consume by the 'parallel' layers - name: vel.net.layer.arch.parallel diff --git a/examples-configs/rl/mujoco/mujoco_trpo.yaml b/examples-configs/rl/mujoco/mujoco_trpo.yaml index 47356e0d..743877f1 100644 --- a/examples-configs/rl/mujoco/mujoco_trpo.yaml +++ b/examples-configs/rl/mujoco/mujoco_trpo.yaml @@ -3,15 +3,15 @@ name: 'mujoco_trpo' env: name: vel.rl.env.mujoco game: !param game = 'Reacher-v2' - normalize_returns: true vec_env: name: vel.rl.vecenv.dummy + normalize_returns: true model: - name: vel.rl.algo.trpo + name: vel.rl.policy.trpo discount_factor: 0.99 # Discount factor for the rewards gae_lambda: 0.98 # Generalized Advantage Estimator Lambda parameter @@ -24,21 +24,24 @@ model: vf_iters: 5 entropy_coefficient: 0.0 - input_block: - name: vel.module.input.normalize_observations - input_shape: 11 - - policy_backbone: - name: vel.rl.backbone.mlp - input_length: 11 - hidden_layers: [32, 32] - activation: 'tanh' - - value_backbone: - name: vel.rl.backbone.mlp - input_length: 11 - hidden_layers: [32, 32] - activation: 'tanh' + input_net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.normalize + + policy_net: + name: vel.net.modular + layers: + - name: vel.net.layer.mlp + hidden_layers: [32, 32] + activation: 'tanh' + + value_net: + name: vel.net.modular + layers: + 
- name: vel.net.layer.mlp + hidden_layers: [32, 32] + activation: 'tanh' reinforcer: diff --git a/vel/api/size_hint.py b/vel/api/size_hint.py index 063810ac..78851126 100644 --- a/vel/api/size_hint.py +++ b/vel/api/size_hint.py @@ -15,6 +15,10 @@ def last(self) -> int: assert self[-1] is not None, "Size hint shouldn't be None" return self[-1] + def shape(self, idx=1) -> typing.Tuple[int]: + """ Get shape of size hint, except for a number of dimensions (batch dimensions """ + return self[idx:] + def __repr__(self): internal = ", ".join([self._inner_repr(s) for s in self]) return f"{self.__class__.__name__}({internal})" diff --git a/vel/net/layer/input/image_to_tensor.py b/vel/net/layer/input/image_to_tensor.py index 3019e933..4924c33f 100644 --- a/vel/net/layer/input/image_to_tensor.py +++ b/vel/net/layer/input/image_to_tensor.py @@ -9,12 +9,12 @@ class ImageToTensorLayer(Layer): Flip channels to a [C, W, H] order and potentially convert 8-bit color values to floats """ - def __init__(self, name: str, size: tuple = None): + def __init__(self, name: str, shape: tuple = None): super().__init__(name) - if size is not None: - assert len(size) == 3, "Images must have three dimensions" - self.w, self.h, self.c = size + if shape is not None: + assert len(shape) == 3, "Images must have three dimensions" + self.w, self.h, self.c = shape else: self.w, self.h, self.c = (None, None, None) @@ -26,8 +26,8 @@ def size_hints(self) -> SizeHints: class ImageToTensorLayerFactory(LayerFactory): - def __init__(self, size: tuple = None): - self.size = size + def __init__(self, shape: tuple = None): + self.shape = shape @property def name_base(self) -> str: @@ -36,10 +36,14 @@ def name_base(self) -> str: def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: """ Create a given layer object """ - # Potential improvement here is to use either direct input or size parameter - return ImageToTensorLayer(name=name, size=self.size) + if self.shape is None: + shape = 
direct_input.assert_single().shape() + else: + shape = self.shape + + return ImageToTensorLayer(name=name, shape=shape) -def create(size: tuple = None): +def create(shape: tuple = None): """ Vel factory function """ - return ImageToTensorLayerFactory(size=size) + return ImageToTensorLayerFactory(shape=shape) diff --git a/vel/net/layer/input/normalize.py b/vel/net/layer/input/normalize.py index f8a8dcb5..8da64fa7 100644 --- a/vel/net/layer/input/normalize.py +++ b/vel/net/layer/input/normalize.py @@ -36,7 +36,12 @@ def name_base(self) -> str: def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: """ Create a given layer object """ # Potential improvement here is to use either direct input or size parameter - return NormalizeLayer(name=name, shape=self.shape) + if self.shape is None: + shape = direct_input.assert_single().shape() + else: + shape = self.shape + + return NormalizeLayer(name=name, shape=shape) def create(shape=None): diff --git a/vel/net/modular.py b/vel/net/modular.py index 774689fd..9416992a 100644 --- a/vel/net/modular.py +++ b/vel/net/modular.py @@ -6,9 +6,8 @@ from .layer_base import LayerFactory -def instantiate_layers(layers: [LayerFactory]) -> nn.Module: +def instantiate_layers(layers: [LayerFactory], size_hint: SizeHints) -> nn.Module: """ Instantiate list of layer factories into PyTorch Module """ - size_hint = SizeHints() # Empty input at first module_dict = collections.OrderedDict() context = {} @@ -96,9 +95,12 @@ class ModularNetworkFactory(ModelFactory): def __init__(self, layers: [LayerFactory]): self.layers = layers - def instantiate(self, **extra_args) -> BackboneNetwork: + def instantiate(self, size_hint=None, **extra_args) -> BackboneNetwork: """ Create either stateful or not modular network instance """ - layers = instantiate_layers(self.layers) + if size_hint is None: + size_hint = SizeHints() + + layers = instantiate_layers(self.layers, size_hint=size_hint) is_stateful = any(l.is_stateful for l in 
layers) if is_stateful: diff --git a/vel/rl/env_roller/step_env_roller.py b/vel/rl/env_roller/step_env_roller.py index b3b701bf..1a6c22a5 100644 --- a/vel/rl/env_roller/step_env_roller.py +++ b/vel/rl/env_roller/step_env_roller.py @@ -5,7 +5,7 @@ from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import Trajectories, Rollout, EnvRollerBase, EnvRollerFactoryBase, RlPolicy from vel.rl.util.actor import PolicyActor -from vel.util.tensor_accumulator import TensorAccumulator +from vel.util.tensor_util import TensorAccumulator class StepEnvRoller(EnvRollerBase): diff --git a/vel/rl/env_roller/trajectory_replay_env_roller.py b/vel/rl/env_roller/trajectory_replay_env_roller.py index 259a7497..b2bd9092 100644 --- a/vel/rl/env_roller/trajectory_replay_env_roller.py +++ b/vel/rl/env_roller/trajectory_replay_env_roller.py @@ -7,7 +7,7 @@ Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, ReplayBuffer, ReplayBufferFactory, RlPolicy ) from vel.rl.util.actor import PolicyActor -from vel.util.tensor_accumulator import TensorAccumulator +from vel.util.tensor_util import TensorAccumulator class TrajectoryReplayEnvRoller(ReplayEnvRollerBase): diff --git a/vel/rl/env_roller/transition_replay_env_roller.py b/vel/rl/env_roller/transition_replay_env_roller.py index 1bf96acc..64e48f02 100644 --- a/vel/rl/env_roller/transition_replay_env_roller.py +++ b/vel/rl/env_roller/transition_replay_env_roller.py @@ -9,7 +9,7 @@ Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, ReplayBuffer, ReplayBufferFactory, RlPolicy ) from vel.rl.util.actor import PolicyActor -from vel.util.tensor_accumulator import TensorAccumulator +from vel.util.tensor_util import TensorAccumulator class TransitionReplayEnvRoller(ReplayEnvRollerBase): diff --git a/vel/rl/layer/nature_cnn.py b/vel/rl/layer/nature_cnn.py index 16cfed1b..f2503a62 100644 --- a/vel/rl/layer/nature_cnn.py +++ b/vel/rl/layer/nature_cnn.py @@ -13,7 +13,7 @@ import vel.util.network as 
net_util from vel.api import SizeHint, SizeHints -from vel.net.modular import Layer, LayerFactory +from vel.net.layer_base import Layer, LayerFactory class NatureCnn(Layer): @@ -109,4 +109,3 @@ def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Laye def create(output_dim=512): """ Vel factory function """ return NatureCnnFactory(output_dim=output_dim) - diff --git a/vel/rl/policy/a2c.py b/vel/rl/policy/a2c.py index cd03eead..7b23d33d 100644 --- a/vel/rl/policy/a2c.py +++ b/vel/rl/policy/a2c.py @@ -3,6 +3,7 @@ import torch.nn.functional as F from vel.metric.base import AveragingNamedMetric +from vel.util.situational import observation_space_to_size_hint from vel.util.stats import explained_variance from vel.api import ModelFactory, BatchInfo, BackboneNetwork @@ -113,8 +114,8 @@ def metrics(self) -> list: class A2CFactory(ModelFactory): """ Factory class for policy gradient models """ - def __init__(self, net, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): - self.net = net + def __init__(self, net_factory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): + self.net_factory = net_factory self.entropy_coefficient = entropy_coefficient self.value_coefficient = value_coefficient self.discount_factor = discount_factor @@ -123,7 +124,11 @@ def __init__(self, net, entropy_coefficient, value_coefficient, discount_factor, def instantiate(self, **extra_args): """ Instantiate the model """ action_space = extra_args.pop('action_space') - net = self.net.instantiate(**extra_args) + observation_space = extra_args.pop('observation_space') + + size_hint = observation_space_to_size_hint(observation_space) + + net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) return A2C( net=net, @@ -138,7 +143,7 @@ def instantiate(self, **extra_args): def create(net: ModelFactory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): """ Vel factory function """ return A2CFactory( - 
net=net, + net_factory=net, entropy_coefficient=entropy_coefficient, value_coefficient=value_coefficient, discount_factor=discount_factor, diff --git a/vel/rl/policy/acer.py b/vel/rl/policy/acer.py index f85136a7..b468eb30 100644 --- a/vel/rl/policy/acer.py +++ b/vel/rl/policy/acer.py @@ -1,3 +1,4 @@ +import typing import gym import torch import torch.nn.functional as F @@ -6,6 +7,7 @@ from vel.metric.base import AveragingNamedMetric from vel.rl.api import Trajectories, RlPolicy, Rollout from vel.rl.module.q_stochastic_policy import QStochasticPolicy +from vel.util.situational import observation_space_to_size_hint def select_indices(tensor, indices): @@ -16,7 +18,7 @@ def select_indices(tensor, indices): class ACER(RlPolicy): """ Actor-Critic with Experience Replay - policy gradient calculations """ - def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space: gym.Space, + def __init__(self, net: BackboneNetwork, target_net: typing.Optional[BackboneNetwork], action_space: gym.Space, discount_factor: float, trust_region: bool = True, entropy_coefficient: float = 0.01, q_coefficient: float = 0.5, rho_cap: float = 10.0, retrace_rho_cap: float = 1.0, average_model_alpha: float = 0.99, trust_region_delta: float = 1.0): @@ -37,7 +39,7 @@ def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space self.policy = QStochasticPolicy(net, action_space) if self.trust_region: - self.target_policy = QStochasticPolicy(net_factory.instantiate(), action_space) + self.target_policy = QStochasticPolicy(target_net, action_space) else: self.target_policy = None @@ -257,11 +259,20 @@ def __init__(self, net_factory, trust_region: bool, entropy_coefficient: float, def instantiate(self, **extra_args): """ Instantiate the model """ action_space = extra_args.pop('action_space') - net = self.net_factory.instantiate(**extra_args) + observation_space = extra_args.pop('observation_space') + + size_hint = observation_space_to_size_hint(observation_space) 
+ + net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) + + if self.trust_region: + target_net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) + else: + target_net = None return ACER( net=net, - net_factory=self.net_factory, + target_net=target_net, action_space=action_space, trust_region=self.trust_region, entropy_coefficient=self.entropy_coefficient, diff --git a/vel/rl/policy/dqn.py b/vel/rl/policy/dqn.py index b1b4ac16..cb563e5d 100644 --- a/vel/rl/policy/dqn.py +++ b/vel/rl/policy/dqn.py @@ -12,17 +12,19 @@ from vel.rl.api import RlPolicy, Rollout from vel.rl.module.q_policy import QPolicy from vel.rl.module.noise.eps_greedy import EpsGreedy +from vel.util.situational import observation_space_to_size_hint class DQN(RlPolicy): """ Deep Q-Learning algorithm """ - def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space: gym.Space, + def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_space: gym.Space, epsilon: typing.Union[float, Schedule], discount_factor: float, double_dqn: bool, dueling_dqn: bool, target_update_frequency: int): super().__init__(discount_factor) self.model = QPolicy(net=net, action_space=action_space, dueling_dqn=dueling_dqn) + self.target_model = QPolicy(net=target_net, action_space=action_space, dueling_dqn=dueling_dqn) self.double_dqn = double_dqn self.target_update_frequency = target_update_frequency @@ -33,11 +35,8 @@ def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space self.epsilon_schedule = epsilon self.epsilon_value = self.epsilon_schedule.value(0.0) - self.action_noise = EpsGreedy(action_space=action_space) - self.target_model = QPolicy(net=net_factory.instantiate(), action_space=action_space, dueling_dqn=dueling_dqn) - def train(self, mode=True): """ Override train to make sure target model is always in eval mode """ self.model.train(mode) @@ -141,11 +140,16 @@ def __init__(self, net_factory: ModelFactory, epsilon: 
typing.Union[float, Sched def instantiate(self, **extra_args): """ Instantiate the model """ action_space = extra_args.pop('action_space') - net = self.net_factory.instantiate(**extra_args) + observation_space = extra_args.pop('observation_space') + + size_hint = observation_space_to_size_hint(observation_space) + + net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) + target_net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) return DQN( net=net, - net_factory=self.net_factory, + target_net=target_net, action_space=action_space, epsilon=self.epsilon, discount_factor=self.discount_factor, diff --git a/vel/rl/policy/ppo.py b/vel/rl/policy/ppo.py index 6230020d..ea09a29c 100644 --- a/vel/rl/policy/ppo.py +++ b/vel/rl/policy/ppo.py @@ -4,6 +4,7 @@ import numbers from vel.api import BatchInfo, ModelFactory, BackboneNetwork +from vel.util.situational import observation_space_to_size_hint from vel.util.stats import explained_variance from vel.function.constant import ConstantSchedule from vel.metric.base import AveragingNamedMetric @@ -153,9 +154,9 @@ def metrics(self) -> list: class PPOFactory(ModelFactory): """ Factory class for policy gradient models """ - def __init__(self, net, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, + def __init__(self, net_factory, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): - self.net = net + self.net_factory = net_factory self.entropy_coefficient = entropy_coefficient self.value_coefficient = value_coefficient self.cliprange = cliprange @@ -166,7 +167,11 @@ def __init__(self, net, entropy_coefficient, value_coefficient, cliprange, disco def instantiate(self, **extra_args): """ Instantiate the model """ action_space = extra_args.pop('action_space') - net = self.net.instantiate(**extra_args) + observation_space = extra_args.pop('observation_space') + + size_hint = 
observation_space_to_size_hint(observation_space) + + net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) return PPO( net=net, @@ -184,7 +189,7 @@ def create(net: ModelFactory, entropy_coefficient, value_coefficient, cliprange, normalize_advantage: bool = True, gae_lambda: float = 1.0): """ Vel factory function """ return PPOFactory( - net=net, + net_factory=net, entropy_coefficient=entropy_coefficient, value_coefficient=value_coefficient, cliprange=cliprange, diff --git a/vel/rl/policy/rainbow.py b/vel/rl/policy/rainbow.py index 57b34fbf..c28c3b29 100644 --- a/vel/rl/policy/rainbow.py +++ b/vel/rl/policy/rainbow.py @@ -6,12 +6,13 @@ from vel.metric import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout from vel.rl.module.rainbow_policy import RainbowPolicy +from vel.util.situational import observation_space_to_size_hint class Rainbow(RlPolicy): """ Deep Q-Learning algorithm """ - def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space: gym.Space, + def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_space: gym.Space, discount_factor: float, target_update_frequency: int, vmin: float, vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): super().__init__(discount_factor) @@ -27,7 +28,7 @@ def __init__(self, net: BackboneNetwork, net_factory: ModelFactory, action_space ) self.target_model = RainbowPolicy( - net=net_factory.instantiate(), + net=target_net, action_space=action_space, vmin=vmin, vmax=vmax, @@ -211,12 +212,17 @@ def __init__(self, net_factory: ModelFactory, discount_factor: float, target_upd def instantiate(self, **extra_args): """ Instantiate the model """ action_space = extra_args.pop('action_space') + observation_space = extra_args.pop('observation_space') + + size_hint = observation_space_to_size_hint(observation_space) + # TODO(jerry): Push noisy net parameters down the stack here - net = self.net_factory.instantiate(**extra_args) + 
net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) + target_net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) return Rainbow( net=net, - net_factory=self.net_factory, + target_net=target_net, action_space=action_space, discount_factor=self.discount_factor, target_update_frequency=self.target_update_frequency, diff --git a/vel/rl/policy/trpo.py b/vel/rl/policy/trpo.py index 4d614cd8..8c041c6f 100644 --- a/vel/rl/policy/trpo.py +++ b/vel/rl/policy/trpo.py @@ -1,6 +1,7 @@ import gym import numpy as np import itertools as it +import typing import torch import torch.autograd as autograd @@ -13,8 +14,9 @@ from vel.rl.api import Rollout, Trajectories, RlPolicy from vel.rl.discount_bootstrap import discount_bootstrap_gae -from vel.rl.module.stochastic_action_head import StochasticActionHead -from vel.rl.module.value_head import ValueHead +from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head +from vel.rl.module.head.value_head import ValueHead +from vel.util.situational import observation_space_to_size_hint def p2v(params): @@ -57,11 +59,11 @@ def conjugate_gradient_method(matrix_vector_operator, loss_gradient, nsteps, rdo class TRPO(RlPolicy): """ Trust Region Policy Optimization - https://arxiv.org/abs/1502.05477 """ - def __init__(self, - policy_net: BackboneNetwork, value_net: BackboneNetwork, - action_space: gym.Space, + def __init__(self, policy_net: BackboneNetwork, value_net: BackboneNetwork, action_space: gym.Space, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, - discount_factor, gae_lambda, improvement_acceptance_ratio): + discount_factor, gae_lambda, improvement_acceptance_ratio, + input_net: typing.Optional[BackboneNetwork] = None, + ): super().__init__(discount_factor) self.mak_kl = max_kl @@ -73,10 +75,11 @@ def __init__(self, self.gae_lambda = gae_lambda self.improvement_acceptance_ratio = improvement_acceptance_ratio + self.input_net = input_net 
self.policy_net = policy_net self.value_net = value_net - self.action_head = StochasticActionHead( + self.action_head = make_stockastic_action_head( action_space=action_space, input_dim=self.policy_net.size_hints().assert_single(2).last() ) @@ -87,6 +90,9 @@ def __init__(self, def reset_weights(self): """ Initialize properly model weights """ + if self.input_net: + self.input_net.reset_weights() + self.policy_net.reset_weights() self.value_net.reset_weights() @@ -95,23 +101,28 @@ def reset_weights(self): def forward(self, observations): """ Calculate model outputs """ - policy_base_output = self.policy_net(observations) - value_base_output = self.value_net(observations) + if self.input_net is not None: + normalized_observations = self.input_net(observations) + else: + normalized_observations = observations + + policy_base_output = self.policy_net(normalized_observations) + value_base_output = self.value_net(normalized_observations) action_output = self.action_head(policy_base_output) value_output = self.value_head(value_base_output) return action_output, value_output - def value(self, observations, state=None): + def _value(self, normalized_observations): """ Calculate only value head for given state """ - base_output = self.value_net(observations) + base_output = self.value_net(normalized_observations) value_output = self.value_head(base_output) return value_output - def policy(self, observations): + def _policy(self, normalized_observations): """ Calculate only action head for given state """ - policy_base_output = self.policy_net(observations) + policy_base_output = self.policy_net(normalized_observations) policy_params = self.action_head(policy_base_output) return policy_params @@ -174,10 +185,16 @@ def optimize(self, batch_info: BatchInfo, rollout: Rollout) -> dict: rollout = rollout.to_transitions() observations = rollout.batch_tensor('observations') + + if self.input_net is not None: + normalized_observations = self.input_net(observations) + else: + 
normalized_observations = observations + returns = rollout.batch_tensor('returns') # Evaluate model on the observations - action_pd_params = self.policy(observations) + action_pd_params = self._policy(normalized_observations) policy_entropy = torch.mean(self.action_head.entropy(action_pd_params)) policy_loss = self.calc_policy_loss(action_pd_params, policy_entropy, rollout) @@ -205,7 +222,8 @@ def optimize(self, batch_info: BatchInfo, rollout: Rollout) -> dict: (policy_optimization_success, ratio, policy_loss_improvement, new_policy_loss, kl_divergence_step) = ( self.line_search( - rollout, policy_loss, action_pd_params, original_parameter_vec, full_step, expected_improvement + normalized_observations, rollout, policy_loss, action_pd_params, original_parameter_vec, + full_step, expected_improvement ) ) @@ -213,7 +231,7 @@ def optimize(self, batch_info: BatchInfo, rollout: Rollout) -> dict: for i in range(self.vf_iters): batch_info.optimizer.zero_grad() - value_loss = self.value_loss(observations, returns) + value_loss = self._value_loss(normalized_observations, returns) value_loss.backward() @@ -238,7 +256,7 @@ def optimize(self, batch_info: BatchInfo, rollout: Rollout) -> dict: 'explained_variance': explained_variance(returns, rollout.batch_tensor('values')) } - def line_search(self, rollout, original_policy_loss, original_policy_params, original_parameter_vec, + def line_search(self, normalized_observations, rollout, original_policy_loss, original_policy_params, original_parameter_vec, full_step, expected_improvement_full): """ Find the right stepsize to make sure policy improves """ current_parameter_vec = original_parameter_vec.clone() @@ -253,7 +271,7 @@ def line_search(self, rollout, original_policy_loss, original_policy_params, ori # Calculate new loss with torch.no_grad(): - policy_params = self.policy(rollout.batch_tensor('observations')) + policy_params = self._policy(normalized_observations) policy_entropy = 
torch.mean(self.action_head.entropy(policy_params)) kl_divergence = torch.mean(self.action_head.kl_divergence(original_policy_params, policy_params)) @@ -289,9 +307,9 @@ def fisher_vector_product(self, vector, kl_divergence_gradient): return fvp + vector * self.cg_damping - def value_loss(self, observations, returns): + def _value_loss(self, normalized_observations, returns): """ Loss of value function head """ - value_outputs = self.value(observations) + value_outputs = self._value(normalized_observations) value_loss = 0.5 * F.mse_loss(value_outputs, returns) return value_loss @@ -337,9 +355,10 @@ class TRPOFactory(ModelFactory): """ Factory class for policy gradient models """ def __init__(self, policy_net: ModelFactory, value_net: ModelFactory, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, - discount_factor, gae_lambda, improvement_acceptance_ratio): + discount_factor, gae_lambda, improvement_acceptance_ratio, input_net: typing.Optional[ModelFactory]): self.policy_net = policy_net self.value_net = value_net + self.input_net = input_net self.entropy_coefficient = entropy_coefficient self.mak_kl = max_kl @@ -354,13 +373,23 @@ def __init__(self, policy_net: ModelFactory, value_net: ModelFactory, def instantiate(self, **extra_args): """ Instantiate the model """ action_space = extra_args.pop('action_space') + observation_space = extra_args.pop('observation_space') + + size_hint = observation_space_to_size_hint(observation_space) + + if self.input_net is None: + input_net = None + else: + input_net = self.input_net.instantiate(size_hint=size_hint, **extra_args) + size_hint = input_net.size_hints() - policy_net = self.policy_net.instantiate(**extra_args) - value_net = self.value_net.instantiate(**extra_args) + policy_net = self.policy_net.instantiate(size_hint=size_hint, **extra_args) + value_net = self.value_net.instantiate(size_hint=size_hint, **extra_args) return TRPO( policy_net=policy_net, value_net=value_net, + 
input_net=input_net, action_space=action_space, max_kl=self.mak_kl, cg_iters=self.cg_iters, @@ -376,12 +405,13 @@ def instantiate(self, **extra_args): def create(policy_net: ModelFactory, value_net: ModelFactory, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, - discount_factor, gae_lambda, improvement_acceptance_ratio): + discount_factor, gae_lambda, improvement_acceptance_ratio, input_net: typing.Optional[ModelFactory]=None): """ Vel factory function """ return TRPOFactory( policy_net=policy_net, value_net=value_net, + input_net=input_net, max_kl=max_kl, cg_iters=cg_iters, line_search_iters=line_search_iters, diff --git a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py index f48183be..92c30a5d 100644 --- a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py @@ -148,7 +148,7 @@ def __init__(self, settings, env_factory: VecEnvFactory, model_factory: ModelFac def instantiate(self, device: torch.device) -> Reinforcer: env = self.env_factory.instantiate(parallel_envs=self.parallel_envs, seed=self.seed) - policy = self.model_factory.instantiate(action_space=env.action_space) + policy = self.model_factory.instantiate(action_space=env.action_space, observation_space=env.observation_space) env_roller = self.env_roller_factory.instantiate(environment=env, policy=policy, device=device) return BufferedMixedPolicyIterationReinforcer(device, self.settings, env, policy, env_roller) diff --git a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py index c42751dc..9deeb210 100644 --- a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py @@ -165,7 +165,7 @@ def __init__(self, settings, env_factory: VecEnvFactory, model_factory: ModelFac def 
instantiate(self, device: torch.device) -> BufferedOffPolicyIterationReinforcer: env = self.env_factory.instantiate(parallel_envs=self.parallel_envs, seed=self.seed) - policy = self.model_factory.instantiate(action_space=env.action_space) + policy = self.model_factory.instantiate(action_space=env.action_space, observation_space=env.observation_space) env_roller = self.env_roller_factory.instantiate(environment=env, policy=policy, device=device) return BufferedOffPolicyIterationReinforcer( diff --git a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py index d2d0a50d..64af89e5 100644 --- a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py @@ -158,7 +158,7 @@ def __init__(self, settings, parallel_envs: int, env_factory: VecEnvFactory, mod def instantiate(self, device: torch.device) -> Reinforcer: env = self.env_factory.instantiate(parallel_envs=self.parallel_envs, seed=self.seed) - policy = self.model_factory.instantiate(action_space=env.action_space) + policy = self.model_factory.instantiate(action_space=env.action_space, observation_space=env.observation_space) env_roller = self.env_roller_factory.instantiate(environment=env, policy=policy, device=device) return OnPolicyIterationReinforcer(device, self.settings, policy, env_roller) diff --git a/vel/util/situational.py b/vel/util/situational.py index 56bb25a8..c0f9c55a 100644 --- a/vel/util/situational.py +++ b/vel/util/situational.py @@ -1,5 +1,8 @@ +import gym import typing +from vel.api import SizeHints, SizeHint + def process_environment_settings(default_dictionary: dict, settings: typing.Optional[dict] = None, presets: typing.Optional[dict] = None): @@ -25,3 +28,17 @@ def process_environment_settings(default_dictionary: dict, settings: typing.Opti result_dict[key] = new_dict return result_dict + + +def observation_space_to_size_hint(space: gym.Space) -> SizeHints: + """ Convert Gym observation 
space to size hints """ + if isinstance(space, gym.spaces.Box): + return size_hint_from_shape(space.shape) + else: + raise NotImplementedError + + +def size_hint_from_shape(shape: typing.Tuple[int]) -> SizeHints: + """ Convert tensor shape (without batch dimension) into a size hint """ + return SizeHints(SizeHint(*([None] + list(shape)))) + diff --git a/vel/util/tensor_accumulator.py b/vel/util/tensor_accumulator.py deleted file mode 100644 index fd13c2d9..00000000 --- a/vel/util/tensor_accumulator.py +++ /dev/null @@ -1,16 +0,0 @@ -import collections - -import torch - - -class TensorAccumulator: - """ Buffer for tensors that will be stacked together """ - def __init__(self): - self.accumulants = collections.defaultdict(list) - - def add(self, name, tensor): - self.accumulants[name].append(tensor) - - def result(self): - """ Concatenate accumulated tensors """ - return {k: torch.stack(v) for k, v in self.accumulants.items()} diff --git a/vel/util/tensor_util.py b/vel/util/tensor_util.py index 554ce2d2..29f41ab3 100644 --- a/vel/util/tensor_util.py +++ b/vel/util/tensor_util.py @@ -1,4 +1,5 @@ import torch +import collections def one_hot_encoding(input_tensor, num_labels): @@ -32,3 +33,16 @@ def to_device(tensor, device: torch.device): return tuple(to_device(v, device) for v in tensor) else: raise NotImplementedError + + +class TensorAccumulator: + """ Buffer for tensors that will be stacked together """ + def __init__(self): + self.accumulants = collections.defaultdict(list) + + def add(self, name, tensor): + self.accumulants[name].append(tensor) + + def result(self): + """ Concatenate accumulated tensors """ + return {k: torch.stack(v) for k, v in self.accumulants.items()} From 6c8a61863fc74f7c3868ca897d9da1ec3400e883 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 12:04:28 -0700 Subject: [PATCH 111/162] Disable integration tests for now. 
--- vel/rl/test/test_integration.py | 432 ++++++++++++++++---------------- 1 file changed, 216 insertions(+), 216 deletions(-) diff --git a/vel/rl/test/test_integration.py b/vel/rl/test/test_integration.py index dfac3a61..cc488751 100644 --- a/vel/rl/test/test_integration.py +++ b/vel/rl/test/test_integration.py @@ -1,219 +1,219 @@ -import torch -import torch.optim as optim - -from vel.module.input.image_to_tensor import ImageToTensorFactory -from vel.module.input.normalize_observations import NormalizeObservationsFactory -from vel.rl.buffer.circular_replay_buffer import CircularReplayBuffer -from vel.rl.buffer.prioritized_circular_replay_buffer import PrioritizedCircularReplayBuffer -from vel.rl.command.rl_train_command import FrameTracker -from vel.rl.env_roller.step_env_roller import StepEnvRoller -from vel.rl.env_roller.trajectory_replay_env_roller import TrajectoryReplayEnvRoller -from vel.rl.env_roller.transition_replay_env_roller import TransitionReplayEnvRoller -from vel.rl.metrics import EpisodeRewardMetric -from vel.rl.module.noise.eps_greedy import EpsGreedy -from vel.rl.module.noise.ou_noise import OuNoise -from vel.function.linear import LinearSchedule -from vel.function.linear_and_constant import LinearAndConstantSchedule -from vel.util.random import set_seed - -from vel.rl.env.classic_atari import ClassicAtariEnv -from vel.rl.env.mujoco import MujocoEnv -from vel.rl.vecenv.subproc import SubprocVecEnvWrapper -from vel.rl.vecenv.dummy import DummyVecEnvWrapper - -from vel.rl.policy.stochastic_policy import StochasticPolicyFactory -# from vel.rl.model.q_stochastic_policy_model import QStochasticPolicyModelFactory -# from vel.rl.model.q_model import QModelFactory -# from vel.rl.model.deterministic_policy_model import DeterministicPolicyModelFactory -# from vel.rl.model.stochastic_policy_model_separate import StochasticPolicyModelSeparateFactory - -from vel.rl.backbone.nature_cnn import NatureCnnFactory -from vel.rl.backbone.mlp import MLPFactory - -from 
vel.rl.reinforcer.on_policy_iteration_reinforcer import ( - OnPolicyIterationReinforcer, OnPolicyIterationReinforcerSettings -) - -from vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer import ( - BufferedOffPolicyIterationReinforcer, BufferedOffPolicyIterationReinforcerSettings -) - -from vel.rl.reinforcer.buffered_mixed_policy_iteration_reinforcer import ( - BufferedMixedPolicyIterationReinforcer, BufferedMixedPolicyIterationReinforcerSettings -) - -from vel.rl.algo.dqn import DeepQLearning -from vel.rl.algo.policy_gradient.a2c import A2CPolicyGradient -from vel.rl.algo.policy_gradient.ppo import PpoPolicyGradient -from vel.rl.algo.policy_gradient.trpo import TrpoPolicyGradient -from vel.rl.algo.policy_gradient.acer import AcerPolicyGradient -from vel.rl.algo.policy_gradient.ddpg import DeepDeterministicPolicyGradient - -from vel.api.info import TrainingInfo, EpochInfo - - -CPU_DEVICE = torch.device('cpu') - - -def test_a2c_breakout(): - """ - Simple 1 iteration of a2c breakout - """ - seed = 1001 - - # Set random seed in python std lib, numpy and pytorch - set_seed(seed) - - # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers - # These are just helper functions for that - vec_env = SubprocVecEnvWrapper( - ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 - ).instantiate(parallel_envs=16, seed=seed) - - # Again, use a helper to create a model - # But because model is owned by the reinforcer, model should not be accessed using this variable - # but from reinforcer.model property - policy = StochasticPolicyFactory( - input_block=ImageToTensorFactory(), - backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) - ).instantiate(action_space=vec_env.action_space) - - # Reinforcer - an object managing the learning process - reinforcer = OnPolicyIterationReinforcer( - device=CPU_DEVICE, - settings=OnPolicyIterationReinforcerSettings( - batch_size=256, - number_of_steps=5 - ), - 
policy=policy, - algo=A2CPolicyGradient( - entropy_coefficient=0.01, - value_coefficient=0.5, - discount_factor=0.99, - max_grad_norm=0.5 - ), - env_roller=StepEnvRoller( - environment=vec_env, - policy=policy, - device=CPU_DEVICE - ) - ) - - # Model optimizer - optimizer = optim.RMSprop(reinforcer.policy.parameters(), lr=7.0e-4, eps=1e-3) - - # Overall information store for training information - training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[] # Print live metrics every epoch to standard output - ) - - # A bit of training initialization bookkeeping... - training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 100 batches per epoch to average metrics nicely - num_epochs = 1 - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=1, - optimizer=optimizer - ) - - reinforcer.train_epoch(epoch_info, interactive=False) - - training_info.on_train_end() - - -def test_ppo_breakout(): - """ - Simple 1 iteration of ppo breakout - """ - device = torch.device('cpu') - seed = 1001 - - # Set random seed in python std lib, numpy and pytorch - set_seed(seed) - - # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers - # These are just helper functions for that - vec_env = SubprocVecEnvWrapper( - ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 - ).instantiate(parallel_envs=8, seed=seed) - - # Again, use a helper to create a model - # But because model is owned by the reinforcer, model should not be accessed using this variable - # but from reinforcer.model property - policy = StochasticPolicyFactory( - input_block=ImageToTensorFactory(), - backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) - 
).instantiate(action_space=vec_env.action_space) - - # Reinforcer - an object managing the learning process - reinforcer = OnPolicyIterationReinforcer( - device=device, - settings=OnPolicyIterationReinforcerSettings( - number_of_steps=12, - batch_size=4, - experience_replay=2, - ), - policy=policy, - algo=PpoPolicyGradient( - entropy_coefficient=0.01, - value_coefficient=0.5, - max_grad_norm=0.5, - cliprange=LinearSchedule(0.1, 0.0), - discount_factor=0.99, - normalize_advantage=True - ), - env_roller=StepEnvRoller( - environment=vec_env, - policy=policy, - device=device, - ) - ) - - # Model optimizer - # optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=7.0e-4, eps=1e-3) - optimizer = optim.Adam(reinforcer.policy.parameters(), lr=2.5e-4, eps=1e-5) - - # Overall information store for training information - training_info = TrainingInfo( - metrics=[ - EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode - ], - callbacks=[ - FrameTracker(100_000) - ] # Print live metrics every epoch to standard output - ) - - # A bit of training initialization bookkeeping... 
- training_info.initialize() - reinforcer.initialize_training(training_info) - training_info.on_train_begin() - - # Let's make 100 batches per epoch to average metrics nicely - num_epochs = 1 - - # Normal handrolled training loop - for i in range(1, num_epochs+1): - epoch_info = EpochInfo( - training_info=training_info, - global_epoch_idx=i, - batches_per_epoch=1, - optimizer=optimizer - ) - - reinforcer.train_epoch(epoch_info, interactive=False) - - training_info.on_train_end() +# import torch +# import torch.optim as optim +# +# from vel.module.input.image_to_tensor import ImageToTensorFactory +# from vel.module.input.normalize_observations import NormalizeObservationsFactory +# from vel.rl.buffer.circular_replay_buffer import CircularReplayBuffer +# from vel.rl.buffer.prioritized_circular_replay_buffer import PrioritizedCircularReplayBuffer +# from vel.rl.command.rl_train_command import FrameTracker +# from vel.rl.env_roller.step_env_roller import StepEnvRoller +# from vel.rl.env_roller.trajectory_replay_env_roller import TrajectoryReplayEnvRoller +# from vel.rl.env_roller.transition_replay_env_roller import TransitionReplayEnvRoller +# from vel.rl.metrics import EpisodeRewardMetric +# from vel.rl.module.noise.eps_greedy import EpsGreedy +# from vel.rl.module.noise.ou_noise import OuNoise +# from vel.function.linear import LinearSchedule +# from vel.function.linear_and_constant import LinearAndConstantSchedule +# from vel.util.random import set_seed +# +# from vel.rl.env.classic_atari import ClassicAtariEnv +# from vel.rl.env.mujoco import MujocoEnv +# from vel.rl.vecenv.subproc import SubprocVecEnvWrapper +# from vel.rl.vecenv.dummy import DummyVecEnvWrapper +# +# from vel.rl.policy.stochastic_policy import StochasticPolicyFactory +# # from vel.rl.model.q_stochastic_policy_model import QStochasticPolicyModelFactory +# # from vel.rl.model.q_model import QModelFactory +# # from vel.rl.model.deterministic_policy_model import DeterministicPolicyModelFactory +# # 
from vel.rl.model.stochastic_policy_model_separate import StochasticPolicyModelSeparateFactory +# +# from vel.rl.backbone.nature_cnn import NatureCnnFactory +# from vel.rl.backbone.mlp import MLPFactory +# +# from vel.rl.reinforcer.on_policy_iteration_reinforcer import ( +# OnPolicyIterationReinforcer, OnPolicyIterationReinforcerSettings +# ) +# +# from vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer import ( +# BufferedOffPolicyIterationReinforcer, BufferedOffPolicyIterationReinforcerSettings +# ) +# +# from vel.rl.reinforcer.buffered_mixed_policy_iteration_reinforcer import ( +# BufferedMixedPolicyIterationReinforcer, BufferedMixedPolicyIterationReinforcerSettings +# ) +# +# from vel.rl.algo.dqn import DeepQLearning +# from vel.rl.algo.policy_gradient.a2c import A2CPolicyGradient +# from vel.rl.algo.policy_gradient.ppo import PpoPolicyGradient +# from vel.rl.algo.policy_gradient.trpo import TrpoPolicyGradient +# from vel.rl.algo.policy_gradient.acer import AcerPolicyGradient +# from vel.rl.algo.policy_gradient.ddpg import DeepDeterministicPolicyGradient +# +# from vel.api.info import TrainingInfo, EpochInfo +# +# +# CPU_DEVICE = torch.device('cpu') +# +# +# def test_a2c_breakout(): +# """ +# Simple 1 iteration of a2c breakout +# """ +# seed = 1001 +# +# # Set random seed in python std lib, numpy and pytorch +# set_seed(seed) +# +# # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers +# # These are just helper functions for that +# vec_env = SubprocVecEnvWrapper( +# ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 +# ).instantiate(parallel_envs=16, seed=seed) +# +# # Again, use a helper to create a model +# # But because model is owned by the reinforcer, model should not be accessed using this variable +# # but from reinforcer.model property +# policy = StochasticPolicyFactory( +# input_block=ImageToTensorFactory(), +# backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) +# 
).instantiate(action_space=vec_env.action_space) +# +# # Reinforcer - an object managing the learning process +# reinforcer = OnPolicyIterationReinforcer( +# device=CPU_DEVICE, +# settings=OnPolicyIterationReinforcerSettings( +# batch_size=256, +# number_of_steps=5 +# ), +# policy=policy, +# algo=A2CPolicyGradient( +# entropy_coefficient=0.01, +# value_coefficient=0.5, +# discount_factor=0.99, +# max_grad_norm=0.5 +# ), +# env_roller=StepEnvRoller( +# environment=vec_env, +# policy=policy, +# device=CPU_DEVICE +# ) +# ) +# +# # Model optimizer +# optimizer = optim.RMSprop(reinforcer.policy.parameters(), lr=7.0e-4, eps=1e-3) +# +# # Overall information store for training information +# training_info = TrainingInfo( +# metrics=[ +# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode +# ], +# callbacks=[] # Print live metrics every epoch to standard output +# ) +# +# # A bit of training initialization bookkeeping... +# training_info.initialize() +# reinforcer.initialize_training(training_info) +# training_info.on_train_begin() +# +# # Let's make 100 batches per epoch to average metrics nicely +# num_epochs = 1 +# +# # Normal handrolled training loop +# for i in range(1, num_epochs+1): +# epoch_info = EpochInfo( +# training_info=training_info, +# global_epoch_idx=i, +# batches_per_epoch=1, +# optimizer=optimizer +# ) +# +# reinforcer.train_epoch(epoch_info, interactive=False) +# +# training_info.on_train_end() +# +# +# def test_ppo_breakout(): +# """ +# Simple 1 iteration of ppo breakout +# """ +# device = torch.device('cpu') +# seed = 1001 +# +# # Set random seed in python std lib, numpy and pytorch +# set_seed(seed) +# +# # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers +# # These are just helper functions for that +# vec_env = SubprocVecEnvWrapper( +# ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 +# ).instantiate(parallel_envs=8, seed=seed) +# +# # Again, use a helper to create 
a model +# # But because model is owned by the reinforcer, model should not be accessed using this variable +# # but from reinforcer.model property +# policy = StochasticPolicyFactory( +# input_block=ImageToTensorFactory(), +# backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) +# ).instantiate(action_space=vec_env.action_space) +# +# # Reinforcer - an object managing the learning process +# reinforcer = OnPolicyIterationReinforcer( +# device=device, +# settings=OnPolicyIterationReinforcerSettings( +# number_of_steps=12, +# batch_size=4, +# experience_replay=2, +# ), +# policy=policy, +# algo=PpoPolicyGradient( +# entropy_coefficient=0.01, +# value_coefficient=0.5, +# max_grad_norm=0.5, +# cliprange=LinearSchedule(0.1, 0.0), +# discount_factor=0.99, +# normalize_advantage=True +# ), +# env_roller=StepEnvRoller( +# environment=vec_env, +# policy=policy, +# device=device, +# ) +# ) +# +# # Model optimizer +# # optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=7.0e-4, eps=1e-3) +# optimizer = optim.Adam(reinforcer.policy.parameters(), lr=2.5e-4, eps=1e-5) +# +# # Overall information store for training information +# training_info = TrainingInfo( +# metrics=[ +# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode +# ], +# callbacks=[ +# FrameTracker(100_000) +# ] # Print live metrics every epoch to standard output +# ) +# +# # A bit of training initialization bookkeeping... 
+# training_info.initialize() +# reinforcer.initialize_training(training_info) +# training_info.on_train_begin() +# +# # Let's make 100 batches per epoch to average metrics nicely +# num_epochs = 1 +# +# # Normal handrolled training loop +# for i in range(1, num_epochs+1): +# epoch_info = EpochInfo( +# training_info=training_info, +# global_epoch_idx=i, +# batches_per_epoch=1, +# optimizer=optimizer +# ) +# +# reinforcer.train_epoch(epoch_info, interactive=False) +# +# training_info.on_train_end() # def test_dqn_breakout(): From c0a012851ad5a4f13956bec74940211a40188474 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 12:09:03 -0700 Subject: [PATCH 112/162] Remove evaluator cache. --- vel/rl/test/test_evaluator_cache.py | 39 ----------------------------- 1 file changed, 39 deletions(-) delete mode 100644 vel/rl/test/test_evaluator_cache.py diff --git a/vel/rl/test/test_evaluator_cache.py b/vel/rl/test/test_evaluator_cache.py deleted file mode 100644 index 1f0b3724..00000000 --- a/vel/rl/test/test_evaluator_cache.py +++ /dev/null @@ -1,39 +0,0 @@ -from vel.rl.api import Evaluator, Rollout - -calls = { - "a": 0, - "b": 0, - "c": 0, -} - -class TestEvaluator(Evaluator): - @Evaluator.provides('test:a') - def test_a(self): - calls["a"] += 1 - - @Evaluator.provides('test:b', cache=False) - def test_b(self): - calls["b"] += 1 - - @Evaluator.provides('test:c') - def test_c(self): - calls["c"] += 1 - - -def test_evaluator(): - e = TestEvaluator(Rollout()) - e.get("test:a") - e.get("test:a") - e.get("test:a") - - e.get("test:b") - e.get("test:b") - e.get("test:b") - - e.get("test:c") - e.get("test:c") - e.get("test:c", cache=False) - - assert calls["a"] == 1 # test:a is cached so just one call - assert calls["b"] == 3 # test:b is never cached so three calls - assert calls["c"] == 2 # test:c is cached but one call is not so two calls \ No newline at end of file From c6f6e727148abb492d6afefb5938658aade8a440 Mon Sep 17 00:00:00 2001 From: Million Integrals 
Date: Thu, 3 Oct 2019 14:16:54 -0700 Subject: [PATCH 113/162] Brought back DDPG. --- examples-configs/rl/atari/atari_a2c.yaml | 2 +- .../rl/atari/atari_a2c_tf_rmsprop.yaml | 2 +- examples-configs/rl/atari/atari_acer.yaml | 2 +- examples-configs/rl/atari/atari_ppo.yaml | 2 +- .../rl/mujoco/ddpg/half_cheetah_ddpg.yaml | 93 --------- examples-configs/rl/mujoco/mujoco_a2c.yaml | 2 +- examples-configs/rl/mujoco/mujoco_ddpg.yaml | 85 ++++++++ examples-configs/rl/mujoco/mujoco_ppo.yaml | 2 +- vel/api/size_hint.py | 7 +- vel/internal/test/test_provider.py | 2 +- vel/module/input/embedding.py | 4 +- vel/module/input/flatten.py | 11 +- vel/module/input/identity.py | 21 -- vel/net/layer/util/concat.py | 59 ++++++ .../util/{repeat_tensor.py => repeat.py} | 0 vel/rl/api/policy.py | 6 + .../trajectory_replay_env_roller.py | 2 + .../transition_replay_env_roller.py | 2 + vel/rl/module/actor_critic_policy.py | 87 ++++++++ vel/rl/module/noise/ou_noise.py | 7 +- vel/rl/module/test/test_action_head.py | 2 +- vel/rl/policy/a2c.py | 4 +- vel/rl/policy/acer.py | 12 +- vel/rl/policy/ddpg.py | 197 ++++++++++++++++++ vel/rl/policy/dqn.py | 12 +- vel/rl/policy/ppo.py | 4 +- vel/rl/policy/purgatory/ddpg.py | 94 --------- vel/rl/policy/rainbow.py | 12 +- vel/rl/policy/trpo.py | 4 +- vel/rl/util/actor.py | 5 +- .../xpolicy/purgatory/deterministic_policy.py | 164 --------------- vel/util/situational.py | 2 +- 32 files changed, 495 insertions(+), 415 deletions(-) delete mode 100644 examples-configs/rl/mujoco/ddpg/half_cheetah_ddpg.yaml create mode 100644 examples-configs/rl/mujoco/mujoco_ddpg.yaml delete mode 100644 vel/module/input/identity.py create mode 100644 vel/net/layer/util/concat.py rename vel/net/layer/util/{repeat_tensor.py => repeat.py} (100%) create mode 100644 vel/rl/module/actor_critic_policy.py create mode 100644 vel/rl/policy/ddpg.py delete mode 100644 vel/rl/policy/purgatory/ddpg.py delete mode 100644 vel/rl/xpolicy/purgatory/deterministic_policy.py diff --git 
a/examples-configs/rl/atari/atari_a2c.yaml b/examples-configs/rl/atari/atari_a2c.yaml index fa5fdcab..f4e6e76f 100644 --- a/examples-configs/rl/atari/atari_a2c.yaml +++ b/examples-configs/rl/atari/atari_a2c.yaml @@ -23,7 +23,7 @@ model: layers: - name: vel.net.layer.input.image_to_tensor - name: vel.rl.layer.nature_cnn - - name: vel.net.layer.util.repeat_tensor + - name: vel.net.layer.util.repeat times: 2 # Need to repeat output twice, for action and value heads diff --git a/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml b/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml index fc398e5a..0b013bed 100644 --- a/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml +++ b/examples-configs/rl/atari/atari_a2c_tf_rmsprop.yaml @@ -23,7 +23,7 @@ model: layers: - name: vel.net.layer.input.image_to_tensor - name: vel.rl.layer.nature_cnn - - name: vel.net.layer.util.repeat_tensor + - name: vel.net.layer.util.repeat times: 2 # Need to repeat output twice, for action and value heads diff --git a/examples-configs/rl/atari/atari_acer.yaml b/examples-configs/rl/atari/atari_acer.yaml index e0bff147..cde689a8 100644 --- a/examples-configs/rl/atari/atari_acer.yaml +++ b/examples-configs/rl/atari/atari_acer.yaml @@ -30,7 +30,7 @@ model: layers: - name: vel.net.layer.input.image_to_tensor - name: vel.rl.layer.nature_cnn - - name: vel.net.layer.util.repeat_tensor + - name: vel.net.layer.util.repeat times: 2 # Need to repeat output twice, for action and value heads diff --git a/examples-configs/rl/atari/atari_ppo.yaml b/examples-configs/rl/atari/atari_ppo.yaml index 882926a9..617b8a1f 100644 --- a/examples-configs/rl/atari/atari_ppo.yaml +++ b/examples-configs/rl/atari/atari_ppo.yaml @@ -30,7 +30,7 @@ model: layers: - name: vel.net.layer.input.image_to_tensor - name: vel.rl.layer.nature_cnn - - name: vel.net.layer.util.repeat_tensor + - name: vel.net.layer.util.repeat times: 2 # Need to repeat output twice, for action and value heads diff --git 
a/examples-configs/rl/mujoco/ddpg/half_cheetah_ddpg.yaml b/examples-configs/rl/mujoco/ddpg/half_cheetah_ddpg.yaml deleted file mode 100644 index 978227b5..00000000 --- a/examples-configs/rl/mujoco/ddpg/half_cheetah_ddpg.yaml +++ /dev/null @@ -1,93 +0,0 @@ -name: 'half_cheetah_ddpg' - -env: - name: vel.rl.env.mujoco - game: 'HalfCheetah-v2' -# normalize_returns: true -# normalize_observations: true - - -vec_env: - name: vel.rl.vecenv.dummy - - -model: - name: vel.rl.models.deterministic_policy_model - - input_block: - name: vel.modules.input.normalize_observations - input_shape: 17 - - policy_backbone: - name: vel.rl.models.backbone.mlp - input_length: 17 - hidden_layers: [64, 64] - activation: 'tanh' -# normalization: 'layer' - - value_backbone: - name: vel.rl.models.backbone.mlp - input_length: 23 # Has to be observation size(17) + action size(6) - hidden_layers: [64, 64] - activation: 'tanh' -# normalization: 'layer' - - -reinforcer: - name: vel.rl.reinforcers.buffered_off_policy_iteration_reinforcer - - env_roller: - name: vel.rl.env_roller.transition_replay_env_roller - - replay_buffer: - name: vel.rl.buffers.circular_replay_buffer - - buffer_capacity: 1_000_000 - buffer_initial_size: 2_000 - - normalize_returns: true - discount_factor: 0.99 - - action_noise: - name: vel.rl.modules.noise.ou_noise - std_dev: 0.2 - - algo: - name: vel.rl.algo.policy_gradient.ddpg - - tau: 0.01 - discount_factor: 0.99 - - rollout_steps: 2 - training_steps: 64 - - parallel_envs: 1 - - -optimizer: - name: vel.optimizers.adam - # OpenAI has two different optimizers optimizing each network separately. 
- # As far as I know it should be equivalent to optimizing two separate networks together with a sum of loss functions - lr: [1.0e-4, 1.0e-3, 1.0e-3] - weight_decay: [0.0, 0.0, 0.01] - epsilon: 1.0e-4 - layer_groups: on - - -commands: - train: - name: vel.rl.commands.rl_train_command - total_frames: 1.0e6 - batches_per_epoch: 1000 - -# openai_logging: true - - record: - name: vel.rl.commands.record_movie_command - takes: 10 - videoname: 'half_cheetah_vid_{:04}.avi' - - evaluate: - name: vel.rl.commands.evaluate_env_command - takes: 100 - frame_history: 4 diff --git a/examples-configs/rl/mujoco/mujoco_a2c.yaml b/examples-configs/rl/mujoco/mujoco_a2c.yaml index 5c49c725..504871e1 100644 --- a/examples-configs/rl/mujoco/mujoco_a2c.yaml +++ b/examples-configs/rl/mujoco/mujoco_a2c.yaml @@ -26,7 +26,7 @@ model: - name: vel.net.layer.mlp hidden_layers: [64, 64] activation: 'tanh' - - name: vel.net.layer.util.repeat_tensor + - name: vel.net.layer.util.repeat times: 2 # Need to repeat output twice, for action and value heads diff --git a/examples-configs/rl/mujoco/mujoco_ddpg.yaml b/examples-configs/rl/mujoco/mujoco_ddpg.yaml new file mode 100644 index 00000000..d82f0baf --- /dev/null +++ b/examples-configs/rl/mujoco/mujoco_ddpg.yaml @@ -0,0 +1,85 @@ +name: 'mujoco_ddpg' + +env: + name: vel.rl.env.mujoco + game: !param game = 'Reacher-v2' + + +vec_env: + name: vel.rl.vecenv.dummy + normalize_returns: true + + +model: + name: vel.rl.policy.ddpg + + tau: 0.01 + discount_factor: 0.99 + noise_std_dev: 0.2 + + input_net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.normalize + + actor_net: + name: vel.net.modular + layers: + - name: vel.net.layer.mlp + hidden_layers: [64, 64] + activation: 'tanh' + + critic_net: + name: vel.net.modular + layers: + - name: vel.net.layer.util.concat # Concatenate observation and action + - name: vel.net.layer.mlp + hidden_layers: [64, 64] + activation: 'tanh' + + +reinforcer: + name: 
vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer + + env_roller: + name: vel.rl.env_roller.transition_replay_env_roller + + replay_buffer: + name: vel.rl.buffer.circular_replay_buffer + + buffer_capacity: 1_000_000 + buffer_initial_size: 2_000 + + normalize_returns: true + discount_factor: 0.99 + + rollout_steps: 2 + training_steps: 64 + + parallel_envs: 1 + + +optimizer: + name: vel.optimizer.adam + # OpenAI has two different optimizers optimizing each network separately. + # As far as I know it should be equivalent to optimizing two separate networks together with a sum of loss functions + lr: [1.0e-4, 1.0e-3] + weight_decay: [0.0, 0.0] + epsilon: 1.0e-4 + + +commands: + train: + name: vel.rl.command.rl_train_command + total_frames: 1.0e6 + batches_per_epoch: 1000 + + record: + name: vel.rl.command.record_movie_command + takes: 10 + videoname: 'half_cheetah_vid_{:04}.avi' + + evaluate: + name: vel.rl.command.evaluate_env_command + takes: 100 + frame_history: 4 diff --git a/examples-configs/rl/mujoco/mujoco_ppo.yaml b/examples-configs/rl/mujoco/mujoco_ppo.yaml index df6db42c..63de365a 100644 --- a/examples-configs/rl/mujoco/mujoco_ppo.yaml +++ b/examples-configs/rl/mujoco/mujoco_ppo.yaml @@ -26,7 +26,7 @@ model: name: vel.net.modular layers: - name: vel.net.layer.input.normalize - - name: vel.net.layer.util.repeat_tensor + - name: vel.net.layer.util.repeat times: 2 # Need to repeat output twice, to consume by the 'parallel' layers - name: vel.net.layer.arch.parallel layers: diff --git a/vel/api/size_hint.py b/vel/api/size_hint.py index 78851126..8263c0a5 100644 --- a/vel/api/size_hint.py +++ b/vel/api/size_hint.py @@ -57,10 +57,13 @@ def __init__(self, size_hints: typing.Union[SizeHint, SizeTuple, SizeDict] = Non else: raise VelException("Invalid size hints: {}".format(self.size_hints)) - def assert_tuple(self, length) -> SizeTuple: + def assert_tuple(self, length : typing.Optional[int] = None) -> SizeTuple: """ Assert given size hints is a tuple """ 
assert self.type == self.TYPE_TUPLE, "Network needs to return a tuple" - assert len(self.size_hints) == length, "Network must return {} results".format(length) + + if length is not None: + assert len(self.size_hints) == length, "Network must return {} results".format(length) + return self.size_hints def assert_single(self, length: typing.Optional[int] = None) -> SizeHint: diff --git a/vel/internal/test/test_provider.py b/vel/internal/test/test_provider.py index 7428756f..594fa74a 100644 --- a/vel/internal/test/test_provider.py +++ b/vel/internal/test/test_provider.py @@ -139,7 +139,7 @@ def test_render_configuration(): }, parameters={'xxx': 5}) - configuration = provider.render_configuration() + configuration = provider.render_environment() assert configuration == { 'a': 1, diff --git a/vel/module/input/embedding.py b/vel/module/input/embedding.py index b576ddfa..97927ff1 100644 --- a/vel/module/input/embedding.py +++ b/vel/module/input/embedding.py @@ -1,9 +1,9 @@ import torch.nn as nn -from vel.api import LinearBackboneModel, ModelFactory, LanguageSource +from vel.api import Network, LanguageSource, ModelFactory -class EmbeddingInput(LinearBackboneModel): +class EmbeddingInput(Network): """ Learnable Embedding input layer """ def __init__(self, alphabet_size: int, output_dim: int, pretrained: bool = False, frozen: bool = False, diff --git a/vel/module/input/flatten.py b/vel/module/input/flatten.py index faf424df..5e140cc0 100644 --- a/vel/module/input/flatten.py +++ b/vel/module/input/flatten.py @@ -1,10 +1,10 @@ from vel.module.layers import Flatten -from vel.api import ModelFactory, BackboneModel +from vel.api import ModelFactory, BackboneNetwork -class FlattenInput(BackboneModel): +class FlattenInput(BackboneNetwork): """ Sequence input """ def __init__(self): super().__init__() @@ -13,10 +13,3 @@ def __init__(self): def forward(self, input_data): return self.model(input_data) - -def create(): - """ Vel factory function """ - def instantiate(**_): - return 
Flatten() - - return ModelFactory.generic(instantiate) diff --git a/vel/module/input/identity.py b/vel/module/input/identity.py deleted file mode 100644 index 6b9ee547..00000000 --- a/vel/module/input/identity.py +++ /dev/null @@ -1,21 +0,0 @@ -from vel.api import BackboneModel, ModelFactory - - -class Identity(BackboneModel): - """ Identity transformation that doesn't do anything """ - def __init__(self): - super().__init__() - - def forward(self, x): - return x - - def reset_weights(self): - pass - - -def create(): - """ Vel factory function """ - def instantiate(**_): - return Identity() - - return ModelFactory.generic(instantiate) diff --git a/vel/net/layer/util/concat.py b/vel/net/layer/util/concat.py new file mode 100644 index 00000000..d80e2d57 --- /dev/null +++ b/vel/net/layer/util/concat.py @@ -0,0 +1,59 @@ +import torch + +from vel.api import SizeHints, SizeHint +from vel.net.layer_base import LayerFactory, Layer + + +class Concat(Layer): + """ Repeat single tensor multiple times """ + + def __init__(self, name: str, size_hints: SizeHints, axis: int = -1): + super().__init__(name) + + self.axis = axis + self._size_hints = size_hints + + def forward(self, direct, state: dict = None, context: dict = None): + return torch.cat(direct, dim=self.axis) + + def size_hints(self) -> SizeHints: + return self._size_hints + + +class ConcatFactory(LayerFactory): + def __init__(self, axis: int = -1): + self.axis = axis + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "concat" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + inputs = direct_input.assert_tuple() + + result = [] + dimension = len(inputs[0]) + + for i in range(dimension): + + if i == (self.axis % dimension): + candidates = [el[i] for el in inputs] + + if None in candidates: + result.append(None) + else: + result.append(sum(candidates)) + else: + result.append(inputs[0][i]) + + return Concat( + name=name, + axis=self.axis, + 
size_hints=SizeHints(SizeHint(*result)) + ) + + +def create(axis: int = -1): + """ Vel factory function """ + return ConcatFactory(axis=axis) diff --git a/vel/net/layer/util/repeat_tensor.py b/vel/net/layer/util/repeat.py similarity index 100% rename from vel/net/layer/util/repeat_tensor.py rename to vel/net/layer/util/repeat.py diff --git a/vel/rl/api/policy.py b/vel/rl/api/policy.py index 4d1109c2..d9215180 100644 --- a/vel/rl/api/policy.py +++ b/vel/rl/api/policy.py @@ -1,3 +1,5 @@ +import torch + from vel.api import OptimizedModel, VelOptimizer, OptimizerFactory, BatchInfo from vel.rl.api import Rollout @@ -22,6 +24,10 @@ def act(self, observation, state=None, deterministic=False) -> dict: """ raise NotImplementedError + def reset_episodic_state(self, dones: torch.Tensor): + """ Called by the rollout worker, whenever episode is finished """ + pass + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: """ Create optimizer for the purpose of optimizing this model """ parameters = filter(lambda p: p.requires_grad, self.parameters()) diff --git a/vel/rl/env_roller/trajectory_replay_env_roller.py b/vel/rl/env_roller/trajectory_replay_env_roller.py index b2bd9092..a9cdc06b 100644 --- a/vel/rl/env_roller/trajectory_replay_env_roller.py +++ b/vel/rl/env_roller/trajectory_replay_env_roller.py @@ -74,6 +74,8 @@ def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: dones_tensor = torch.from_numpy(new_dones.astype(np.float32)).clone() accumulator.add('dones', dones_tensor) + self.actor.reset_states(dones_tensor) + self.last_observation_cpu = torch.from_numpy(new_obs).clone() self.last_observation = self.last_observation_cpu.to(self.device) accumulator.add('rewards', torch.from_numpy(new_rewards.astype(np.float32)).clone()) diff --git a/vel/rl/env_roller/transition_replay_env_roller.py b/vel/rl/env_roller/transition_replay_env_roller.py index 64e48f02..37c65c2f 100644 --- a/vel/rl/env_roller/transition_replay_env_roller.py 
+++ b/vel/rl/env_roller/transition_replay_env_roller.py @@ -89,6 +89,8 @@ def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: dones_tensor = torch.from_numpy(new_dones.astype(np.float32)).clone() accumulator.add('dones', dones_tensor) + self.actor.reset_states(dones_tensor) + self.accumulated_returns = self.accumulated_returns * (1.0 - new_dones.astype(np.float32)) self.last_observation_cpu = torch.from_numpy(new_obs).clone() diff --git a/vel/rl/module/actor_critic_policy.py b/vel/rl/module/actor_critic_policy.py new file mode 100644 index 00000000..dc6ae23f --- /dev/null +++ b/vel/rl/module/actor_critic_policy.py @@ -0,0 +1,87 @@ +import itertools as it + +import gym +import torch + +from vel.api import Network, BackboneNetwork + + +from vel.rl.module.head.deterministic_action_head import DeterministicActionHead +from vel.rl.module.head.deterministic_critic_head import DeterministicCriticHead + + +class ActorCriticPolicy(Network): + """ Deterministic Policy Gradient - model """ + + def __init__(self, input_net: BackboneNetwork, policy_net: BackboneNetwork, + value_net: BackboneNetwork, action_space: gym.Space): + super().__init__() + + self.input_net = input_net + self.policy_backbone = policy_net + self.value_backbone = value_net + + self.action_head = DeterministicActionHead( + input_dim=self.policy_backbone.size_hints().assert_single().last(), + action_space=action_space + ) + + self.critic_head = DeterministicCriticHead( + input_dim=self.value_backbone.size_hints().assert_single().last() + ) + + def layer_groups(self): + """ Grouped layers for optimization purposes """ + return [ + [self.input_net, self.policy_backbone, self.action_head], + [self.input_net, self.value_backbone, self.critic_head], + ] + + def reset_weights(self): + """ Initialize properly model weights """ + self.input_net.reset_weights() + self.policy_backbone.reset_weights() + self.value_backbone.reset_weights() + self.action_head.reset_weights() + 
self.critic_head.reset_weights() + + def forward(self, observations, input_actions=None): + """ Calculate model outputs """ + observations = self.input_net(observations) + + if input_actions is not None: + actions = input_actions + + value_hidden = self.value_backbone((observations, actions)) + + values = self.critic_head(value_hidden) + else: + policy_hidden = self.policy_backbone(observations) + actions = self.action_head(policy_hidden) + + # value_input = torch.cat([observations, actions], dim=1) + value_hidden = self.value_backbone((observations, actions)) + + values = self.critic_head(value_hidden) + + return actions, values + + def policy_parameters(self): + """ Parameters of policy """ + return it.chain(self.input_net(), self.policy_backbone.parameters(), self.action_head.parameters()) + + def value_parameters(self): + """ Parameters of policy """ + return it.chain(self.input_net(), self.value_backbone.parameters(), self.critic_head.parameters()) + + def value(self, observation, input_actions=None): + """ Calculate value for given state """ + action, value = self(observation, input_actions) + return value + + def action(self, observations): + """ Calculate value for given state """ + observations = self.input_net(observations) + policy_hidden = self.policy_backbone(observations) + action = self.action_head(policy_hidden) + return action diff --git a/vel/rl/module/noise/ou_noise.py b/vel/rl/module/noise/ou_noise.py index edda86f8..721b8772 100644 --- a/vel/rl/module/noise/ou_noise.py +++ b/vel/rl/module/noise/ou_noise.py @@ -1,7 +1,6 @@ import gym import numpy as np import torch -import torch.nn as nn from vel.api import Network from vel.util.process import OrnsteinUhlenbeckNoiseProcess @@ -21,13 +20,13 @@ def __init__(self, std_dev: float, action_space: gym.Space): self.register_buffer('low_tensor', torch.from_numpy(self.action_space.low).unsqueeze(0)) self.register_buffer('high_tensor', torch.from_numpy(self.action_space.high).unsqueeze(0)) - def 
reset_training_state(self, dones, batch_info): + def reset_episodic_state(self, dones): """ A hook for a model to react when during training episode is finished """ - for idx, done in enumerate(dones): + for idx, done in enumerate(dones.cpu()): if done > 0.5: self.processes[idx].reset() - def forward(self, actions, batch_info): + def forward(self, actions): """ Return model step after applying noise """ while len(self.processes) < actions.shape[0]: len_action_space = self.action_space.shape[-1] diff --git a/vel/rl/module/test/test_action_head.py b/vel/rl/module/test/test_action_head.py index b0364e5c..8d80e3cc 100644 --- a/vel/rl/module/test/test_action_head.py +++ b/vel/rl/module/test/test_action_head.py @@ -7,7 +7,7 @@ import torch.nn.functional as F import torch.distributions as d -from vel.rl.module.stochastic_action_head import DiagGaussianActionHead, CategoricalActionHead +from vel.rl.module.head.stochastic_action_head import DiagGaussianActionHead, CategoricalActionHead def test_sample_diag_gaussian(): diff --git a/vel/rl/policy/a2c.py b/vel/rl/policy/a2c.py index 7b23d33d..299e8c2b 100644 --- a/vel/rl/policy/a2c.py +++ b/vel/rl/policy/a2c.py @@ -3,7 +3,7 @@ import torch.nn.functional as F from vel.metric.base import AveragingNamedMetric -from vel.util.situational import observation_space_to_size_hint +from vel.util.situational import gym_space_to_size_hint from vel.util.stats import explained_variance from vel.api import ModelFactory, BatchInfo, BackboneNetwork @@ -126,7 +126,7 @@ def instantiate(self, **extra_args): action_space = extra_args.pop('action_space') observation_space = extra_args.pop('observation_space') - size_hint = observation_space_to_size_hint(observation_space) + size_hint = gym_space_to_size_hint(observation_space) net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) diff --git a/vel/rl/policy/acer.py b/vel/rl/policy/acer.py index b468eb30..e1120fa7 100644 --- a/vel/rl/policy/acer.py +++ b/vel/rl/policy/acer.py @@ -3,11 
+3,11 @@ import torch import torch.nn.functional as F -from vel.api import BackboneNetwork, ModelFactory, BatchInfo +from vel.api import BackboneNetwork, ModelFactory, BatchInfo, OptimizerFactory, VelOptimizer from vel.metric.base import AveragingNamedMetric from vel.rl.api import Trajectories, RlPolicy, Rollout from vel.rl.module.q_stochastic_policy import QStochasticPolicy -from vel.util.situational import observation_space_to_size_hint +from vel.util.situational import gym_space_to_size_hint def select_indices(tensor, indices): @@ -40,9 +40,15 @@ def __init__(self, net: BackboneNetwork, target_net: typing.Optional[BackboneNet if self.trust_region: self.target_policy = QStochasticPolicy(target_net, action_space) + self.target_policy.requires_grad_(False) else: self.target_policy = None + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: + """ Create optimizer for the purpose of optimizing this model """ + parameters = filter(lambda p: p.requires_grad, self.policy.parameters()) + return optimizer_factory.instantiate(parameters) + def train(self, mode=True): """ Override train to make sure target model is always in eval mode """ self.policy.train(mode) @@ -261,7 +267,7 @@ def instantiate(self, **extra_args): action_space = extra_args.pop('action_space') observation_space = extra_args.pop('observation_space') - size_hint = observation_space_to_size_hint(observation_space) + size_hint = gym_space_to_size_hint(observation_space) net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) diff --git a/vel/rl/policy/ddpg.py b/vel/rl/policy/ddpg.py new file mode 100644 index 00000000..0e011ecb --- /dev/null +++ b/vel/rl/policy/ddpg.py @@ -0,0 +1,197 @@ +import typing + +import gym +import torch +import torch.autograd +import torch.nn as nn +import torch.nn.functional as F + +import vel.util.module_util as mu + +from vel.api import BackboneNetwork, BatchInfo, ModelFactory, OptimizerFactory, VelOptimizer, SizeHints +from 
vel.metric.base import AveragingNamedMetric +from vel.rl.api import RlPolicy, Rollout +from vel.rl.module.actor_critic_policy import ActorCriticPolicy +from vel.rl.module.noise.ou_noise import OuNoise +from vel.util.situational import gym_space_to_size_hint + + +class DDPG(RlPolicy): + """ Deep Deterministic Policy Gradient (DDPG) - policy gradient calculations """ + + def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_space: gym.Space, + discount_factor: float, tau: float, noise_std_dev: float): + super().__init__(discount_factor) + + self.net = net + self.target_net = target_net + + self.tau = tau + self.discount_factor = discount_factor + + self.action_noise = OuNoise(std_dev=noise_std_dev, action_space=action_space) + + def train(self, mode=True): + """ Override train to make sure target model is always in eval mode """ + self.net.train(mode) + self.target_net.train(False) + + def reset_weights(self): + """ Initialize properly model weights """ + self.net.reset_weights() + self.target_net.load_state_dict(self.net.state_dict()) + + def reset_episodic_state(self, dones: torch.Tensor): + """ Called by the rollout worker, whenever episode is finished """ + self.action_noise.reset_episodic_state(dones) + + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: + """ Create optimizer for the purpose of optimizing this model """ + parameter_groups = mu.to_parameter_groups(self.net.layer_groups()) + return optimizer_factory.instantiate_parameter_groups(parameter_groups) + + def forward(self, observation, state=None): + """ Calculate model outputs """ + return self.net(observation) + + def act(self, observation, state=None, deterministic=False) -> dict: + """ Select actions based on model's output """ + action, value = self(observation) + + if deterministic: + noisy_action = action + else: + noisy_action = self.action_noise(action) + + return { + 'actions': noisy_action, + 'values': value + } + + def 
calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: + """ Calculate loss of the supplied rollout """ + rollout = rollout.to_transitions() + + dones = rollout.batch_tensor('dones') + rewards = rollout.batch_tensor('rewards') + observations_next = rollout.batch_tensor('observations_next') + actions = rollout.batch_tensor('actions') + observations = rollout.batch_tensor('observations') + + # Calculate value loss - or critic loss + with torch.no_grad(): + target_next_value = self.target_net.value(observations_next) + target_value = rewards + (1.0 - dones) * self.discount_factor * target_next_value + + # Value estimation error vs the target network + model_value = self.net.value(observations, actions) + value_loss = F.mse_loss(model_value, target_value) + + # It may seem a bit tricky what I'm doing here, but the underlying idea is simple + # All other implementations I found keep two separate optimizers for actor and critic + # and update them separately + # What I'm trying to do is to optimize them both with a single optimizer + # but I need to make sure gradients flow correctly + # From critic loss to critic network only and from actor loss to actor network only + + # Backpropagate value loss to critic only + value_loss.backward() + + model_action = self.net.action(observations) + model_action_value = self.net.value(observations, model_action) + + policy_loss = -model_action_value.mean() + + model_action_grad = torch.autograd.grad(policy_loss, model_action)[0] + + # Backpropagate actor loss to actor only + model_action.backward(gradient=model_action_grad) + + return { + 'policy_loss': policy_loss.item(), + 'value_loss': value_loss.item(), + } + + def post_optimization_step(self, batch_info: BatchInfo, rollout: Rollout): + """ Steps to take after optimization has been done""" + # Update target model + for model_param, target_param in zip(self.net.parameters(), self.target_net.parameters()): + # EWMA average model update + target_param.data.mul_(1 
- self.tau).add_(model_param.data * self.tau) + + def metrics(self) -> list: + """ List of metrics to track for this learning process """ + return [ + AveragingNamedMetric("value_loss"), + AveragingNamedMetric("policy_loss"), + ] + + +class DDPGFactory(ModelFactory): + """ Factory for the DDPG policy """ + + def __init__(self, actor_net: ModelFactory, critic_net: ModelFactory, + discount_factor: float, tau: float, noise_std_dev: float, + input_net: typing.Optional[ModelFactory] = None): + self.actor_net_factory = actor_net + self.critic_net_factory = critic_net + self.input_net_factory = input_net + + self.discount_factor = discount_factor + self.tau = tau + self.noise_std_dev = noise_std_dev + + def instantiate(self, **extra_args): + """ Instantiate the model """ + action_space = extra_args.pop('action_space') + observation_space = extra_args.pop('observation_space') + + size_hint = gym_space_to_size_hint(observation_space) + action_hint = gym_space_to_size_hint(action_space) + + if self.input_net_factory is None: + target_input_net = input_net = nn.Identity() + else: + input_net = self.input_net_factory.instantiate(size_hint=size_hint, **extra_args) + target_input_net = self.input_net_factory.instantiate(size_hint=size_hint, **extra_args) + size_hint = input_net.size_hints() + + critic_size_hint = SizeHints((size_hint.unwrap(), action_hint.unwrap())) + + actor_net = self.actor_net_factory.instantiate(size_hint=size_hint, **extra_args) + critic_net = self.critic_net_factory.instantiate(size_hint=critic_size_hint, **extra_args) + + net = ActorCriticPolicy( + input_net, actor_net, critic_net, action_space + ) + + target_actor_net = self.actor_net_factory.instantiate(size_hint=size_hint, **extra_args) + target_critic_net = self.critic_net_factory.instantiate(size_hint=critic_size_hint, **extra_args) + + target_net = ActorCriticPolicy( + target_input_net, target_actor_net, target_critic_net, action_space + ) + + return DDPG( + net=net, + target_net=target_net, + 
action_space=action_space, + discount_factor=self.discount_factor, + tau=self.tau, + noise_std_dev=self.noise_std_dev + ) + + +def create(actor_net: ModelFactory, critic_net: ModelFactory, + discount_factor: float, tau: float, noise_std_dev: float, + input_net: typing.Optional[ModelFactory] = None + ): + """ Vel factory function """ + return DDPGFactory( + actor_net=actor_net, + critic_net=critic_net, + input_net=input_net, + discount_factor=discount_factor, + tau=tau, + noise_std_dev=noise_std_dev + ) diff --git a/vel/rl/policy/dqn.py b/vel/rl/policy/dqn.py index cb563e5d..243bb1e2 100644 --- a/vel/rl/policy/dqn.py +++ b/vel/rl/policy/dqn.py @@ -6,13 +6,13 @@ import torch.nn.functional as F import torch.nn.utils -from vel.api import ModelFactory, BackboneNetwork, BatchInfo, Schedule +from vel.api import ModelFactory, BackboneNetwork, BatchInfo, Schedule, OptimizerFactory, VelOptimizer from vel.function.constant import ConstantSchedule from vel.metric import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout from vel.rl.module.q_policy import QPolicy from vel.rl.module.noise.eps_greedy import EpsGreedy -from vel.util.situational import observation_space_to_size_hint +from vel.util.situational import gym_space_to_size_hint class DQN(RlPolicy): @@ -25,6 +25,7 @@ def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_spa self.model = QPolicy(net=net, action_space=action_space, dueling_dqn=dueling_dqn) self.target_model = QPolicy(net=target_net, action_space=action_space, dueling_dqn=dueling_dqn) + self.target_model.requires_grad_(False) self.double_dqn = double_dqn self.target_update_frequency = target_update_frequency @@ -37,6 +38,11 @@ def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_spa self.epsilon_value = self.epsilon_schedule.value(0.0) self.action_noise = EpsGreedy(action_space=action_space) + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: + """ Create optimizer for 
the purpose of optimizing this model """ + parameters = filter(lambda p: p.requires_grad, self.model.parameters()) + return optimizer_factory.instantiate(parameters) + def train(self, mode=True): """ Override train to make sure target model is always in eval mode """ self.model.train(mode) @@ -142,7 +148,7 @@ def instantiate(self, **extra_args): action_space = extra_args.pop('action_space') observation_space = extra_args.pop('observation_space') - size_hint = observation_space_to_size_hint(observation_space) + size_hint = gym_space_to_size_hint(observation_space) net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) target_net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) diff --git a/vel/rl/policy/ppo.py b/vel/rl/policy/ppo.py index ea09a29c..92a8cd23 100644 --- a/vel/rl/policy/ppo.py +++ b/vel/rl/policy/ppo.py @@ -4,7 +4,7 @@ import numbers from vel.api import BatchInfo, ModelFactory, BackboneNetwork -from vel.util.situational import observation_space_to_size_hint +from vel.util.situational import gym_space_to_size_hint from vel.util.stats import explained_variance from vel.function.constant import ConstantSchedule from vel.metric.base import AveragingNamedMetric @@ -169,7 +169,7 @@ def instantiate(self, **extra_args): action_space = extra_args.pop('action_space') observation_space = extra_args.pop('observation_space') - size_hint = observation_space_to_size_hint(observation_space) + size_hint = gym_space_to_size_hint(observation_space) net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) diff --git a/vel/rl/policy/purgatory/ddpg.py b/vel/rl/policy/purgatory/ddpg.py deleted file mode 100644 index 1e47b5a0..00000000 --- a/vel/rl/policy/purgatory/ddpg.py +++ /dev/null @@ -1,94 +0,0 @@ -import torch -import typing -import torch.autograd -import torch.nn.functional as F - -from vel.rl.api import OptimizerAlgoBase -from vel.metric.base import AveragingNamedMetric - - -class 
DeepDeterministicPolicyGradient(OptimizerAlgoBase): - """ Deep Deterministic Policy Gradient (DDPG) - policy gradient calculations """ - - def __init__(self, model_factory, discount_factor: float, tau: float, max_grad_norm: typing.Optional[float] = None): - super().__init__(max_grad_norm) - - self.model_factory = model_factory - self.tau = tau - self.discount_factor = discount_factor - - self.target_model = None - - def initialize(self, training_info, model, environment, device): - """ Initialize algo from reinforcer settings """ - self.target_model = self.model_factory.instantiate(action_space=environment.action_space).to(device) - self.target_model.load_state_dict(model.state_dict()) - self.target_model.eval() - - def calculate_gradient(self, batch_info, device, model, rollout): - """ Calculate loss of the supplied rollout """ - rollout = rollout.to_transitions() - - dones = rollout.batch_tensor('dones') - rewards = rollout.batch_tensor('rewards') - observations_next = rollout.batch_tensor('observations_next') - actions = rollout.batch_tensor('actions') - observations = rollout.batch_tensor('observations') - - # Calculate value loss - or critic loss - with torch.no_grad(): - target_next_value = self.target_model.value(observations_next) - target_value = rewards + (1.0 - dones) * self.discount_factor * target_next_value - - # Value estimation error vs the target network - model_value = model.value(observations, actions) - value_loss = F.mse_loss(model_value, target_value) - - # It may seem a bit tricky what I'm doing here, but the underlying idea is simple - # All other implementations I found keep two separate optimizers for actor and critic - # and update them separately - # What I'm trying to do is to optimize them both with a single optimizer - # but I need to make sure gradients flow correctly - # From critic loss to critic network only and from actor loss to actor network only - - # Backpropagate value loss to critic only - value_loss.backward() - - 
model_action = model.action(observations) - model_action_value = model.value(observations, model_action) - - policy_loss = -model_action_value.mean() - - model_action_grad = torch.autograd.grad(policy_loss, model_action)[0] - - # Backpropagate actor loss to actor only - model_action.backward(gradient=model_action_grad) - - return { - 'policy_loss': policy_loss.item(), - 'value_loss': value_loss.item(), - } - - def post_optimization_step(self, batch_info, device, model, rollout): - """ Steps to take after optimization has been done""" - # Update target model - for model_param, target_param in zip(model.parameters(), self.target_model.parameters()): - # EWMA average model update - target_param.data.mul_(1 - self.tau).add_(model_param.data * self.tau) - - def metrics(self) -> list: - """ List of metrics to track for this learning process """ - return [ - AveragingNamedMetric("value_loss"), - AveragingNamedMetric("policy_loss"), - ] - - -def create(model, discount_factor: float, tau: float, max_grad_norm: float = None): - """ Vel factory function """ - return DeepDeterministicPolicyGradient( - tau=tau, - discount_factor=discount_factor, - model_factory=model, - max_grad_norm=max_grad_norm - ) diff --git a/vel/rl/policy/rainbow.py b/vel/rl/policy/rainbow.py index c28c3b29..968588c0 100644 --- a/vel/rl/policy/rainbow.py +++ b/vel/rl/policy/rainbow.py @@ -2,11 +2,11 @@ import torch import torch.nn.utils -from vel.api import ModelFactory, BackboneNetwork, BatchInfo +from vel.api import ModelFactory, BackboneNetwork, BatchInfo, OptimizerFactory, VelOptimizer from vel.metric import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout from vel.rl.module.rainbow_policy import RainbowPolicy -from vel.util.situational import observation_space_to_size_hint +from vel.util.situational import gym_space_to_size_hint class Rainbow(RlPolicy): @@ -36,6 +36,7 @@ def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_spa initial_std_dev=initial_std_dev, 
factorized_noise=factorized_noise ) + self.target_model.requires_grad_(False) self.discount_factor = discount_factor self.target_update_frequency = target_update_frequency @@ -49,6 +50,11 @@ def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_spa self.register_buffer('support_atoms', self.model.support_atoms.clone()) self.atom_delta = self.model.atom_delta + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: + """ Create optimizer for the purpose of optimizing this model """ + parameters = filter(lambda p: p.requires_grad, self.model.parameters()) + return optimizer_factory.instantiate(parameters) + def train(self, mode=True): """ Override train to make sure target model is always in eval mode """ self.model.train(mode) @@ -214,7 +220,7 @@ def instantiate(self, **extra_args): action_space = extra_args.pop('action_space') observation_space = extra_args.pop('observation_space') - size_hint = observation_space_to_size_hint(observation_space) + size_hint = gym_space_to_size_hint(observation_space) # TODO(jerry): Push noisy net parameters down the stack here net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) diff --git a/vel/rl/policy/trpo.py b/vel/rl/policy/trpo.py index 8c041c6f..c7fe2cda 100644 --- a/vel/rl/policy/trpo.py +++ b/vel/rl/policy/trpo.py @@ -16,7 +16,7 @@ from vel.rl.discount_bootstrap import discount_bootstrap_gae from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head from vel.rl.module.head.value_head import ValueHead -from vel.util.situational import observation_space_to_size_hint +from vel.util.situational import gym_space_to_size_hint def p2v(params): @@ -375,7 +375,7 @@ def instantiate(self, **extra_args): action_space = extra_args.pop('action_space') observation_space = extra_args.pop('observation_space') - size_hint = observation_space_to_size_hint(observation_space) + size_hint = gym_space_to_size_hint(observation_space) if self.input_net is None: 
input_net = None diff --git a/vel/rl/util/actor.py b/vel/rl/util/actor.py index 55b3950b..e23097d8 100644 --- a/vel/rl/util/actor.py +++ b/vel/rl/util/actor.py @@ -26,13 +26,14 @@ def act(self, observation, advance_state=True, deterministic=False): return result - def reset_states(self, dones): + def reset_states(self, dones: torch.Tensor): """ Reset states given dones """ + self.policy.reset_episodic_state(dones) + if not self.policy.is_stateful: return dones = dones.to(self.device) - self.state = self.policy.reset_state(self.state, dones) def value(self, observation): diff --git a/vel/rl/xpolicy/purgatory/deterministic_policy.py b/vel/rl/xpolicy/purgatory/deterministic_policy.py deleted file mode 100644 index 58d908fe..00000000 --- a/vel/rl/xpolicy/purgatory/deterministic_policy.py +++ /dev/null @@ -1,164 +0,0 @@ -import gym -import itertools as it -import torch -import typing - -from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Evaluator, RlPolicy -from vel.rl.module.deterministic_action_head import DeterministicActionHead -from vel.rl.module.deterministic_critic_head import DeterministicCriticHead - - -class DeterministicPolicyEvaluator(Evaluator): - """ Evaluator for DeterministicPolicyModel """ - - def __init__(self, model: 'DeterministicPolicyModel', rollout: Rollout): - super().__init__(rollout) - - self.model = model - - @Evaluator.provides('model:values_next') - def model_estimated_values_next(self): - """ Estimate state-value of the transition next state """ - observations = self.get('rollout:observations_next') - action, value = self.model(observations) - return value - - @Evaluator.provides('model:actions') - def model_actions(self): - """ Estimate state-value of the transition next state """ - observations = self.get('rollout:observations') - model_action = self.model.action(observations) - return model_action - - 
@Evaluator.provides('model:model_action:q') - def model_model_action_q(self): - observations = self.get('rollout:observations') - model_actions = self.get('model:actions') - return self.model.value(observations, model_actions) - - @Evaluator.provides('model:action:q') - def model_action_q(self): - observations = self.get('rollout:observations') - rollout_actions = self.get('rollout:actions') - return self.model.value(observations, rollout_actions) - - -class DeterministicPolicyModel(RlPolicy): - """ Deterministic Policy Gradient - model """ - - def __init__(self, input_block: BackboneModel, policy_backbone: LinearBackboneModel, - value_backbone: LinearBackboneModel, action_space: gym.Space): - super().__init__() - - self.input_block = input_block - self.policy_backbone = policy_backbone - self.value_backbone = value_backbone - - self.action_head = DeterministicActionHead(self.policy_backbone.output_dim, action_space) - self.critic_head = DeterministicCriticHead(self.value_backbone.output_dim) - - def reset_weights(self): - """ Initialize properly model weights """ - self.input_block.reset_weights() - self.policy_backbone.reset_weights() - self.value_backbone.reset_weights() - self.action_head.reset_weights() - self.critic_head.reset_weights() - - def forward(self, observations, input_actions=None): - """ Calculate model outputs """ - observations = self.input_block(observations) - - if input_actions is not None: - actions = input_actions - - value_input = torch.cat([observations, actions], dim=1) - value_hidden = self.value_backbone(value_input) - - values = self.critic_head(value_hidden) - else: - policy_hidden = self.policy_backbone(observations) - actions = self.action_head(policy_hidden) - - value_input = torch.cat([observations, actions], dim=1) - value_hidden = self.value_backbone(value_input) - - values = self.critic_head(value_hidden) - - return actions, values - - def policy_parameters(self): - """ Parameters of policy """ - return 
it.chain(self.policy_backbone.parameters(), self.action_head.parameters()) - - def value_parameters(self): - """ Parameters of policy """ - return it.chain(self.value_backbone.parameters(), self.critic_head.parameters()) - - def get_layer_groups(self): - """ Return layers grouped """ - return [ - [self.policy_backbone, self.action_head], - [self.value_backbone, [y for (x, y) in self.critic_head.named_parameters() if x.endswith('bias')]], - # OpenAI regularizes only weight on the last layer. I'm just replicating that - [[y for (x, y) in self.critic_head.named_parameters() if x.endswith('weight')]] - ] - - def step(self, observations): - """ Select actions based on model's output """ - action, value = self(observations) - - return { - 'actions': action, - 'values': value - } - - def value(self, observation, input_actions=None): - """ Calculate value for given state """ - action, value = self(observation, input_actions) - return value - - def action(self, observations): - """ Calculate value for given state """ - observations = self.input_block(observations) - policy_hidden = self.policy_backbone(observations) - action = self.action_head(policy_hidden) - return action - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return DeterministicPolicyEvaluator(self, rollout) - - -class DeterministicPolicyModelFactory(ModelFactory): - """ Factory class for policy gradient models """ - def __init__(self, input_block: ModelFactory, policy_backbone: ModelFactory, value_backbone: ModelFactory): - self.input_block = input_block - self.policy_backbone = policy_backbone - self.value_backbone = value_backbone - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - policy_backbone = self.policy_backbone.instantiate(**extra_args) - value_backbone = self.value_backbone.instantiate(**extra_args) - - return DeterministicPolicyModel( - input_block=input_block, - 
policy_backbone=policy_backbone, - value_backbone=value_backbone, - action_space=extra_args['action_space'], - ) - - -def create(policy_backbone: ModelFactory, value_backbone: ModelFactory, - input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return DeterministicPolicyModelFactory( - input_block=input_block, policy_backbone=policy_backbone, value_backbone=value_backbone - ) diff --git a/vel/util/situational.py b/vel/util/situational.py index c0f9c55a..4a4cb402 100644 --- a/vel/util/situational.py +++ b/vel/util/situational.py @@ -30,7 +30,7 @@ def process_environment_settings(default_dictionary: dict, settings: typing.Opti return result_dict -def observation_space_to_size_hint(space: gym.Space) -> SizeHints: +def gym_space_to_size_hint(space: gym.Space) -> SizeHints: """ Convert Gym observation space to size hints """ if isinstance(space, gym.spaces.Box): return size_hint_from_shape(space.shape) From d6d286a21e9863601c75ff70b4c1b544bd13736f Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 14:18:34 -0700 Subject: [PATCH 114/162] Standardized model naming. 
--- vel/rl/policy/a2c.py | 14 +++++++------- vel/rl/policy/acer.py | 30 +++++++++++++++--------------- vel/rl/policy/dqn.py | 30 +++++++++++++++--------------- vel/rl/policy/ppo.py | 14 +++++++------- vel/rl/policy/rainbow.py | 34 +++++++++++++++++----------------- 5 files changed, 61 insertions(+), 61 deletions(-) diff --git a/vel/rl/policy/a2c.py b/vel/rl/policy/a2c.py index 299e8c2b..ba739527 100644 --- a/vel/rl/policy/a2c.py +++ b/vel/rl/policy/a2c.py @@ -23,23 +23,23 @@ def __init__(self, net: BackboneNetwork, action_space: gym.Space, self.value_coefficient = value_coefficient self.gae_lambda = gae_lambda - self.policy = StochasticPolicy(net, action_space) + self.net = StochasticPolicy(net, action_space) def reset_weights(self): """ Initialize properly model weights """ - self.policy.reset_weights() + self.net.reset_weights() def forward(self, observation, state=None): """ Calculate model outputs """ - return self.policy(observation) + return self.net(observation) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ action_pd_params, value_output = self(observation) - actions = self.policy.action_head.sample(action_pd_params, deterministic=deterministic) + actions = self.net.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action - logprobs = self.policy.action_head.logprob(actions, action_pd_params) + logprobs = self.net.action_head.logprob(actions, action_pd_params) return { 'actions': actions, @@ -79,8 +79,8 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: pd_params, model_values = self(observations) - log_probs = self.policy.action_head.logprob(actions, pd_params) - entropy = self.policy.action_head.entropy(pd_params) + log_probs = self.net.action_head.logprob(actions, pd_params) + entropy = self.net.action_head.entropy(pd_params) # Actual calculations. 
Pretty trivial policy_loss = -torch.mean(advantages * log_probs) diff --git a/vel/rl/policy/acer.py b/vel/rl/policy/acer.py index e1120fa7..433a4ea2 100644 --- a/vel/rl/policy/acer.py +++ b/vel/rl/policy/acer.py @@ -36,44 +36,44 @@ def __init__(self, net: BackboneNetwork, target_net: typing.Optional[BackboneNet self.average_model_alpha = average_model_alpha self.trust_region_delta = trust_region_delta - self.policy = QStochasticPolicy(net, action_space) + self.net = QStochasticPolicy(net, action_space) if self.trust_region: - self.target_policy = QStochasticPolicy(target_net, action_space) - self.target_policy.requires_grad_(False) + self.target_net = QStochasticPolicy(target_net, action_space) + self.target_net.requires_grad_(False) else: - self.target_policy = None + self.target_net = None def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: """ Create optimizer for the purpose of optimizing this model """ - parameters = filter(lambda p: p.requires_grad, self.policy.parameters()) + parameters = filter(lambda p: p.requires_grad, self.net.parameters()) return optimizer_factory.instantiate(parameters) def train(self, mode=True): """ Override train to make sure target model is always in eval mode """ - self.policy.train(mode) + self.net.train(mode) if self.trust_region: - self.target_policy.train(False) + self.target_net.train(False) def reset_weights(self): """ Initialize properly model weights """ - self.policy.reset_weights() + self.net.reset_weights() if self.trust_region: - self.target_policy.load_state_dict(self.policy.state_dict()) + self.target_net.load_state_dict(self.net.state_dict()) def forward(self, observation, state=None): """ Calculate model outputs """ - return self.policy(observation) + return self.net(observation) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ logprobs, q = self(observation) - actions = self.policy.action_head.sample(logprobs, 
deterministic=deterministic) + actions = self.net.action_head.sample(logprobs, deterministic=deterministic) # log likelihood of selected action - action_logprobs = self.policy.action_head.logprob(actions, logprobs) + action_logprobs = self.net.action_head.logprob(actions, logprobs) values = (torch.exp(logprobs) * q).sum(dim=1) return { @@ -86,7 +86,7 @@ def act(self, observation, state=None, deterministic=False): def update_target_policy(self): """ Update weights of the average model with new model observation """ - for model_param, average_param in zip(self.policy.parameters(), self.target_policy.parameters()): + for model_param, average_param in zip(self.net.parameters(), self.target_net.parameters()): # EWMA average model update average_param.data.mul_(self.average_model_alpha).add_(model_param.data * (1 - self.average_model_alpha)) @@ -145,7 +145,7 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: explained_variance = 1 - torch.var(q_retraced - action_q) / torch.var(q_retraced) # Entropy of the policy distribution - policy_entropy = torch.mean(self.policy.action_head.entropy(logprobs)) + policy_entropy = torch.mean(self.net.action_head.entropy(logprobs)) policy_gradient_loss = -torch.mean(advantages * importance_sampling_coefficient * action_logprobs) # Policy gradient bias correction @@ -167,7 +167,7 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: if self.trust_region: with torch.no_grad(): - target_logprobs = self.target_policy(observations)[0] + target_logprobs = self.target_net(observations)[0] actor_loss = policy_loss - self.entropy_coefficient * policy_entropy q_loss = self.q_coefficient * q_function_loss diff --git a/vel/rl/policy/dqn.py b/vel/rl/policy/dqn.py index 243bb1e2..9bc8dd64 100644 --- a/vel/rl/policy/dqn.py +++ b/vel/rl/policy/dqn.py @@ -23,9 +23,9 @@ def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_spa dueling_dqn: bool, target_update_frequency: 
int): super().__init__(discount_factor) - self.model = QPolicy(net=net, action_space=action_space, dueling_dqn=dueling_dqn) - self.target_model = QPolicy(net=target_net, action_space=action_space, dueling_dqn=dueling_dqn) - self.target_model.requires_grad_(False) + self.net = QPolicy(net=net, action_space=action_space, dueling_dqn=dueling_dqn) + self.target_net = QPolicy(net=target_net, action_space=action_space, dueling_dqn=dueling_dqn) + self.target_net.requires_grad_(False) self.double_dqn = double_dqn self.target_update_frequency = target_update_frequency @@ -40,27 +40,27 @@ def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_spa def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: """ Create optimizer for the purpose of optimizing this model """ - parameters = filter(lambda p: p.requires_grad, self.model.parameters()) + parameters = filter(lambda p: p.requires_grad, self.net.parameters()) return optimizer_factory.instantiate(parameters) def train(self, mode=True): """ Override train to make sure target model is always in eval mode """ - self.model.train(mode) - self.target_model.train(False) + self.net.train(mode) + self.target_net.train(False) def reset_weights(self): """ Initialize properly model weights """ - self.model.reset_weights() - self.target_model.load_state_dict(self.model.state_dict()) + self.net.reset_weights() + self.target_net.load_state_dict(self.net.state_dict()) def forward(self, observation, state=None): """ Calculate model outputs """ - return self.model(observation) + return self.net(observation) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ - q_values = self.model(observation) - actions = self.model.q_head.sample(q_values) + q_values = self.net(observation) + actions = self.net.q_head.sample(q_values) noisy_actions = self.action_noise(actions, epsilon=self.epsilon_value, deterministic=deterministic) return { @@ -79,14 +79,14 
@@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: assert dones_tensor.dtype == torch.float32 - q = self.model(observations) + q = self.net(observations) with torch.no_grad(): - target_q = self.target_model(observations_next) + target_q = self.target_net(observations_next) if self.double_dqn: # DOUBLE DQN - model_q_next = self.model(observations_next) + model_q_next = self.net(observations_next) # Select largest 'target' value based on action that 'model' selects values = target_q.gather(1, model_q_next.argmax(dim=1, keepdim=True)).squeeze(1) else: @@ -120,7 +120,7 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: def post_optimization_step(self, batch_info, rollout): """ Steps to take after optimization has been done""" if batch_info.aggregate_batch_number % self.target_update_frequency == 0: - self.target_model.load_state_dict(self.model.state_dict()) + self.target_net.load_state_dict(self.net.state_dict()) self.epsilon_value = self.epsilon_schedule.value(batch_info['progress']) diff --git a/vel/rl/policy/ppo.py b/vel/rl/policy/ppo.py index 92a8cd23..1c922f10 100644 --- a/vel/rl/policy/ppo.py +++ b/vel/rl/policy/ppo.py @@ -31,23 +31,23 @@ def __init__(self, net: BackboneNetwork, action_space: gym.Space, else: self.cliprange = cliprange - self.policy = StochasticPolicy(net, action_space) + self.net = StochasticPolicy(net, action_space) def reset_weights(self): """ Initialize properly model weights """ - self.policy.reset_weights() + self.net.reset_weights() def forward(self, observation, state=None): """ Calculate model outputs """ - return self.policy(observation) + return self.net(observation) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ action_pd_params, value_output = self(observation) - actions = self.policy.action_head.sample(action_pd_params, deterministic=deterministic) + actions = self.net.action_head.sample(action_pd_params, 
deterministic=deterministic) # log likelihood of selected action - logprobs = self.policy.action_head.logprob(actions, action_pd_params) + logprobs = self.net.action_head.logprob(actions, action_pd_params) return { 'actions': actions, @@ -91,8 +91,8 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: # PART 0.1 - Model evaluation pd_params, model_values = self(observations) - model_action_logprobs = self.policy.action_head.logprob(actions, pd_params) - entropy = self.policy.action_head.entropy(pd_params) + model_action_logprobs = self.net.action_head.logprob(actions, pd_params) + entropy = self.net.action_head.entropy(pd_params) # Select the cliprange current_cliprange = self.cliprange.value(batch_info['progress']) diff --git a/vel/rl/policy/rainbow.py b/vel/rl/policy/rainbow.py index 968588c0..f8693131 100644 --- a/vel/rl/policy/rainbow.py +++ b/vel/rl/policy/rainbow.py @@ -17,7 +17,7 @@ def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_spa initial_std_dev: float = 0.4, factorized_noise: bool = True): super().__init__(discount_factor) - self.model = RainbowPolicy( + self.net = RainbowPolicy( net=net, action_space=action_space, vmin=vmin, @@ -27,7 +27,7 @@ def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_spa factorized_noise=factorized_noise ) - self.target_model = RainbowPolicy( + self.target_net = RainbowPolicy( net=target_net, action_space=action_space, vmin=vmin, @@ -36,7 +36,7 @@ def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_spa initial_std_dev=initial_std_dev, factorized_noise=factorized_noise ) - self.target_model.requires_grad_(False) + self.target_net.requires_grad_(False) self.discount_factor = discount_factor self.target_update_frequency = target_update_frequency @@ -47,34 +47,34 @@ def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_spa # self.support_atoms = self.model.q # self.atom_delta = 
histogram_info['atom_delta'] - self.register_buffer('support_atoms', self.model.support_atoms.clone()) - self.atom_delta = self.model.atom_delta + self.register_buffer('support_atoms', self.net.support_atoms.clone()) + self.atom_delta = self.net.atom_delta def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: """ Create optimizer for the purpose of optimizing this model """ - parameters = filter(lambda p: p.requires_grad, self.model.parameters()) + parameters = filter(lambda p: p.requires_grad, self.net.parameters()) return optimizer_factory.instantiate(parameters) def train(self, mode=True): """ Override train to make sure target model is always in eval mode """ - self.model.train(mode) - self.target_model.train(False) + self.net.train(mode) + self.target_net.train(False) def reset_weights(self): """ Initialize properly model weights """ - self.model.reset_weights() - self.target_model.load_state_dict(self.model.state_dict()) + self.net.reset_weights() + self.target_net.load_state_dict(self.net.state_dict()) def forward(self, observation, state=None): """ Calculate model outputs """ - return self.model(observation) + return self.net(observation) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ self.train(mode=not deterministic) - q_values = self.model(observation) - actions = self.model.q_head.sample(q_values) + q_values = self.net(observation) + actions = self.net.q_head.sample(q_values) return { 'actions': actions, @@ -94,13 +94,13 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: assert dones_tensor.dtype == torch.float32 - q = self.model(observations) + q = self.net(observations) with torch.no_grad(): # DOUBLE DQN # Histogram gets returned as logits initially, we need to exp it before projection - target_value_histogram_for_all_actions = self.target_model(observations_next).exp() - model_value_histogram_for_all_actions = 
self.model(observations_next).exp() + target_value_histogram_for_all_actions = self.target_net(observations_next).exp() + model_value_histogram_for_all_actions = self.net(observations_next).exp() atoms_aligned = self.support_atoms.view(1, 1, self.num_atoms) @@ -192,7 +192,7 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: def post_optimization_step(self, batch_info, rollout): """ Steps to take after optimization has been done""" if batch_info.aggregate_batch_number % self.target_update_frequency == 0: - self.target_model.load_state_dict(self.model.state_dict()) + self.target_net.load_state_dict(self.net.state_dict()) def metrics(self) -> list: """ List of metrics to track for this learning process """ From 5d05e58bb6a7229c29c30e780297eadf005c19fe Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 14:19:37 -0700 Subject: [PATCH 115/162] get_layer_groups -> layer_groups --- vel/model/imagenet/resnet34.py | 4 ++-- vel/model/rnn/multilayer_rnn_sequence_classification.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vel/model/imagenet/resnet34.py b/vel/model/imagenet/resnet34.py index 18bdd667..f9901eba 100644 --- a/vel/model/imagenet/resnet34.py +++ b/vel/model/imagenet/resnet34.py @@ -77,7 +77,7 @@ def unfreeze(self): for idx, child in enumerate(self.model.children()): mu.unfreeze_layer(child) - def get_layer_groups(self): + def layer_groups(self): """ Return layers grouped """ g1 = list(self.model[:self.group_cut_layers[0]]) g2 = list(self.model[self.group_cut_layers[0]:self.group_cut_layers[1]]) @@ -85,7 +85,7 @@ def get_layer_groups(self): return [g1, g2, g3] def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: - parameters = mu.to_parameter_groups(self.get_layer_groups()) + parameters = mu.to_parameter_groups(self.layer_groups()) return optimizer_factory.instantiate_parameter_groups(parameters) def forward(self, x): diff --git 
a/vel/model/rnn/multilayer_rnn_sequence_classification.py b/vel/model/rnn/multilayer_rnn_sequence_classification.py index d19f40f3..20f40706 100644 --- a/vel/model/rnn/multilayer_rnn_sequence_classification.py +++ b/vel/model/rnn/multilayer_rnn_sequence_classification.py @@ -123,7 +123,7 @@ def forward(self, sequence): return self.output_activation(data) - def get_layer_groups(self): + def layer_groups(self): return [ self.input_block, self.rnn_layers, @@ -133,7 +133,7 @@ def get_layer_groups(self): def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: """ Create optimizer for the purpose of optimizing this model """ - parameters = mu.to_parameter_groups(self.get_layer_groups()) + parameters = mu.to_parameter_groups(self.layer_groups()) return optimizer_factory.instantiate_parameter_groups(parameters) @property From bfba7cc49a4e99b8903d2dab158d643a633191dd Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 15:56:41 -0700 Subject: [PATCH 116/162] Brought back the RNN RL training. 
--- .../atari/{purgatory => }/atari_a2c_lstm.yaml | 24 ++-- .../atari/{purgatory => }/atari_ppo_gru.yaml | 26 ++--- vel/module/rnn_cell.py | 61 ---------- vel/net/modular.py | 28 ++++- vel/rl/env_roller/step_env_roller.py | 18 ++- .../trajectory_replay_env_roller.py | 2 +- .../layer/{purgatory => }/nature_cnn_rnn.py | 3 + vel/rl/layer/rnn_cell.py | 105 ++++++++++++++++++ vel/rl/module/stochastic_rnn_policy.py | 76 +++++++++++++ vel/rl/policy/a2c.py | 2 +- vel/rl/policy/{purgatory => }/a2c_rnn.py | 71 +++++++----- vel/rl/policy/acer.py | 2 +- vel/rl/policy/ppo.py | 2 +- vel/rl/policy/{purgatory => }/ppo_rnn.py | 76 ++++++++----- vel/rl/policy/purgatory/__init__.py | 0 vel/rl/policy/trpo.py | 2 +- vel/rl/xpolicy/stochastic_rnn_policy.py | 99 ----------------- vel/util/datastructure.py | 21 ++++ vel/util/tensor_util.py | 6 +- 19 files changed, 366 insertions(+), 258 deletions(-) rename examples-configs/rl/atari/{purgatory => }/atari_a2c_lstm.yaml (76%) rename examples-configs/rl/atari/{purgatory => }/atari_ppo_gru.yaml (79%) delete mode 100644 vel/module/rnn_cell.py rename vel/rl/layer/{purgatory => }/nature_cnn_rnn.py (95%) create mode 100644 vel/rl/layer/rnn_cell.py create mode 100644 vel/rl/module/stochastic_rnn_policy.py rename vel/rl/policy/{purgatory => }/a2c_rnn.py (70%) rename vel/rl/policy/{purgatory => }/ppo_rnn.py (79%) delete mode 100644 vel/rl/policy/purgatory/__init__.py delete mode 100644 vel/rl/xpolicy/stochastic_rnn_policy.py create mode 100644 vel/util/datastructure.py diff --git a/examples-configs/rl/atari/purgatory/atari_a2c_lstm.yaml b/examples-configs/rl/atari/atari_a2c_lstm.yaml similarity index 76% rename from examples-configs/rl/atari/purgatory/atari_a2c_lstm.yaml rename to examples-configs/rl/atari/atari_a2c_lstm.yaml index 4db60264..25476fed 100644 --- a/examples-configs/rl/atari/purgatory/atari_a2c_lstm.yaml +++ b/examples-configs/rl/atari/atari_a2c_lstm.yaml @@ -12,24 +12,22 @@ vec_env: model: - name: vel.rl.algo.a2c_rnn + name: 
vel.rl.policy.a2c_rnn entropy_coefficient: 0.01 value_coefficient: 0.5 discount_factor: 0.99 - policy: - name: vel.rl.policy.stochastic_rnn_policy - - input_block: - name: vel.module.input.image_to_tensor - - backbone: - name: vel.rl.backbone.nature_cnn_rnn - input_width: 84 - input_height: 84 - input_channels: 1 # The same as frame_history - rnn_type: 'lstm' + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + - name: vel.rl.layer.nature_cnn + - name: vel.rl.layer.rnn_cell + hidden_size: 512 + rnn_type: 'lstm' + - name: vel.net.layer.util.repeat + times: 2 # Need to repeat output twice, for action and value heads reinforcer: diff --git a/examples-configs/rl/atari/purgatory/atari_ppo_gru.yaml b/examples-configs/rl/atari/atari_ppo_gru.yaml similarity index 79% rename from examples-configs/rl/atari/purgatory/atari_ppo_gru.yaml rename to examples-configs/rl/atari/atari_ppo_gru.yaml index afea6850..81d7af1e 100644 --- a/examples-configs/rl/atari/purgatory/atari_ppo_gru.yaml +++ b/examples-configs/rl/atari/atari_ppo_gru.yaml @@ -11,7 +11,7 @@ vec_env: model: - name: vel.rl.algo.ppo_rnn + name: vel.rl.policy.ppo_rnn entropy_coefficient: 0.01 value_coefficient: 0.5 @@ -24,20 +24,16 @@ model: initial_value: 0.1 final_value: 0.0 - policy: - name: vel.rl.policy.stochastic_rnn_policy - - input_block: - name: vel.module.input.image_to_tensor - - backbone: - name: vel.rl.backbone.nature_cnn_rnn - rnn_type: 'gru' - hidden_units: 512 - - input_width: 84 - input_height: 84 - input_channels: 1 # The same as frame_history + net: + name: vel.net.modular + layers: + - name: vel.net.layer.input.image_to_tensor + - name: vel.rl.layer.nature_cnn + - name: vel.rl.layer.rnn_cell + hidden_size: 512 + rnn_type: 'gru' + - name: vel.net.layer.util.repeat + times: 2 # Need to repeat output twice, for action and value heads reinforcer: diff --git a/vel/module/rnn_cell.py b/vel/module/rnn_cell.py deleted file mode 100644 index 5ce58867..00000000 --- 
a/vel/module/rnn_cell.py +++ /dev/null @@ -1,61 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.init as init - - -from vel.api import LinearBackboneModel - - -class RnnCell(LinearBackboneModel): - """ Generalization of RNN cell (Simple RNN, LSTM or GRU) """ - - def __init__(self, input_size, hidden_size, rnn_type, bias=True, nonlinearity='tanh'): - super().__init__() - - assert rnn_type in {'rnn', 'lstm', 'gru'}, "Rnn type {} is not supported".format(rnn_type) - - self.input_size = input_size - self.hidden_size = hidden_size - self.rnn_type = rnn_type - - if self.rnn_type == 'rnn': - self.rnn_cell = nn.RNNCell( - input_size=input_size, hidden_size=hidden_size, bias=bias, nonlinearity=nonlinearity - ) - elif self.rnn_type == 'lstm': - self.rnn_cell = nn.LSTMCell(input_size=input_size, hidden_size=hidden_size, bias=bias) - elif self.rnn_type == 'gru': - self.rnn_cell = nn.GRUCell(input_size=input_size, hidden_size=hidden_size, bias=bias) - - def reset_weights(self): - init.xavier_normal_(self.rnn_cell.weight_hh) - init.xavier_normal_(self.rnn_cell.weight_ih) - init.zeros_(self.rnn_cell.bias_ih) - init.zeros_(self.rnn_cell.bias_hh) - - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - return self.hidden_size - - @property - def state_dim(self) -> int: - """ Dimension of model state """ - if self.rnn_type == 'lstm': - return 2 * self.hidden_size - else: - return self.hidden_size - - def zero_state(self, batch_size): - """ Potential state for the model """ - return torch.zeros(batch_size, self.state_dim) - - def forward(self, input_data, state): - if self.rnn_type == 'lstm': - hidden_state, cell_state = torch.split(state, self.hidden_size, 1) - hidden_state, cell_state = self.rnn_cell(input_data, (hidden_state, cell_state)) - new_state = torch.cat([hidden_state, cell_state], dim=1) - return hidden_state, new_state - else: - new_hidden_state = self.rnn_cell(input_data, state) - return new_hidden_state, new_hidden_state 
diff --git a/vel/net/modular.py b/vel/net/modular.py index 9416992a..c23bf0a6 100644 --- a/vel/net/modular.py +++ b/vel/net/modular.py @@ -54,7 +54,8 @@ def reset_state(self, state, dones): raise NotImplementedError def forward(self, input_data, state=None): - return self.layers(input_data) + context = {} + return self.layers(input_data, context=context) class StatefulModularNetwork(BackboneNetwork): @@ -80,14 +81,33 @@ def size_hints(self) -> SizeHints: def zero_state(self, batch_size): """ Potential state for the model """ - raise NotImplementedError + zero_state = {} + + for l in self.layers: + layer_zero_state = l.zero_state(batch_size) + if layer_zero_state is not None: + zero_state.update(layer_zero_state) + + return zero_state def reset_state(self, state, dones): """ Reset the state after the episode has been terminated """ raise NotImplementedError - def forward(self, input_data, state=None): - raise NotImplementedError + def forward(self, input_data, state): + data = input_data + + context = {} + output_state = {} + + for layer in self.layers: + if layer.is_stateful: + data, new_state = layer(data, state=state, context=context) + output_state.update(new_state) + else: + data = layer(data, state=state, context=context) + + return data, output_state class ModularNetworkFactory(ModelFactory): diff --git a/vel/rl/env_roller/step_env_roller.py b/vel/rl/env_roller/step_env_roller.py index 1a6c22a5..44782283 100644 --- a/vel/rl/env_roller/step_env_roller.py +++ b/vel/rl/env_roller/step_env_roller.py @@ -5,7 +5,8 @@ from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import Trajectories, Rollout, EnvRollerBase, EnvRollerFactoryBase, RlPolicy from vel.rl.util.actor import PolicyActor -from vel.util.tensor_util import TensorAccumulator +from vel.util.tensor_util import TensorAccumulator, to_device +from vel.util.datastructure import flatten_dict class StepEnvRoller(EnvRollerBase): @@ -36,15 +37,17 @@ def rollout(self, batch_info: BatchInfo, 
number_of_steps: int) -> Rollout: for step_idx in range(number_of_steps): step = self.actor.act(self.last_observation.to(self.device), deterministic=False) + cpu_step = to_device(step, torch.device('cpu')) # Add step to the tensor accumulator - for name, tensor in step.items(): + for name, tensor in cpu_step.items(): + # Take not that here we convert all the tensors to CPU - accumulator.add(name, tensor.cpu()) + accumulator.add(name, tensor) accumulator.add('observations', self.last_observation) - actions_numpy = step['actions'].detach().cpu().numpy() + actions_numpy = cpu_step['actions'].detach().numpy() new_obs, new_rewards, new_dones, new_infos = self.environment.step(actions_numpy) # Done is flagged true when the episode has ended AND the frame we see is already a first frame from the @@ -63,11 +66,14 @@ def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: # Perform last agent step, without advancing the state final_obs = self.actor.act(self.last_observation.to(self.device), advance_state=False) + cpu_final_obs = to_device(final_obs, torch.device('cpu')) rollout_tensors = {} - for key, value in final_obs.items(): - rollout_tensors[f"final_{key}"] = value.cpu() + flatten_dict(cpu_final_obs, rollout_tensors, root='final') + + # for key, value in final_obs.items(): + # rollout_tensors[f"final_{key}"] = value.cpu() return Trajectories( num_steps=accumulated_tensors['observations'].size(0), diff --git a/vel/rl/env_roller/trajectory_replay_env_roller.py b/vel/rl/env_roller/trajectory_replay_env_roller.py index a9cdc06b..1a788025 100644 --- a/vel/rl/env_roller/trajectory_replay_env_roller.py +++ b/vel/rl/env_roller/trajectory_replay_env_roller.py @@ -108,7 +108,7 @@ def sample(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: final_values = self.actor.value(last_observations).cpu() # Add 'final_values' to the rollout - rollout.rollout_tensors['final_values'] = final_values + rollout.rollout_tensors['final.values'] = final_values 
return rollout diff --git a/vel/rl/layer/purgatory/nature_cnn_rnn.py b/vel/rl/layer/nature_cnn_rnn.py similarity index 95% rename from vel/rl/layer/purgatory/nature_cnn_rnn.py rename to vel/rl/layer/nature_cnn_rnn.py index 6dccd7c9..699e7387 100644 --- a/vel/rl/layer/purgatory/nature_cnn_rnn.py +++ b/vel/rl/layer/nature_cnn_rnn.py @@ -2,6 +2,9 @@ from vel.rl.backbone.nature_cnn import NatureCnn from vel.module.rnn_cell import RnnCell +from vel.api import SizeHint, SizeHints +from vel.net.layer_base import Layer, LayerFactory + class NatureCnnRnnBackbone(LinearBackboneModel): """ diff --git a/vel/rl/layer/rnn_cell.py b/vel/rl/layer/rnn_cell.py new file mode 100644 index 00000000..678b003f --- /dev/null +++ b/vel/rl/layer/rnn_cell.py @@ -0,0 +1,105 @@ +import torch +import torch.nn as nn +import torch.nn.init as init + + +from vel.api import SizeHint, SizeHints +from vel.net.layer_base import Layer, LayerFactory + + +class RnnCell(Layer): + """ Generalization of RNN cell (Simple RNN, LSTM or GRU) """ + + def __init__(self, name: str, input_size: int, hidden_size: int, rnn_type: str, bias: bool = True, + nonlinearity: str = 'tanh'): + super().__init__(name) + + assert rnn_type in {'rnn', 'lstm', 'gru'}, "Rnn type {} is not supported".format(rnn_type) + + self.input_size = input_size + self.hidden_size = hidden_size + self.rnn_type = rnn_type + + if self.rnn_type == 'rnn': + self.rnn_cell = nn.RNNCell( + input_size=input_size, hidden_size=hidden_size, bias=bias, nonlinearity=nonlinearity + ) + elif self.rnn_type == 'lstm': + self.rnn_cell = nn.LSTMCell(input_size=input_size, hidden_size=hidden_size, bias=bias) + elif self.rnn_type == 'gru': + self.rnn_cell = nn.GRUCell(input_size=input_size, hidden_size=hidden_size, bias=bias) + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return True + + def reset_weights(self): + init.xavier_normal_(self.rnn_cell.weight_hh) + 
init.xavier_normal_(self.rnn_cell.weight_ih) + init.zeros_(self.rnn_cell.bias_ih) + init.zeros_(self.rnn_cell.bias_hh) + + def size_hints(self) -> SizeHints: + return SizeHints(SizeHint(None, self.hidden_size)) + + @property + def state_dim(self) -> int: + """ Dimension of model state """ + if self.rnn_type == 'lstm': + return 2 * self.hidden_size + else: + return self.hidden_size + + def zero_state(self, batch_size): + """ Potential state for the model """ + return {self.name: torch.zeros(batch_size, self.state_dim)} + + def forward(self, input_data, state: dict, context: dict = None): + """ Forward propagation of a single layer """ + if self.rnn_type == 'lstm': + state_tensor = state[self.name] + hidden_state, cell_state = torch.split(state_tensor, self.hidden_size, 1) + hidden_state, cell_state = self.rnn_cell(input_data, (hidden_state, cell_state)) + new_state = torch.cat([hidden_state, cell_state], dim=1) + return hidden_state, {self.name: new_state} + else: + state_tensor = state[self.name] + new_hidden_state = self.rnn_cell(input_data, state_tensor) + return new_hidden_state, {self.name: new_hidden_state} + + +class RnnCellFactory(LayerFactory): + """ Factory for the RnnCell layer """ + + def __init__(self, hidden_size: int, rnn_type: str, bias: bool = True, nonlinearity: str = 'tanh'): + self.hidden_size = hidden_size + self.rnn_type = rnn_type + self.bias = bias + self.nonlinearity = nonlinearity + + @property + def name_base(self) -> str: + return "rnn_cell" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + input_size = direct_input.assert_single().last() + + return RnnCell( + name=name, + input_size=input_size, + hidden_size=self.hidden_size, + rnn_type=self.rnn_type, + bias=self.bias, + nonlinearity=self.nonlinearity + ) + + +def create(hidden_size: int, rnn_type: str, bias: bool = True, nonlinearity: str = 'tanh'): + """ Vel factory function """ + return RnnCellFactory( + hidden_size=hidden_size, + 
rnn_type=rnn_type, + bias=bias, + nonlinearity=nonlinearity + ) diff --git a/vel/rl/module/stochastic_rnn_policy.py b/vel/rl/module/stochastic_rnn_policy.py new file mode 100644 index 00000000..ae2e17b2 --- /dev/null +++ b/vel/rl/module/stochastic_rnn_policy.py @@ -0,0 +1,76 @@ +import gym + +from vel.api import Network, BackboneNetwork + +from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head +from vel.rl.module.head.value_head import ValueHead +from vel.util.tensor_util import to_device + + +class StochasticRnnPolicy(Network): + """ + Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone + RNN version + """ + + def __init__(self, net: BackboneNetwork, action_space: gym.Space): + super().__init__() + + self.net = net + + assert self.net.is_stateful, "Must have a stateful backbone" + + (action_size, value_size) = self.net.size_hints().assert_tuple(2) + + self.action_head = make_stockastic_action_head( + action_space=action_space, + input_dim=action_size.last(), + ) + self.value_head = ValueHead( + input_dim=value_size.last() + ) + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return True + + def zero_state(self, batch_size): + return self.net.zero_state(batch_size) + + def reset_weights(self): + """ Initialize properly model weights """ + self.net.reset_weights() + self.action_head.reset_weights() + self.value_head.reset_weights() + + def forward(self, observations, state): + """ Calculate model outputs """ + (action_hidden, value_hidden), new_state = self.net(observations, state=state) + + action_output = self.action_head(action_hidden) + value_output = self.value_head(value_hidden) + + return action_output, value_output, new_state + + def reset_state(self, state, dones): + """ Reset the state after the episode has been terminated """ + if (dones > 0).any().item(): + dones_expanded = 
dones.unsqueeze(-1) + + zero_state = self.net.zero_state(dones.shape[0]) + + out_state = {} + + for key in state: + state_item = state[key] + zero_state_item = zero_state[key].to(state_item.device) + + final_item = state_item * (1 - dones_expanded) + zero_state_item * dones_expanded + + out_state[key] = final_item + + return out_state + else: + return state + diff --git a/vel/rl/policy/a2c.py b/vel/rl/policy/a2c.py index ba739527..68589fdd 100644 --- a/vel/rl/policy/a2c.py +++ b/vel/rl/policy/a2c.py @@ -55,7 +55,7 @@ def process_rollout(self, rollout: Rollout) -> Rollout: rewards_buffer=rollout.transition_tensors['rewards'], dones_buffer=rollout.transition_tensors['dones'], values_buffer=rollout.transition_tensors['values'], - final_values=rollout.rollout_tensors['final_values'], + final_values=rollout.rollout_tensors['final.values'], discount_factor=self.discount_factor, gae_lambda=self.gae_lambda, number_of_steps=rollout.num_steps diff --git a/vel/rl/policy/purgatory/a2c_rnn.py b/vel/rl/policy/a2c_rnn.py similarity index 70% rename from vel/rl/policy/purgatory/a2c_rnn.py rename to vel/rl/policy/a2c_rnn.py index 523b2f13..84b4ef50 100644 --- a/vel/rl/policy/purgatory/a2c_rnn.py +++ b/vel/rl/policy/a2c_rnn.py @@ -1,17 +1,20 @@ +import gym import torch import torch.nn.functional as F +from vel.api import ModelFactory, BatchInfo, BackboneNetwork from vel.metric.base import AveragingNamedMetric -from vel.calc.function import explained_variance -from vel.api import BackboneModel, ModelFactory, BatchInfo - from vel.rl.api import RlPolicy, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae +from vel.rl.module.stochastic_rnn_policy import StochasticRnnPolicy +from vel.util.situational import gym_space_to_size_hint +from vel.util.stats import explained_variance class A2CRnn(RlPolicy): """ Simplest policy gradient - calculate loss as an advantage of an actor versus value function """ - def __init__(self, policy: BackboneModel, 
entropy_coefficient, value_coefficient, discount_factor: float, + def __init__(self, net: BackboneNetwork, action_space: gym.Space, + entropy_coefficient, value_coefficient, discount_factor: float, gae_lambda=1.0): super().__init__(discount_factor) @@ -19,41 +22,40 @@ def __init__(self, policy: BackboneModel, entropy_coefficient, value_coefficient self.value_coefficient = value_coefficient self.gae_lambda = gae_lambda - self.policy = policy + self.net = StochasticRnnPolicy(net, action_space) - assert self.policy.is_stateful, "Policy must be stateful" + assert self.net.is_stateful, "Policy must be stateful" def reset_weights(self): """ Initialize properly model weights """ - self.policy.reset_weights() + self.net.reset_weights() def forward(self, observation, state=None): """ Calculate model outputs """ - return self.policy(observation, state=state) + return self.net(observation, state=state) def is_stateful(self) -> bool: - return self.policy.is_stateful + return self.net.is_stateful def zero_state(self, batch_size): - return self.policy.zero_state(batch_size) + return self.net.zero_state(batch_size) def reset_state(self, state, dones): - return self.policy.reset_state(state, dones) + return self.net.reset_state(state, dones) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ action_pd_params, value_output, next_state = self(observation, state=state) - - actions = self.policy.action_head.sample(action_pd_params, deterministic=deterministic) + actions = self.net.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action - logprobs = self.policy.action_head.logprob(actions, action_pd_params) + logprobs = self.net.action_head.logprob(actions, action_pd_params) return { + 'action:logprobs': logprobs, 'actions': actions, 'state': next_state, 'values': value_output, - 'action:logprobs': logprobs } def process_rollout(self, rollout: Rollout) -> Rollout: @@ -64,7 +66,7 @@ 
def process_rollout(self, rollout: Rollout) -> Rollout: rewards_buffer=rollout.transition_tensors['rewards'], dones_buffer=rollout.transition_tensors['dones'], values_buffer=rollout.transition_tensors['values'], - final_values=rollout.rollout_tensors['final_values'], + final_values=rollout.rollout_tensors['final.values'], discount_factor=self.discount_factor, gae_lambda=self.gae_lambda, number_of_steps=rollout.num_steps @@ -77,6 +79,18 @@ def process_rollout(self, rollout: Rollout) -> Rollout: return rollout + def _extract_initial_state(self, transition_tensors): + """ Extract initial state from the state dictionary """ + state = {} + + idx = len('state') + 1 + + for key, value in transition_tensors.items(): + if key.startswith('state'): + state[key[idx:]] = value[0] + + return state + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: """ Calculate loss of the supplied rollout """ assert isinstance(rollout, Trajectories), "For an RNN model, we must evaluate trajectories" @@ -89,7 +103,7 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: # Let's evaluate the model observations = rollout.transition_tensors['observations'] - hidden_state = rollout.transition_tensors['state'][0] # Initial hidden state + hidden_state = self._extract_initial_state(rollout.transition_tensors) dones = rollout.transition_tensors['dones'] action_accumulator = [] @@ -106,8 +120,8 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: pd_params = torch.cat(action_accumulator, dim=0) model_values = torch.cat(value_accumulator, dim=0) - log_probs = self.policy.action_head.logprob(actions, pd_params) - entropy = self.policy.action_head.entropy(pd_params) + log_probs = self.net.action_head.logprob(actions, pd_params) + entropy = self.net.action_head.entropy(pd_params) # Actual calculations. 
Pretty trivial policy_loss = -torch.mean(advantages * log_probs) @@ -141,8 +155,8 @@ def metrics(self) -> list: class A2CRnnFactory(ModelFactory): """ Factory class for policy gradient models """ - def __init__(self, policy, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): - self.policy = policy + def __init__(self, net_factory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): + self.net_factory = net_factory self.entropy_coefficient = entropy_coefficient self.value_coefficient = value_coefficient self.discount_factor = discount_factor @@ -150,11 +164,16 @@ def __init__(self, policy, entropy_coefficient, value_coefficient, discount_fact def instantiate(self, **extra_args): """ Instantiate the model """ - # action_space = extra_args.pop('action_space') - policy = self.policy.instantiate(**extra_args) + action_space = extra_args.pop('action_space') + observation_space = extra_args.pop('observation_space') + + size_hint = gym_space_to_size_hint(observation_space) + + net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) return A2CRnn( - policy=policy, + net=net, + action_space=action_space, entropy_coefficient=self.entropy_coefficient, value_coefficient=self.value_coefficient, discount_factor=self.discount_factor, @@ -162,10 +181,10 @@ def instantiate(self, **extra_args): ) -def create(policy: BackboneModel, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): +def create(net: ModelFactory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): """ Vel factory function """ return A2CRnnFactory( - policy=policy, + net_factory=net, entropy_coefficient=entropy_coefficient, value_coefficient=value_coefficient, discount_factor=discount_factor, diff --git a/vel/rl/policy/acer.py b/vel/rl/policy/acer.py index 433a4ea2..3d30f733 100644 --- a/vel/rl/policy/acer.py +++ b/vel/rl/policy/acer.py @@ -136,7 +136,7 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: 
Rollout) -> dict: action_q.reshape(trajectory_rewards.size()), model_state_values.reshape(trajectory_rewards.size()), actions_rho.reshape(trajectory_rewards.size()), - rollout.rollout_tensors['final_values'] + rollout.rollout_tensors['final.values'] ).flatten() advantages = q_retraced - model_state_values diff --git a/vel/rl/policy/ppo.py b/vel/rl/policy/ppo.py index 1c922f10..4e68445b 100644 --- a/vel/rl/policy/ppo.py +++ b/vel/rl/policy/ppo.py @@ -63,7 +63,7 @@ def process_rollout(self, rollout: Rollout): rewards_buffer=rollout.transition_tensors['rewards'], dones_buffer=rollout.transition_tensors['dones'], values_buffer=rollout.transition_tensors['values'], - final_values=rollout.rollout_tensors['final_values'], + final_values=rollout.rollout_tensors['final.values'], discount_factor=self.discount_factor, gae_lambda=self.gae_lambda, number_of_steps=rollout.num_steps diff --git a/vel/rl/policy/purgatory/ppo_rnn.py b/vel/rl/policy/ppo_rnn.py similarity index 79% rename from vel/rl/policy/purgatory/ppo_rnn.py rename to vel/rl/policy/ppo_rnn.py index 76c2daad..fc28e2f1 100644 --- a/vel/rl/policy/purgatory/ppo_rnn.py +++ b/vel/rl/policy/ppo_rnn.py @@ -1,19 +1,21 @@ -import torch - import numbers -from vel.api import BackboneModel, BatchInfo, ModelFactory -from vel.calc.function import explained_variance +import gym +import torch + +from vel.api import BatchInfo, ModelFactory, BackboneNetwork from vel.function.constant import ConstantSchedule from vel.metric.base import AveragingNamedMetric - from vel.rl.api import RlPolicy, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae +from vel.rl.module.stochastic_rnn_policy import StochasticRnnPolicy +from vel.util.situational import gym_space_to_size_hint +from vel.util.stats import explained_variance class PPORnn(RlPolicy): """ Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 """ - def __init__(self, policy: BackboneModel, + def __init__(self, net: BackboneNetwork, action_space: 
gym.Space, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): super().__init__(discount_factor) @@ -28,43 +30,43 @@ def __init__(self, policy: BackboneModel, else: self.cliprange = cliprange - self.policy = policy + self.net = StochasticRnnPolicy(net, action_space) - assert self.policy.is_stateful, "Policy must be stateful" + assert self.net.is_stateful, "Policy must be stateful" def reset_weights(self): """ Initialize properly model weights """ - self.policy.reset_weights() + self.net.reset_weights() def forward(self, observation, state=None): """ Calculate model outputs """ - return self.policy.forward(observation, state=state) + return self.net(observation, state=state) def is_stateful(self) -> bool: - return self.policy.is_stateful + return self.net.is_stateful def zero_state(self, batch_size): - return self.policy.zero_state(batch_size) + return self.net.zero_state(batch_size) def reset_state(self, state, dones): - return self.policy.reset_state(state, dones) + return self.net.reset_state(state, dones) def act(self, observation, state=None, deterministic=False): """ Select actions based on model's output """ action_pd_params, value_output, next_state = self(observation, state=state) - actions = self.policy.action_head.sample(action_pd_params, deterministic=deterministic) + actions = self.net.action_head.sample(action_pd_params, deterministic=deterministic) # log likelihood of selected action - logprobs = self.policy.action_head.logprob(actions, action_pd_params) + logprobs = self.net.action_head.logprob(actions, action_pd_params) return { + 'action:logprobs': logprobs, 'actions': actions, - 'values': value_output, 'state': next_state, - 'action:logprobs': logprobs + 'values': value_output, } - def process_rollout(self, rollout: Rollout): + def process_rollout(self, rollout: Rollout) -> Rollout: """ Process rollout for optimization before any chunking/shuffling """ assert 
isinstance(rollout, Trajectories), "PPO requires trajectory rollouts" @@ -72,7 +74,7 @@ def process_rollout(self, rollout: Rollout): rewards_buffer=rollout.transition_tensors['rewards'], dones_buffer=rollout.transition_tensors['dones'], values_buffer=rollout.transition_tensors['values'], - final_values=rollout.rollout_tensors['final_values'], + final_values=rollout.rollout_tensors['final.values'], discount_factor=self.discount_factor, gae_lambda=self.gae_lambda, number_of_steps=rollout.num_steps @@ -85,6 +87,18 @@ def process_rollout(self, rollout: Rollout): return rollout + def _extract_initial_state(self, transition_tensors): + """ Extract initial state from the state dictionary """ + state = {} + + idx = len('state') + 1 + + for key, value in transition_tensors.items(): + if key.startswith('state'): + state[key[idx:]] = value[0] + + return state + def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: """ Calculate loss of the supplied rollout """ assert isinstance(rollout, Trajectories), "For an RNN model, we must evaluate trajectories" @@ -98,7 +112,7 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: # PART 0.1 - Model evaluation observations = rollout.transition_tensors['observations'] - hidden_state = rollout.transition_tensors['state'][0] # Initial hidden state + hidden_state = self._extract_initial_state(rollout.transition_tensors) dones = rollout.transition_tensors['dones'] action_accumulator = [] @@ -115,8 +129,8 @@ def calculate_gradient(self, batch_info: BatchInfo, rollout: Rollout) -> dict: pd_params = torch.cat(action_accumulator, dim=0) model_values = torch.cat(value_accumulator, dim=0) - model_action_logprobs = self.policy.action_head.logprob(actions, pd_params) - entropy = self.policy.action_head.entropy(pd_params) + model_action_logprobs = self.net.action_head.logprob(actions, pd_params) + entropy = self.net.action_head.entropy(pd_params) # Select the cliprange current_cliprange = 
self.cliprange.value(batch_info['progress']) @@ -178,10 +192,10 @@ def metrics(self) -> list: class PPORnnFactory(ModelFactory): """ Factory class for policy gradient models """ - def __init__(self, policy: BackboneModel, + def __init__(self, net_factory, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): - self.policy = policy + self.net_factory = net_factory self.entropy_coefficient = entropy_coefficient self.value_coefficient = value_coefficient self.cliprange = cliprange @@ -191,11 +205,17 @@ def __init__(self, policy: BackboneModel, def instantiate(self, **extra_args): """ Instantiate the model """ - policy = self.policy.instantiate(**extra_args) + action_space = extra_args.pop('action_space') + observation_space = extra_args.pop('observation_space') + + size_hint = gym_space_to_size_hint(observation_space) + + net = self.net_factory.instantiate(size_hint=size_hint, **extra_args) return PPORnn( - policy=policy, + net=net, entropy_coefficient=self.entropy_coefficient, + action_space=action_space, value_coefficient=self.value_coefficient, cliprange=self.cliprange, discount_factor=self.discount_factor, @@ -204,12 +224,12 @@ def instantiate(self, **extra_args): ) -def create(policy: BackboneModel, +def create(net: ModelFactory, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): """ Vel factory function """ return PPORnnFactory( - policy=policy, + net_factory=net, entropy_coefficient=entropy_coefficient, value_coefficient=value_coefficient, cliprange=cliprange, diff --git a/vel/rl/policy/purgatory/__init__.py b/vel/rl/policy/purgatory/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/vel/rl/policy/trpo.py b/vel/rl/policy/trpo.py index c7fe2cda..0e4cba3a 100644 --- a/vel/rl/policy/trpo.py +++ b/vel/rl/policy/trpo.py @@ -167,7 +167,7 @@ def process_rollout(self, rollout: 
Rollout): rewards_buffer=rollout.transition_tensors['rewards'], dones_buffer=rollout.transition_tensors['dones'], values_buffer=rollout.transition_tensors['values'], - final_values=rollout.rollout_tensors['final_values'], + final_values=rollout.rollout_tensors['final.values'], discount_factor=self.discount_factor, gae_lambda=self.gae_lambda, number_of_steps=rollout.num_steps diff --git a/vel/rl/xpolicy/stochastic_rnn_policy.py b/vel/rl/xpolicy/stochastic_rnn_policy.py deleted file mode 100644 index de8754b1..00000000 --- a/vel/rl/xpolicy/stochastic_rnn_policy.py +++ /dev/null @@ -1,99 +0,0 @@ -import gym -import typing - -from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.module.stochastic_action_head import StochasticActionHead -from vel.rl.module.value_head import ValueHead - - -class StochasticRnnPolicy(BackboneModel): - """ - Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone - RNN version - """ - - def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, - action_space: gym.Space): - super().__init__() - - self.input_block = input_block - self.backbone = backbone - - assert self.backbone.is_stateful, "Must have a stateful backbone" - - self.action_head = StochasticActionHead( - action_space=action_space, - input_dim=self.backbone.output_dim - ) - self.value_head = ValueHead(input_dim=self.backbone.output_dim) - - assert self.backbone.is_stateful, "Backbone must be a recurrent model" - - @property - def is_stateful(self) -> bool: - """ If the model has a state that needs to be fed between individual observations """ - return True - - def zero_state(self, batch_size): - return self.backbone.zero_state(batch_size) - - def reset_weights(self): - """ Initialize properly model weights """ - self.input_block.reset_weights() - self.backbone.reset_weights() - self.action_head.reset_weights() - 
self.value_head.reset_weights() - - def forward(self, observations, state): - """ Calculate model outputs """ - input_data = self.input_block(observations) - base_output, new_state = self.backbone(input_data, state=state) - - action_output = self.action_head(base_output) - value_output = self.value_head(base_output) - - return action_output, value_output, new_state - - def value(self, observation, state=None): - """ Calculate only value head for given state """ - input_data = self.input_block(observation) - - base_output, new_state = self.backbone(input_data, state) - value_output = self.value_head(base_output) - - return value_output - - def reset_state(self, state, dones): - """ Reset the state after the episode has been terminated """ - if (dones > 0).any().item(): - zero_state = self.backbone.zero_state(dones.shape[0]).to(state.device) - dones_expanded = dones.unsqueeze(-1) - return state * (1 - dones_expanded) + zero_state * dones_expanded - else: - return state - - -class StochasticRnnPolicyFactory(ModelFactory): - """ Factory class for policy gradient models """ - def __init__(self, input_block: ModelFactory, backbone: ModelFactory): - self.input_block = input_block - self.backbone = backbone - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - backbone = self.backbone.instantiate(**extra_args) - - return StochasticRnnPolicy(input_block, backbone, extra_args['action_space']) - - -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return StochasticRnnPolicyFactory( - input_block=input_block, - backbone=backbone - ) diff --git a/vel/util/datastructure.py b/vel/util/datastructure.py new file mode 100644 index 00000000..455c21e1 --- /dev/null +++ b/vel/util/datastructure.py @@ -0,0 +1,21 @@ +import typing + + +def flatten_dict(dictionary: dict, output: 
typing.Optional[dict] = None, root: str = '') -> dict: + """ From a nested dictionary built a flat version, concatenating keys with '.' """ + if output is None: + output = {} + + for key, value in dictionary.items(): + if isinstance(value, dict): + if root: + flatten_dict(value, output, f"{root}.{key}") + else: + flatten_dict(value, output, key) + else: + if root: + output[f"{root}.{key}"] = value + else: + output[key] = value + + return output diff --git a/vel/util/tensor_util.py b/vel/util/tensor_util.py index 29f41ab3..db524bda 100644 --- a/vel/util/tensor_util.py +++ b/vel/util/tensor_util.py @@ -41,7 +41,11 @@ def __init__(self): self.accumulants = collections.defaultdict(list) def add(self, name, tensor): - self.accumulants[name].append(tensor) + if isinstance(tensor, dict): + for subname, subtensor in tensor.items(): + self.add(f"{name}.{subname}", subtensor) + else: + self.accumulants[name].append(tensor) def result(self): """ Concatenate accumulated tensors """ From 8e32284e8dd232d22cbbfc6d13bef993e445d5b8 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 15:59:26 -0700 Subject: [PATCH 117/162] Clean up purgatory files. 
--- vel/rl/layer/purgatory/__init__.py | 0 vel/rl/layer/purgatory/mlp_rnn.py | 59 ------- vel/rl/layer/purgatory/noisy_nature_cnn.py | 103 ------------ vel/rl/layer/purgatory/rnn.py | 47 ------ vel/rl/xpolicy/__init__.py | 0 vel/rl/xpolicy/purgatory/__init__.py | 0 .../purgatory/old_stochastic_policy.py | 123 -------------- .../purgatory/old_stochastic_rnn_policy.py | 153 ------------------ .../purgatory/q_distributional_policy.py | 144 ----------------- vel/rl/xpolicy/purgatory/q_dueling_policy.py | 73 --------- vel/rl/xpolicy/purgatory/q_model.py | 97 ----------- vel/rl/xpolicy/purgatory/q_noisy_model.py | 86 ---------- .../purgatory/q_stochastic_policy_model.py | 128 --------------- vel/rl/xpolicy/stochastic_policy_separate.py | 94 ----------- 14 files changed, 1107 deletions(-) delete mode 100644 vel/rl/layer/purgatory/__init__.py delete mode 100644 vel/rl/layer/purgatory/mlp_rnn.py delete mode 100644 vel/rl/layer/purgatory/noisy_nature_cnn.py delete mode 100644 vel/rl/layer/purgatory/rnn.py delete mode 100644 vel/rl/xpolicy/__init__.py delete mode 100644 vel/rl/xpolicy/purgatory/__init__.py delete mode 100644 vel/rl/xpolicy/purgatory/old_stochastic_policy.py delete mode 100644 vel/rl/xpolicy/purgatory/old_stochastic_rnn_policy.py delete mode 100644 vel/rl/xpolicy/purgatory/q_distributional_policy.py delete mode 100644 vel/rl/xpolicy/purgatory/q_dueling_policy.py delete mode 100644 vel/rl/xpolicy/purgatory/q_model.py delete mode 100644 vel/rl/xpolicy/purgatory/q_noisy_model.py delete mode 100644 vel/rl/xpolicy/purgatory/q_stochastic_policy_model.py delete mode 100644 vel/rl/xpolicy/stochastic_policy_separate.py diff --git a/vel/rl/layer/purgatory/__init__.py b/vel/rl/layer/purgatory/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/vel/rl/layer/purgatory/mlp_rnn.py b/vel/rl/layer/purgatory/mlp_rnn.py deleted file mode 100644 index 1e871ffb..00000000 --- a/vel/rl/layer/purgatory/mlp_rnn.py +++ /dev/null @@ -1,59 +0,0 @@ -import typing - 
-from vel.api import LinearBackboneModel, ModelFactory -from vel.rl.backbone.mlp import MLP -from vel.rl.backbone.rnn import RNN - - -class MlpRnn(LinearBackboneModel): - """ MLP followed by an RNN - another simple policy backbone """ - - def __init__(self, input_length: int, mlp_layers: typing.List[int], rnn_units: int, rnn_type: str = 'lstm', - mlp_activation: str = 'tanh', mlp_normalization: typing.Optional[str] = None): - super().__init__() - - self.mlp = MLP( - input_length=input_length, hidden_layers=mlp_layers, activation=mlp_activation, - normalization=mlp_normalization - ) - - self.rnn = RNN(input_length=self.mlp.output_dim, hidden_units=rnn_units, rnn_type=rnn_type) - - @property - def output_dim(self) -> int: - return self.rnn.output_dim - - @property - def state_dim(self) -> int: - """ Initial state of the network """ - return self.rnn.state_dim - - @property - def is_stateful(self) -> bool: - """ If the model has a state that needs to be fed between individual observations """ - return True - - def zero_state(self, batch_size): - """ Potential state for the model """ - return self.rnn.zero_state(batch_size) - - def forward(self, input_data, state): - mlp_output = self.mlp(input_data) - hidden_state, new_state = self.rnn(mlp_output, state) - return hidden_state, new_state - - -def create(input_length: int, mlp_layers: typing.List[int], rnn_units: int, rnn_type: str = 'lstm', - mlp_activation: str = 'tanh', mlp_normalization: typing.Optional[str] = None): - """ Vel factory function """ - def instantiate(**_): - return MlpRnn( - input_length=input_length, - mlp_layers=mlp_layers, - rnn_units=rnn_units, - rnn_type=rnn_type, - mlp_activation=mlp_activation, - mlp_normalization=mlp_normalization - ) - - return ModelFactory.generic(instantiate) diff --git a/vel/rl/layer/purgatory/noisy_nature_cnn.py b/vel/rl/layer/purgatory/noisy_nature_cnn.py deleted file mode 100644 index 08ff71ba..00000000 --- a/vel/rl/layer/purgatory/noisy_nature_cnn.py +++ /dev/null @@ 
-1,103 +0,0 @@ -""" -Code based loosely on implementation: -https://github.com/openai/baselines/blob/master/baselines/ppo2/policies.py - -Under MIT license. -""" -import numpy as np - -import torch.nn as nn -import torch.nn.init as init -import torch.nn.functional as F - -import vel.util.network as net_util - -from vel.api import LinearBackboneModel, ModelFactory -from vel.rl.module.noisy_linear import NoisyLinear - - -class NoisyNatureCnn(LinearBackboneModel): - """ - Neural network as defined in the paper 'Human-level control through deep reinforcement learning' - implemented via "Noisy Networks for Exploration" - """ - def __init__(self, input_width, input_height, input_channels, output_dim=512, initial_std_dev=0.4, - factorized_noise=True): - super().__init__() - - self._output_dim = output_dim - - self.conv1 = nn.Conv2d( - in_channels=input_channels, - out_channels=32, - kernel_size=(8, 8), - stride=4 - ) - - self.conv2 = nn.Conv2d( - in_channels=32, - out_channels=64, - kernel_size=(4, 4), - stride=2 - ) - - self.conv3 = nn.Conv2d( - in_channels=64, - out_channels=64, - kernel_size=(3, 3), - stride=1 - ) - - layer_series = [ - (8, 0, 4), - (4, 0, 2), - (3, 0, 1) - ] - - self.final_width = net_util.convolutional_layer_series(input_width, layer_series) - self.final_height = net_util.convolutional_layer_series(input_height, layer_series) - - self.linear_layer = NoisyLinear( - self.final_width * self.final_height * 64, # 64 is the number of channels of the last conv layer - self.output_dim, - initial_std_dev=initial_std_dev, - factorized_noise=factorized_noise - ) - - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - return self._output_dim - - def reset_weights(self): - """ Call proper initializers for the weights """ - for m in self.modules(): - if isinstance(m, nn.Conv2d): - # init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - init.orthogonal_(m.weight, gain=np.sqrt(2)) - init.constant_(m.bias, 0.0) - elif 
isinstance(m, nn.Linear): - # init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - init.orthogonal_(m.weight, gain=np.sqrt(2)) - init.constant_(m.bias, 0.0) - elif isinstance(m, NoisyLinear): - m.reset_weights() - - def forward(self, image): - result = image - result = F.relu(self.conv1(result)) - result = F.relu(self.conv2(result)) - result = F.relu(self.conv3(result)) - flattened = result.view(result.size(0), -1) - return F.relu(self.linear_layer(flattened)) - - -def create(input_width, input_height, input_channels=1, output_dim=512, initial_std_dev=0.4, factorized_noise=True): - """ Vel factory function """ - def instantiate(**_): - return NoisyNatureCnn( - input_width=input_width, input_height=input_height, input_channels=input_channels, - output_dim=output_dim, initial_std_dev=initial_std_dev, factorized_noise=factorized_noise - ) - - return ModelFactory.generic(instantiate) diff --git a/vel/rl/layer/purgatory/rnn.py b/vel/rl/layer/purgatory/rnn.py deleted file mode 100644 index 5a6e9625..00000000 --- a/vel/rl/layer/purgatory/rnn.py +++ /dev/null @@ -1,47 +0,0 @@ -from vel.api import LinearBackboneModel, ModelFactory -from vel.module.rnn_cell import RnnCell - - -class RNN(LinearBackboneModel): - """ Simple recurrent model backbone """ - - def __init__(self, input_length: int, hidden_units: int, rnn_type: str = 'lstm'): - super().__init__() - - self.input_length = input_length - self.hidden_units = hidden_units - - self.rnn_cell = RnnCell(input_size=input_length, hidden_size=self.hidden_units, rnn_type=rnn_type) - - @property - def output_dim(self) -> int: - return self.rnn_cell.output_dim - - @property - def state_dim(self) -> int: - """ Initial state of the network """ - return self.rnn_cell.state_dim - - @property - def is_stateful(self) -> bool: - """ If the model has a state that needs to be fed between individual observations """ - return True - - def zero_state(self, batch_size): - """ Potential state for the model """ - return 
self.rnn_cell.zero_state(batch_size) - - def forward(self, input_data, state): - hidden_state, new_state = self.rnn_cell(input_data, state) - return hidden_state, new_state - - -def create(input_length: int, hidden_units: int, rnn_type: str = 'lstm'): - """ Vel factory function """ - def instantiate(**_): - return RNN( - input_length=input_length, - hidden_units=hidden_units, - rnn_type=rnn_type - ) - return ModelFactory.generic(instantiate) diff --git a/vel/rl/xpolicy/__init__.py b/vel/rl/xpolicy/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/vel/rl/xpolicy/purgatory/__init__.py b/vel/rl/xpolicy/purgatory/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/vel/rl/xpolicy/purgatory/old_stochastic_policy.py b/vel/rl/xpolicy/purgatory/old_stochastic_policy.py deleted file mode 100644 index 4fc5a16b..00000000 --- a/vel/rl/xpolicy/purgatory/old_stochastic_policy.py +++ /dev/null @@ -1,123 +0,0 @@ -import gym -import typing - -from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Evaluator, RlPolicy -from vel.rl.module.action_head import StochasticActionHead -from vel.rl.module.value_head import ValueHead - - -class StochasticPolicyEvaluator(Evaluator): - """ Evaluator for a policy gradient model """ - - def __init__(self, model: 'StochasticPolicyModel', rollout: Rollout): - super().__init__(rollout) - - self.model = model - - policy_params, estimated_values = model(self.rollout.batch_tensor('observations')) - - self.provide('model:policy_params', policy_params) - self.provide('model:values', estimated_values) - - @Evaluator.provides('model:action:logprobs') - def model_action_logprobs(self): - actions = self.get('rollout:actions') - policy_params = self.get('model:policy_params') - return self.model.action_head.logprob(actions, policy_params) - - @Evaluator.provides('model:entropy') - def model_entropy(self): - 
policy_params = self.get('model:policy_params') - return self.model.entropy(policy_params) - - -class StochasticPolicyModel(RlPolicy): - """ - Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone - """ - - def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, action_space: gym.Space): - super().__init__() - - self.input_block = input_block - self.backbone = backbone - self.action_head = StochasticActionHead( - action_space=action_space, - input_dim=self.backbone.output_dim - ) - self.value_head = ValueHead(input_dim=self.backbone.output_dim) - - def reset_weights(self): - """ Initialize properly model weights """ - self.input_block.reset_weights() - self.backbone.reset_weights() - self.action_head.reset_weights() - self.value_head.reset_weights() - - def forward(self, observations): - """ Calculate model outputs """ - input_data = self.input_block(observations) - - base_output = self.backbone(input_data) - - action_output = self.action_head(base_output) - value_output = self.value_head(base_output) - - return action_output, value_output - - def step(self, observation, deterministic=False): - """ Select actions based on model's output """ - action_pd_params, value_output = self(observation) - actions = self.action_head.sample(action_pd_params, deterministic=deterministic) - - # log likelihood of selected action - logprobs = self.action_head.logprob(actions, action_pd_params) - - return { - 'actions': actions, - 'values': value_output, - 'action:logprobs': logprobs - } - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return StochasticPolicyEvaluator(self, rollout) - - def logprob(self, action_sample, policy_params): - """ Calculate - log(prob) of selected actions """ - return self.action_head.logprob(action_sample, policy_params) - - def value(self, observations): - """ Calculate only value head for given state """ - input_data = 
self.input_block(observations) - base_output = self.backbone(input_data) - value_output = self.value_head(base_output) - return value_output - - def entropy(self, policy_params): - """ Entropy of a probability distribution """ - return self.action_head.entropy(policy_params) - - -class StochasticPolicyModelFactory(ModelFactory): - """ Factory class for policy gradient models """ - def __init__(self, input_block: IdentityFactory, backbone: ModelFactory): - self.backbone = backbone - self.input_block = input_block - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - backbone = self.backbone.instantiate(**extra_args) - - return StochasticPolicyModel(input_block, backbone, extra_args['action_space']) - - -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return StochasticPolicyModelFactory(input_block=input_block, backbone=backbone) diff --git a/vel/rl/xpolicy/purgatory/old_stochastic_rnn_policy.py b/vel/rl/xpolicy/purgatory/old_stochastic_rnn_policy.py deleted file mode 100644 index 25551144..00000000 --- a/vel/rl/xpolicy/purgatory/old_stochastic_rnn_policy.py +++ /dev/null @@ -1,153 +0,0 @@ -import gym -import torch -import typing - -from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Trajectories, Evaluator, RlRnnModel -from vel.rl.module.action_head import StochasticActionHead -from vel.rl.module.value_head import ValueHead - - -class StochasticPolicyRnnEvaluator(Evaluator): - """ Evaluate recurrent model from initial state """ - - def __init__(self, model: 'StochasticPolicyRnnModel', rollout: Rollout): - assert isinstance(rollout, Trajectories), "For an RNN model, we must evaluate trajectories" - super().__init__(rollout) - - self.model = model - - 
observation_trajectories = rollout.transition_tensors['observations'] - hidden_state = rollout.rollout_tensors['initial_hidden_state'] - - action_accumulator = [] - value_accumulator = [] - - # Evaluate recurrent network step by step - for i in range(observation_trajectories.size(0)): - action_output, value_output, hidden_state = model(observation_trajectories[i], hidden_state) - action_accumulator.append(action_output) - value_accumulator.append(value_output) - - policy_params = torch.cat(action_accumulator, dim=0) - estimated_values = torch.cat(value_accumulator, dim=0) - - self.provide('model:policy_params', policy_params) - self.provide('model:values', estimated_values) - - @Evaluator.provides('model:action:logprobs') - def model_action_logprobs(self): - actions = self.get('rollout:actions') - policy_params = self.get('model:policy_params') - return self.model.action_head.logprob(actions, policy_params) - - @Evaluator.provides('model:entropy') - def model_entropy(self): - policy_params = self.get('model:policy_params') - return self.model.entropy(policy_params) - - -class StochasticPolicyRnnModel(RlRnnModel): - """ - Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone - RNN version - """ - - def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, - action_space: gym.Space): - super().__init__() - - self.input_block = input_block - self.backbone = backbone - - self.action_head = StochasticActionHead( - action_space=action_space, - input_dim=self.backbone.output_dim - ) - self.value_head = ValueHead(input_dim=self.backbone.output_dim) - - assert self.backbone.is_stateful, "Backbone must be a recurrent model" - - @property - def state_dim(self) -> int: - """ Dimension of model state """ - return self.backbone.state_dim - - def reset_weights(self): - """ Initialize properly model weights """ - self.input_block.reset_weights() - self.backbone.reset_weights() - 
self.action_head.reset_weights() - self.value_head.reset_weights() - - def forward(self, observations, state): - """ Calculate model outputs """ - input_data = self.input_block(observations) - base_output, new_state = self.backbone(input_data, state=state) - - action_output = self.action_head(base_output) - value_output = self.value_head(base_output) - - return action_output, value_output, new_state - - def step(self, observations, state, deterministic=False): - """ Select actions based on model's output """ - action_pd_params, value_output, new_state = self(observations, state) - actions = self.action_head.sample(action_pd_params, deterministic=deterministic) - - # log likelihood of selected action - logprobs = self.action_head.logprob(actions, action_pd_params) - - return { - 'actions': actions, - 'values': value_output, - 'action:logprobs': logprobs, - 'state': new_state - } - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return StochasticPolicyRnnEvaluator(self, rollout) - - def logprob(self, action_sample, policy_params): - """ Calculate - log(prob) of selected actions """ - return self.action_head.logprob(action_sample, policy_params) - - def value(self, observations, state): - """ Calculate only value head for given state """ - input_data = self.input_block(observations) - - base_output, new_state = self.backbone(input_data, state) - value_output = self.value_head(base_output) - - return value_output - - def entropy(self, action_pd_params): - """ Entropy of a probability distribution """ - return self.action_head.entropy(action_pd_params) - - -class PolicyGradientRnnModelFactory(ModelFactory): - """ Factory class for policy gradient models """ - def __init__(self, input_block: ModelFactory, backbone: ModelFactory): - self.input_block = input_block - self.backbone = backbone - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - backbone = 
self.backbone.instantiate(**extra_args) - - return StochasticPolicyRnnModel(input_block, backbone, extra_args['action_space']) - - -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return PolicyGradientRnnModelFactory( - input_block=input_block, - backbone=backbone - ) diff --git a/vel/rl/xpolicy/purgatory/q_distributional_policy.py b/vel/rl/xpolicy/purgatory/q_distributional_policy.py deleted file mode 100644 index 4dde37cf..00000000 --- a/vel/rl/xpolicy/purgatory/q_distributional_policy.py +++ /dev/null @@ -1,144 +0,0 @@ -import gym -import typing - -from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, RlPolicy, Evaluator -from vel.rl.module.q_distributional_head import QDistributionalHead - - -class QDistributionalModelEvaluator(Evaluator): - """ Evaluate distributional q-model """ - def __init__(self, model: 'QDistributionalModel', rollout: Rollout): - super().__init__(rollout) - self.model = model - - @Evaluator.provides('model:q') - def model_q(self): - """ Action values for all (discrete) actions """ - # observations = self.get('rollout:observations') - # # This mean of last dimension collapses the histogram/calculates mean reward - # return self.model(observations).mean(dim=-1) - raise NotImplementedError - - @Evaluator.provides('model:q_dist') - def model_q_dist(self): - """ Action values for all (discrete) actions """ - observations = self.get('rollout:observations') - # This mean of last dimension collapses the histogram/calculates mean reward - return self.model(observations) - - @Evaluator.provides('model:action:q') - def model_action_q(self): - """ Action values for selected actions in the rollout """ - raise NotImplementedError - - @Evaluator.provides('model:action:q_dist') - def model_action_q_dist(self): - """ 
Action values for selected actions in the rollout """ - q = self.get('model:q_dist') - actions = self.get('rollout:actions') - return q[range(q.size(0)), actions] - - @Evaluator.provides('model:q_next') - def model_q_next(self): - """ Action values for all (discrete) actions """ - raise NotImplementedError - - @Evaluator.provides('model:q_dist_next') - def model_q_dist_next(self): - """ Action values for all (discrete) actions """ - observations = self.get('rollout:observations_next') - # This mean of last dimension collapses the histogram/calculates mean reward - return self.model(observations) - - -class QDistributionalModel(RlPolicy): - """ - A deterministic greedy action-value model that learns a value function distribution rather than - just an expectation. - Supports only discrete action spaces (ones that can be enumerated) - """ - def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, action_space: gym.Space, - vmin: float, vmax: float, atoms: int = 1): - super().__init__() - - self.action_space = action_space - - self.input_block = input_block - self.backbone = backbone - - self.q_head = QDistributionalHead( - input_dim=backbone.output_dim, action_space=action_space, - vmin=vmin, vmax=vmax, - atoms=atoms - ) - - def reset_weights(self): - """ Initialize weights to reasonable defaults """ - self.input_block.reset_weights() - self.backbone.reset_weights() - self.q_head.reset_weights() - - def forward(self, observations): - """ Model forward pass """ - input_data = self.input_block(observations) - base_output = self.backbone(input_data) - log_histogram = self.q_head(base_output) - return log_histogram - - def histogram_info(self): - """ Return extra information about histogram """ - return self.q_head.histogram_info() - - def step(self, observations): - """ Sample action from an action space for given state """ - log_histogram = self(observations) - actions = self.q_head.sample(log_histogram) - - return { - 'actions': actions, - 
'log_histogram': log_histogram - } - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return QDistributionalModelEvaluator(self, rollout) - - -class QDistributionalModelFactory(ModelFactory): - """ Factory class for q-learning models """ - def __init__(self, input_block: ModelFactory, backbone: ModelFactory, vmin: float, vmax: float, atoms: int): - self.input_block = input_block - self.backbone = backbone - self.vmin = vmin - self.vmax = vmax - self.atoms = atoms - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - backbone = self.backbone.instantiate(**extra_args) - - return QDistributionalModel( - input_block=input_block, - backbone=backbone, - action_space=extra_args['action_space'], - vmin=self.vmin, - vmax=self.vmax, - atoms=self.atoms - ) - - -def create(backbone: ModelFactory, vmin: float, vmax: float, atoms: int, - input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return QDistributionalModelFactory( - input_block=input_block, backbone=backbone, - vmin=vmin, - vmax=vmax, - atoms=atoms - ) diff --git a/vel/rl/xpolicy/purgatory/q_dueling_policy.py b/vel/rl/xpolicy/purgatory/q_dueling_policy.py deleted file mode 100644 index 74fff35a..00000000 --- a/vel/rl/xpolicy/purgatory/q_dueling_policy.py +++ /dev/null @@ -1,73 +0,0 @@ -import gym -import typing - -from vel.api import LinearBackboneModel, Model, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Evaluator -from vel.rl.module.q_dueling_head import QDuelingHead -from vel.rl.model.q_model import QModelEvaluator - - -class QDuelingModel(Model): - """ - Deterministic greedy action-value model with dueling heads (kind of actor and critic) - Supports only discrete action spaces (ones that can be enumerated) - """ - - def __init__(self, 
input_block: BackboneModel, backbone: LinearBackboneModel, action_space: gym.Space): - super().__init__() - - self.action_space = action_space - - self.input_block = input_block - self.backbone = backbone - self.q_head = QDuelingHead(input_dim=backbone.output_dim, action_space=action_space) - - def forward(self, observations): - """ Model forward pass """ - observations = self.input_block(observations) - advantage_features, value_features = self.backbone(observations) - q_values = self.q_head(advantage_features, value_features) - - return q_values - - def reset_weights(self): - """ Initialize weights to reasonable defaults """ - self.input_block.reset_weights() - self.backbone.reset_weights() - self.q_head.reset_weights() - - def step(self, observations): - """ Sample action from an action space for given state """ - q_values = self(observations) - - return { - 'actions': self.q_head.sample(q_values), - 'q': q_values - } - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return QModelEvaluator(self, rollout) - - -class QDuelingModelFactory(ModelFactory): - """ Factory class for policy gradient models """ - def __init__(self, input_block: ModelFactory, backbone: ModelFactory): - self.input_block = input_block - self.backbone = backbone - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - backbone = self.backbone.instantiate(**extra_args) - - return QDuelingModel(input_block, backbone, extra_args['action_space']) - - -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return QDuelingModelFactory(input_block=input_block, backbone=backbone) diff --git a/vel/rl/xpolicy/purgatory/q_model.py b/vel/rl/xpolicy/purgatory/q_model.py deleted file mode 100644 index 2fbd4513..00000000 --- a/vel/rl/xpolicy/purgatory/q_model.py +++ /dev/null 
@@ -1,97 +0,0 @@ -import gym -import typing - -from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, RlPolicy -from vel.rl.module.q_head import QHead - - -# class QModelEvaluator(Evaluator): -# """ Evaluate simple q-model """ -# def __init__(self, model: 'QModel', rollout: Rollout): -# super().__init__(rollout) -# self.model = model -# -# @Evaluator.provides('model:q') -# def model_q(self): -# """ Action values for all (discrete) actions """ -# observations = self.get('rollout:observations') -# return self.model(observations) -# -# @Evaluator.provides('model:action:q') -# def model_action_q(self): -# """ Action values for selected actions in the rollout """ -# q = self.get('model:q') -# actions = self.get('rollout:actions') -# return q.gather(1, actions.unsqueeze(1)).squeeze(1) -# -# @Evaluator.provides('model:q_next') -# def model_q_next(self): -# """ Action values for all (discrete) actions """ -# observations = self.get('rollout:observations_next') -# return self.model(observations) - - -class QModel(RlPolicy): - """ - Simple deterministic greedy action-value model. 
- Supports only discrete action spaces (ones that can be enumerated) - """ - def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, action_space: gym.Space): - super().__init__() - - self.action_space = action_space - - self.input_block = input_block - self.backbone = backbone - self.q_head = QHead(input_dim=backbone.output_dim, action_space=action_space) - - def reset_weights(self): - """ Initialize weights to reasonable defaults """ - self.input_block.reset_weights() - self.backbone.reset_weights() - self.q_head.reset_weights() - - def forward(self, observations): - """ Model forward pass """ - observations = self.input_block(observations) - base_output = self.backbone(observations) - q_values = self.q_head(base_output) - return q_values - - def step(self, observations): - """ Sample action from an action space for given state """ - q_values = self(observations) - actions = self.q_head.sample(q_values) - - return { - 'actions': actions, - 'q': q_values - } - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return QModelEvaluator(self, rollout) - - -class QModelFactory(ModelFactory): - """ Factory class for q-learning models """ - def __init__(self, input_block: ModelFactory, backbone: ModelFactory): - self.input_block = input_block - self.backbone = backbone - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - backbone = self.backbone.instantiate(**extra_args) - - return QModel(input_block, backbone, extra_args['action_space']) - - -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return QModelFactory(input_block=input_block, backbone=backbone) diff --git a/vel/rl/xpolicy/purgatory/q_noisy_model.py b/vel/rl/xpolicy/purgatory/q_noisy_model.py deleted file mode 100644 index 2ef6aab3..00000000 --- 
a/vel/rl/xpolicy/purgatory/q_noisy_model.py +++ /dev/null @@ -1,86 +0,0 @@ -import gym -import typing - -from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, RlPolicy, Evaluator -from vel.rl.model.q_model import QModelEvaluator -from vel.rl.module.q_noisy_head import QNoisyHead - - -class NoisyQModel(RlPolicy): - """ - NoisyNets action-value model. - Supports only discrete action spaces (ones that can be enumerated) - """ - - def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, action_space: gym.Space, - initial_std_dev=0.4, factorized_noise=True): - super().__init__() - - self.action_space = action_space - - self.input_block = input_block - self.backbone = backbone - self.q_head = QNoisyHead( - input_dim=backbone.output_dim, action_space=action_space, initial_std_dev=initial_std_dev, - factorized_noise=factorized_noise - ) - - def reset_weights(self): - """ Initialize weights to reasonable defaults """ - self.input_block.reset_weights() - self.backbone.reset_weights() - self.q_head.reset_weights() - - def forward(self, observations): - """ Model forward pass """ - observations = self.input_block(observations) - base_output = self.backbone(observations) - q_values = self.q_head(base_output) - return q_values - - def step(self, observations): - """ Sample action from an action space for given state """ - q_values = self(observations) - actions = self.q_head.sample(q_values) - - return { - 'actions': actions, - 'q': q_values - } - - def evaluate(self, rollout: Rollout) -> Evaluator: - """ Evaluate model on a rollout """ - return QModelEvaluator(self, rollout) - - -class NoisyQModelFactory(ModelFactory): - """ Factory class for q-learning models """ - def __init__(self, input_block: ModelFactory, backbone: ModelFactory, initial_std_dev=0.4, factorized_noise=True): - self.initial_std_dev = initial_std_dev - self.factorized_noise = 
factorized_noise - - self.input_block = input_block - self.backbone = backbone - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - backbone = self.backbone.instantiate(**extra_args) - - return NoisyQModel( - input_block, backbone, extra_args['action_space'], initial_std_dev=self.initial_std_dev, - factorized_noise=self.factorized_noise - ) - - -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None, initial_std_dev: float = 0.4, - factorized_noise: bool = True): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return NoisyQModelFactory( - input_block=input_block, backbone=backbone, initial_std_dev=initial_std_dev, factorized_noise=factorized_noise - ) diff --git a/vel/rl/xpolicy/purgatory/q_stochastic_policy_model.py b/vel/rl/xpolicy/purgatory/q_stochastic_policy_model.py deleted file mode 100644 index c489980d..00000000 --- a/vel/rl/xpolicy/purgatory/q_stochastic_policy_model.py +++ /dev/null @@ -1,128 +0,0 @@ -import gym -import torch -import typing - -from vel.api import LinearBackboneModel, Model, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.api import Rollout, Evaluator -from vel.rl.module.action_head import StochasticActionHead -from vel.rl.module.q_head import QHead - - -class QStochasticPolicyEvaluator(Evaluator): - """ Evaluator for QPolicyGradientModel """ - def __init__(self, model: 'QStochasticPolicyModel', rollout: Rollout): - super().__init__(rollout) - - self.model = model - - observations = self.get('rollout:observations') - logprobs, q = model(observations) - - self.provide('model:logprobs', logprobs) - self.provide('model:q', q) - - @Evaluator.provides('model:action:logprobs') - def model_action_logprobs(self): - actions = self.get('rollout_actions') - logprobs = self.get('model:logprobs') - return self.model.action_head.logprob(actions, logprobs) - - 
-class QStochasticPolicyModel(Model): - """ - A policy gradient model with an action-value critic head (instead of more common state-value critic head). - Supports only discrete action spaces (ones that can be enumerated) - """ - - def __init__(self, input_block: BackboneModel, backbone: LinearBackboneModel, action_space: gym.Space): - super().__init__() - - assert isinstance(action_space, gym.spaces.Discrete) - - self.input_block = input_block - self.backbone = backbone - - self.action_head = StochasticActionHead( - input_dim=self.backbone.output_dim, - action_space=action_space - ) - - self.q_head = QHead( - input_dim=self.backbone.output_dim, - action_space=action_space - ) - - def reset_weights(self): - """ Initialize properly model weights """ - self.input_block.reset_weights() - self.backbone.reset_weights() - self.action_head.reset_weights() - self.q_head.reset_weights() - - def forward(self, observations): - """ Calculate model outputs """ - input_data = self.input_block(observations) - - base_output = self.backbone(input_data) - policy_params = self.action_head(base_output) - - q = self.q_head(base_output) - - return policy_params, q - - def step(self, observation, deterministic=False): - """ Select actions based on model's output """ - policy_params, q = self(observation) - actions = self.action_head.sample(policy_params, deterministic=deterministic) - - # log probability - we can do that, because we support only discrete action spaces - logprobs = self.action_head.logprob(actions, policy_params) - - return { - 'actions': actions, - 'q': q, - 'logprobs': policy_params, - 'action:logprobs': logprobs - } - - def evaluate(self, rollout: Rollout) -> QStochasticPolicyEvaluator: - """ Evaluate model on a rollout """ - return QStochasticPolicyEvaluator(self, rollout) - - def value(self, observation): - """ Calculate only value head for given state """ - policy_params, q = self(observation) - - # Expectation of Q value with respect to action - return 
(torch.exp(policy_params) * q).sum(dim=1) - - def entropy(self, action_logits): - """ Entropy of a probability distribution """ - return self.action_head.entropy(action_logits) - - def kl_divergence(self, logits_q, logits_p): - """ Calculate KL-divergence between two probability distributions """ - return self.action_head.kl_divergence(logits_q, logits_p) - - -class QStochasticPolicyModelFactory(ModelFactory): - """ Factory class for policy gradient models """ - def __init__(self, input_block: IdentityFactory, backbone: ModelFactory): - self.backbone = backbone - self.input_block = input_block - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - backbone = self.backbone.instantiate(**extra_args) - - return QStochasticPolicyModel(input_block, backbone, extra_args['action_space']) - - -def create(backbone: ModelFactory, input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return QStochasticPolicyModelFactory(input_block=input_block, backbone=backbone) diff --git a/vel/rl/xpolicy/stochastic_policy_separate.py b/vel/rl/xpolicy/stochastic_policy_separate.py deleted file mode 100644 index afced37c..00000000 --- a/vel/rl/xpolicy/stochastic_policy_separate.py +++ /dev/null @@ -1,94 +0,0 @@ -import gym -import typing - -from vel.api import LinearBackboneModel, ModelFactory, BackboneModel -from vel.module.input.identity import IdentityFactory -from vel.rl.module.stochastic_action_head import StochasticActionHead -from vel.rl.module.value_head import ValueHead - - -class StochasticPolicyModelSeparate(BackboneModel): - """ - Policy gradient model class with an actor and critic heads that don't share a backbone - """ - - def __init__(self, input_block: BackboneModel, - policy_backbone: LinearBackboneModel, value_backbone: LinearBackboneModel, - action_space: gym.Space): - super().__init__() - - self.input_block = 
input_block - self.policy_backbone = policy_backbone - self.value_backbone = value_backbone - - self.action_head = StochasticActionHead( - action_space=action_space, - input_dim=self.policy_backbone.output_dim - ) - - self.value_head = ValueHead(input_dim=self.value_backbone.output_dim) - - def reset_weights(self): - """ Initialize properly model weights """ - self.input_block.reset_weights() - - self.policy_backbone.reset_weights() - self.value_backbone.reset_weights() - - self.action_head.reset_weights() - self.value_head.reset_weights() - - def forward(self, observations): - """ Calculate model outputs """ - input_data = self.input_block(observations) - - policy_base_output = self.policy_backbone(input_data) - value_base_output = self.value_backbone(input_data) - - action_output = self.action_head(policy_base_output) - value_output = self.value_head(value_base_output) - - return action_output, value_output - - def value(self, observations, state=None): - """ Calculate only value head for given state """ - input_data = self.input_block(observations) - base_output = self.value_backbone(input_data) - value_output = self.value_head(base_output) - return value_output - - def policy(self, observations): - """ Calculate only action head for given state """ - input_data = self.input_block(observations) - policy_base_output = self.policy_backbone(input_data) - policy_params = self.action_head(policy_base_output) - return policy_params - - -class StochasticPolicyModelSeparateFactory(ModelFactory): - """ Factory class for policy gradient models """ - def __init__(self, input_block: ModelFactory, policy_backbone: ModelFactory, value_backbone: ModelFactory): - self.input_block = input_block - self.policy_backbone = policy_backbone - self.value_backbone = value_backbone - - def instantiate(self, **extra_args): - """ Instantiate the model """ - input_block = self.input_block.instantiate() - policy_backbone = self.policy_backbone.instantiate(**extra_args) - value_backbone = 
self.value_backbone.instantiate(**extra_args) - - return StochasticPolicyModelSeparate(input_block, policy_backbone, value_backbone, extra_args['action_space']) - - -def create(policy_backbone: ModelFactory, value_backbone: ModelFactory, - input_block: typing.Optional[ModelFactory] = None): - """ Vel factory function """ - if input_block is None: - input_block = IdentityFactory() - - return StochasticPolicyModelSeparateFactory( - input_block=input_block, - policy_backbone=policy_backbone, - value_backbone=value_backbone - ) From aeec2ac8aaaf63313ff3bab0bb2f26d1dedb7e9b Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 15:59:37 -0700 Subject: [PATCH 118/162] Commit basic version of VQ-VAE. --- .../latent/mnist/mnist_cnn_vq_vae.yaml | 53 +++ .../autoencoders/mnist/mnist-vq-vae.ipynb | 239 ++++++++++++++ vel/model/latent/vq_vae.py | 306 ++++++++++++++++++ 3 files changed, 598 insertions(+) create mode 100644 examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml create mode 100644 examples-notebooks/autoencoders/mnist/mnist-vq-vae.ipynb create mode 100644 vel/model/latent/vq_vae.py diff --git a/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml b/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml new file mode 100644 index 00000000..bab34608 --- /dev/null +++ b/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml @@ -0,0 +1,53 @@ +name: 'mnist_cnn_vq_vae' + + +model: + name: vel.model.latent.vq_vae + img_rows: 28 + img_cols: 28 + img_channels: 1 + channels: [64, 128, 256] +# channels: [32, 64, 128] + + k: 128 + d: 64 + + +source: + name: vel.data.source.vision.mnist + + +loader: + name: vel.data.dataset_loader + batch_size: 128 +# num_workers: 4 +# pin_memory: true + + transformations: + - name: vel.data.transformation.to_array + - name: vel.data.transformation.image_to_tensor + - name: vel.data.transformation.unsupervised + + +optimizer: + name: vel.optimizer.radam + lr: 1.0e-3 + eps: 1.0e-4 + + +scheduler: + name: vel.scheduler.multi_step + gamma: 
0.71968 # 10 * (-1/7) + milestones: [ 1, 4, 13, 40, 121, 364, 1093, 3280] + + +commands: + augvis: + name: vel.command.augvis_command + samples: 10 + cases: 5 + + train: + name: vel.command.train_command +# epochs: 3280 + epochs: 50 diff --git a/examples-notebooks/autoencoders/mnist/mnist-vq-vae.ipynb b/examples-notebooks/autoencoders/mnist/mnist-vq-vae.ipynb new file mode 100644 index 00000000..32f55f3c --- /dev/null +++ b/examples-notebooks/autoencoders/mnist/mnist-vq-vae.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import torch\n", + "import tqdm\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "import vel\n", + "import vel.notebook as nb" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "nb.reasonable_notbook_defaults()\n", + "torch.set_grad_enabled(False) # We don't need autograd here\n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "config = nb.load_config('examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml', run_number=13, device='cuda:0') " + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "model = config.load_trained_model().to(config.device)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "VQVAE(\n", + " (codebook): VQEmbedding(k=128, d=64)\n", + " (encoder): Sequential(\n", + " (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (1): SELU(inplace=True)\n", + " (2): LayerNorm((64, 28, 28), eps=1e-05, elementwise_affine=True)\n", + " (3): Conv2d(64, 128, kernel_size=(3, 3), 
stride=(2, 2), padding=(1, 1))\n", + " (4): SELU(inplace=True)\n", + " (5): LayerNorm((128, 14, 14), eps=1e-05, elementwise_affine=True)\n", + " (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (7): SELU(inplace=True)\n", + " (8): LayerNorm((256, 7, 7), eps=1e-05, elementwise_affine=True)\n", + " (9): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (10): SELU(inplace=True)\n", + " (11): LayerNorm((64, 7, 7), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (decoder): Sequential(\n", + " (0): ConvTranspose2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (1): SELU(inplace=True)\n", + " (2): LayerNorm((256, 7, 7), eps=1e-05, elementwise_affine=True)\n", + " (3): ConvTranspose2d(256, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n", + " (4): SELU(inplace=True)\n", + " (5): LayerNorm((128, 14, 14), eps=1e-05, elementwise_affine=True)\n", + " (6): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n", + " (7): SELU(inplace=True)\n", + " (8): LayerNorm((64, 28, 28), eps=1e-05, elementwise_affine=True)\n", + " (9): ConvTranspose2d(64, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (10): Sigmoid()\n", + " )\n", + ")\n", + "----------------------------------------------------------------------------------------------------\n", + "Number of model parameters: 1,400,001\n", + "----------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "data_loader = config.provide('loader')\n", + "data_source = data_loader.transformed_source\n", + "train_dataset = data_source.train\n", + "validation_dataset = data_source.validation" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + 
"outputs": [], + "source": [ + "def get_sample(idx):\n", + " return train_dataset[idx]['x'].to(config.device)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "def show_image(axis, sample):\n", + " axis.imshow(train_dataset.denormalize_item(sample, 'x'), cmap='gray')" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAACxCAYAAABAxMXKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAdJUlEQVR4nO3de5AV1dnv8ecBg2I4gIhByhvGIClN4RgQCYcSEpB4jIkoiUoJiLHACl5ISimMIR5yCIYo+BYYNSoBvHBEK0BA8/qiR24xAsUlJC9yETWBF5iAoNyNHGCdP9g5QZ+1mZ7dvfdeq+f7qaKY+dG9e/WeHzN7TU+vUeecAAAAAACqq1G1BwAAAAAAYHIGAAAAAEFgcgYAAAAAAWByBgAAAAABYHIGAAAAAAFgcgYAAAAAAUg1OVPVq1R1g6q+q6r3ZTUooFzoLGJEbxEbOovY0FmEQkv9PWeq2lhE3hGRK0Vki4gsF5H+zrm1J9iHX6qGVJxzWuq+dBbVkKazIvXvLZ1FBnY6584odWc6iyqoaGcL+9BbpFLs9UGaK2ddRORd59z7zrlDIjJDRK5N8XhAudFZxIjeotI2pdyfzqLS6CxyI83k7CwR+a/j3t9SyIBQ0VnEiN4iNnQWsaGzCMZJ5T6Aqg4VkaHlPg6QFTqL2NBZxIbOIkb0FpWQZnK2VUTOOe79swvZpzjnnhKRp0T4+VxUHZ1FjOrsLZ1FYOgsYsPrAwQjzY81LheR9qp6vqo2EZGbRGRuNsMCyoLOIkb0FrGhs4gNnUUwSr5y5pw7rKp3isg8EWksIlOcc29nNjIgY3QWMaK3iA2dRWzoLEJS8lL6JR2MS8BIKe2y5PVFZ5EWnUWEVjrnOlfqYHQWGahoZ0XoLdIrx1L6AAAAAICMMDkDAAAAgAAwOQMAAACAADA5AwAAAIAAMDkDAAAAgAAwOQMAAACAADA5AwAAAIAAMDkDAAAAgAAwOQMAAACAADA5AwAAAIAAMDkDAAAAgAAwOQMAAACAADA5AwAAAIAAnFTtAQDIn06dOpnszjvvNNmgQYNM9uyzz5rs0Ucf9R5n1apVJYwOAAAgTFw5AwAAAIAAMDkDAAAAgAAwOQMAAACAAKS650xV/yYi+0TkiIgcds51zmJQQDnRW8SGziI2dBaxobMIhTrnSt/5WJE7O+d2Jty+9INFrHHjxiZr0aJFqsf0La5w6qmnmqxDhw4mu+OOO7yPOX78eJP179/fZP/4xz9MNm7cOJP97Gc/8x4nDeecpn2M+vS2oXY2qZqaGm8+f/58kzVv3rzk4+zZs8ebn3766SU/ZqXQWRyvV69eJps+fbrJevToYbINGzaUZUweK9O+MKWzYRs1apTJin3NbtTI/pBVz549TbZo0aLU40qhop0tbE9vkUqx1wf8WCMAAAAABCDt5MyJyGuqulJVh2YxIKAC6C1iQ2cRGzqL2NBZBCHt7znr7pzbqqpfEJHXVXW9c27x8RsUCk7JEZIT9pbOIkB0FrGhs4gNr2kRh
FRXzpxzWwt/7xCR2SLSxbPNU865ztxYiVDU1Vs6i9DQWcSGziI2vKZFKEq+cqaqnxeRRs65fYW3+4jI/8psZFVy7rnnevMmTZqYrFu3bibr3r27yVq2bGmyfv36lTC6+tuyZYvJJk2a5N32uuuuM9m+fftM9uc//9lkVb4ROLG89rYSunQxX6dk5syZ3m19C974Fh/y9evQoUMmK7bwR9euXU22atWqRI8ZixA6e8UVV5jM9zGZPXt2JYYTtcsuu8xky5cvr8JIyieEzuJfBg8ebLKRI0ea7OjRo4kfM81iciGiswhJmh9rbCMis1X1n4/zv51z/5HJqIDyobeIDZ1FbOgsYkNnEYySJ2fOufdF5JIMxwKUHb1FbOgsYkNnERs6i5CwlD4AAAAABIDJGQAAAAAEIO1S+lGrqakx2fz5873b+hY4CI3vZt5Ro0aZbP/+/d79p0+fbrLa2lqTffTRRybbsGFDkiEiQKeeeqrJvvrVr5rs+eefN1nbtm1THXvjxo0me+ihh0w2Y8YM7/5//OMfTebr/C9+8YsSRod/6tmzp8nat29vMhYE+ZdGjfzf+zz//PNNdt5555mscO8LkJqvX6ecckoVRoI8uPzyy002YMAAk/Xo0cO7/8UXX5zoOPfee6/Jtm3bZjLfQnwi/tcsy5YtS3TsauPKGQAAAAAEgMkZAAAAAASAyRkAAAAABIDJGQAAAAAEoEEvCLJ582aT7dq1y7ttJRYEKXaj4u7du0329a9/3WSHDh0y2XPPPZd+YMi1J5980mT9+/evyLF9C480a9bMZIsWLfLu71uoomPHjqnHhU8bNGiQyZYsWVKFkcSj2GI5Q4YMMZnvxvX169dnPibkX+/evU121113Jdq3WOeuueYak23fvr1+A0OUbrzxRpNNnDjRZK1btzZZsUWNFi5caLIzzjjDZA8//HCCERY/ju8xb7rppkSPWW1cOQMAAACAADA5AwAAAIAAMDkDAAAAgAAwOQMAAACAADA5AwAAAIAANOjVGj/88EOTjRgxwrutb7WiP/3pTyabNGlSomOvXr3aZFdeeaV32wMHDpjs4osvNtnw4cMTHRsNV6dOnUz2rW99y2TFVj/6rGKrKL788ssmGz9+vMm2bdtmMt//q48++sh7nG984xsmSzp2JNeoEd/Hq6/Jkycn3nbjxo1lHAnyqnv37iabOnWqyZKuNl1sdbxNmzbVb2AI3kkn2Zf/nTt3NtnTTz9tslNPPdVkixcvNtmYMWO8x37zzTdNdvLJJ5vspZdeMlmfPn28j+mzYsWKxNuGhq+4AAAAABAAJmcAAAAAEAAmZwAAAAAQgDonZ6o6RVV3qOqa47JWqvq6qm4s/H1aeYcJ1A+9RWzoLGJDZxEbOosYqHPuxBuoXiEi+0XkWefcVwrZQyLyoXNunKreJyKnOedG1nkw1RMfLGDNmzc32b59+0z25JNPmuy2224z2YABA0z2wgsvlDi6hsM5l2i1h6x6G3Nna2pqTDZ//nyT+brt8+qrr5qsf//+3m179Ohhso4dO5rMt2jCBx98kGg8IiJHjhwx2cGDBxONZ9WqVYmPk0ZsnfV9nJYsWWKyWbNmmWzgwIFpDp0rb731ljfv2rWrybp162aypUuXZj6meljpnLOrA3xGKJ1tqHyLNXz/+99PtO/ChQtN1qtXr7RDqqaKdrawX7S9HTx4sMmSLmL0+uuvm+zGG2802d69exOPx/eaeNq0aYn23bp1qzf3LXBSn9cXlVDs9UGdV86cc4tF5LPLGl4rIs8U3n5GRPqmGh2QMXqL2NBZxIbOIjZ0FjEo9Z6zNs652sLbfxeRNhmNBygneovY0FnEhs4iNnQWQUn9e86cc+5El3ZVdaiIDE17HCBLJ+otnUWI6CxiQ2cRG17TIgSlXjnbrqptRUQKf+8otqFz7innXOckPwsMlFmi3tJZBITOIjZ0FrHhNS2CUuqVs7kicouIjCv8PSezEQUq6Y2Ne/bsS
bTdkCFDTPbiiy96tz169Giix0SdctnbCy+80JuPGDHCZC1atDDZzp07TVZbW2uyZ555xmT79+/3Hvv3v/99oqwcmjZtarJ77rnHZDfffHMlhpNWxTt79dVXm8z3nOJf2rSxPwV1/vnnJ96/2A3tkcrl59lqat26tTf3Lf7he72we/duk/385z9PP7D8yG1nx4wZ483vv/9+k/kWCHz88cdNNmrUKJPVZ/EPn5/85Ccl73v33Xd789AW/6iPJEvpvyAiS0Skg6puUdXb5FiBr1TVjSLSu/A+EAx6i9jQWcSGziI2dBYxqPPKmXPOv1a2SNRrriLf6C1iQ2cRGzqL2NBZxKDUe84AAAAAABlicgYAAAAAAUi9lD4+bfTo0Sbr1KmTyXr06GGy3r17ex/ztddeSz0u5MPJJ59ssvHjx3u39S3ssG/fPpMNGjTIZCtWrDBZzItCnHvuudUeQjQ6dOiQaLu33367zCOJh+//oG+REBGRd955x2S+/5domNq1a2eymTNnpnrMRx991GQLFixI9ZgIzwMPPGAy38IfIiKHDh0y2bx580w2cuRIk3388ceJxnPKKad48z59+pjM9zVaVU3mW8hmzpzcrN/y/3HlDAAAAAACwOQMAAAAAALA5AwAAAAAAsDkDAAAAAACwIIgGTtw4IDJhgwZYrJVq1aZ7Omnn/Y+pu/GXd+CDY899pjJfL/xHfG69NJLTeZb+KOYa6+91mSLFi1KNSY0TMuXL6/2EDLVvHlzk1111VUmGzBggMl8N7gXM2bMGJPt3r078f7IN1/nOnbsmHj/N954w2QTJ05MNSaEp2XLliYbNmyYyYq9BvQt/tG3b9+Sx/OlL33JZNOnT/du61skz+e3v/2tyR566KH6DSxSXDkDAAAAgAAwOQMAAACAADA5AwAAAIAAMDkDAAAAgACwIEgFvPfeeyYbPHiwyaZOnerdf+DAgYmyz3/+8yZ79tlnTVZbW+s9DsL3yCOPmExVvdv6FvrI2+IfjRrZ7y8dPXq0CiNpeFq1apX5Y15yySUm8/W7d+/eJjv77LNN1qRJE5PdfPPN3mP7uvTxxx+bbNmyZSb75JNPTHbSSf4vrytXrvTmaHh8CzCMGzcu8f5vvvmmyW655RaT7dmzp34DQ/B8n9tat26deP+7777bZF/4whdMduutt5rsO9/5jsm+8pWvmKxZs2beY/sWKfFlzz//vMl8i+7lEVfOAAAAACAATM4AAAAAIABMzgAAAAAgAEzOAAAAACAAdU7OVHWKqu5Q1TXHZaNVdauqri78ubq8wwSSo7OIEb1FbOgsYkNnEYMkqzVOE5Ffichnl/37N+fc+MxH1EDMnj3bZBs3bvRu61uhr1evXiZ78MEHTXbeeeeZbOzYsd7jbN261ZtHaJrkoLPXXHONyWpqakzmW+VIRGTu3LmZjyk0vpUZfc/H6tWrKzGctKZJAL31rVDoe05//etfm+z+++9PdeyOHTuazLda4+HDh0128OBBk61du9ZkU6ZM8R57xYoVJvOtbrp9+3aTbdmyxWRNmzb1Hmf9+vXePFLTJIDOxqBdu3YmmzlzZqrHfP/9903m6yc+ZZrkoLOHDh0y2QcffGCyM844w7v/X//6V5MVey2RxLZt20y2d+9e77Zt27Y12c6dO0328ssvlzye2NV55cw5t1hEPqzAWIBM0FnEiN4iNnQWsaGziEGae87uVNW/FC4Rn1ZsI1UdqqorVNV+WxKoLDqLGNXZWzqLwNBZxIbXBwhGqZOzJ0TkAhGpEZFaEZlQbEPn3FPOuc7Ouc4lHgvIAp1FjBL1ls4iIHQWseH1AYJS0uTMObfdOXfEOXdURJ4WkS7ZDgvIFp1FjOgtYkNnERs6i9AkWRDEUNW2zrnawrvXiciaE22PZNas8T+NN9xwg8m+/e1vm2zq1Kkmu/32203Wvn1773GuvPLKuoYYrRg761tQoEmTJibbsWOHd/8XX3wx8zFVwsknn2yy0aNHJ
95//vz5Jvvxj3+cZkhVU43eDhs2zGSbNm0yWbdu3TI/9ubNm032u9/9zmTr1q0z2dKlSzMfj8/QoUNN5rvp3rdYQ0MQ4+faShg5cqTJfAsa1ce4ceNS7Y9jYuzs7t27Tda3b1+TvfLKK979W7VqZbL33nvPZHPmzDHZtGnTTPbhh/Y2vhkzZniP7VsQpNi2DVWdkzNVfUFEeopIa1XdIiL/U0R6qmqNiDgR+ZuI2BkAUCV0FjGit4gNnUVs6CxiUOfkzDnX3xP/pgxjATJBZxEjeovY0FnEhs4iBmlWawQAAAAAZITJGQAAAAAEoKQFQVBZvhs/n3vuOZNNnjzZZCedZD/EV1xxhfc4PXv2NNnChQvrHiCq6pNPPvHmtbW13jwkvsU/Ro0aZbIRI0Z499+yZYvJJkywqyDv37+/hNHhn375y19WewjB6NWrV6LtZs6cWeaRIFQ1NTUm69OnT8mP51uUQURkw4YNJT8m8mfZsmUm8y1WVA6+15U9evTwbutbCKehLqBUDFfOAAAAACAATM4AAAAAIABMzgAAAAAgAEzOAAAAACAALAgSkI4dO3rz7373uya77LLLTOZb/MNn7dq13nzx4sWJ9kdY5s6dW+0hJOK7Sd630MeNN95osmI3xPfr1y/9wIAymD17drWHgCp57bXXTHbaaacl2nfp0qUmGzx4cNohAWXVtGlTk/kW/hARcc6ZbMaMGZmPKWZcOQMAAACAADA5AwAAAIAAMDkDAAAAgAAwOQMAAACAALAgSAV06NDBZHfeeafJrr/+eu/+Z555ZsnHPnLkiMlqa2u92xa7eRPVoaqJsr59+3r3Hz58eOZjSupHP/qRyX7605+arEWLFiabPn26yQYNGpTNwACgzE4//XSTJf36+vjjj5ts//79qccElNO8efOqPYRc4coZAAAAAASAyRkAAAAABIDJGQAAAAAEoM7Jmaqeo6oLVHWtqr6tqsMLeStVfV1VNxb+TvYbFoEyo7OIDZ1FjOgtYkNnEYMkV84Oi8g9zrmLRKSriNyhqheJyH0i8oZzrr2IvFF4HwgBnUVs6CxiRG8RGzqL4NW5WqNzrlZEagtv71PVdSJylohcKyI9C5s9IyILRWRkWUYZKN8qiv379zeZb2XGdu3aZT6eFStWmGzs2LEmmzt3bubHDkleOuucS5QVW81z0qRJJpsyZYrJdu3aZbKuXbuabODAgSa75JJLvMc+++yzTbZ582aT+VZ48q1Wlnd56WxD5VtF9cILL/Ruu3Tp0nIPp2LorcjUqVNN1qhR6XeMvPXWW2mGgzrQ2fL45je/We0h5Eq9PoOoajsRuVRElolIm0LJRUT+LiJtMh0ZkAE6i9jQWcSI3iI2dBahSvx7zlS1mYjMFJEfOuf2Hv+dQuecU1X7Lf1j+w0VkaFpBwrUF51FbOgsYlRKb+ksqonPtQhZoitnqvo5OVbi6c65WYV4u6q2Lfx7WxHZ4dvXOfeUc66zc65zFgMGkqCziA2dRYxK7S2dRbXwuRahS7Jao4rIb0RknXPukeP+aa6I3FJ4+xYRmZP98ID6o7OIDZ1FjOgtYkNnEYMkP9b430VkoIj8p6quLmT3i8g4EXlJVW8TkU0ickN5hlhZbdr4f8z4oosuMtmvfvUrk335y1/OfEzLli0z2cMPP2yyOXPs55KjR49mPp4INKjONm7c2JsPGzbMZP369TPZ3r17Tda+fftUY/Ld1L5gwQKTPfDAA6mOkyMNqrN541uoJ82iEBFpML2tqanx5r179zaZ7+vuoUOHTPbYY4+ZbPv27SWMDvXQYDpbSV/84herPYRcSbJa45siYpeiOqZXtsMB0qOziA2dRYzoLWJDZxGDBvGtPQAAAAAIHZMzAAAAAAgAkzMAAAAACEDi33MWu1atWpnsySefNFmxm36zvtnRt2DChAkTvNvOmzfPZB9//HGm40F4lixZYrLly5eb7LLLLkv8mGeeeabJii2C81m7du0y2YwZM
7zbDh8+PPGYgDz62te+5s2nTZtW2YEgEy1btvTmvs+pPlu3bjXZvffem2pMQCj+8Ic/mKzYokgNdKG6euHKGQAAAAAEgMkZAAAAAASAyRkAAAAABIDJGQAAAAAEIPoFQS6//HKTjRgxwmRdunQx2VlnnZX5eA4ePGiySZMmmezBBx802YEDBzIfD+K1ZcsWk11//fUmu/322737jxo1quRjT5w40WRPPPGEyd59992SjwHkhWqx32kLAPm3Zs0ak23cuNG7rW+BvQsuuMBkH3zwQfqBRYorZwAAAAAQACZnAAAAABAAJmcAAAAAEAAmZwAAAAAQgOgXBLnuuusSZUmtXbvWm7/yyismO3z4sMkmTJhgst27d5c8HuB4tbW1Jhs9erR322I5gNK9+uqrJvve975XhZGgktavX+/N33rrLZN179693MMBgudb+E5EZPLkySYbO3asye666y6TFXuNnjdcOQMAAACAADA5AwAAAIAAMDkDAAAAgADUOTlT1XNUdYGqrlXVt1V1eCEfrapbVXV14c/V5R8uUDc6i9jQWcSGziJG9BYxUOfciTdQbSsibZ1zq1T1v4nIShHpKyI3iMh+59z4xAdTPfHBgDo457SubegsQkJnEaGVzrnOJ9qAziIwdXZWhN5WUvPmzb35Sy+9ZLLevXubbNasWSa79dZbTXbgwIESRheGYq8P6lyt0TlXKyK1hbf3qeo6ETkr2+EB2aGziA2dRWzoLGJEbxGDet1zpqrtRORSEVlWiO5U1b+o6hRVPS3jsQGp0VnEhs4iNnQWMaK3CFXiyZmqNhORmSLyQ+fcXhF5QkQuEJEaOfZdCPsLvo7tN1RVV6jqigzGCyRGZxEbOovY0FnEiN4iZIkmZ6r6OTlW4unOuVkiIs657c65I865oyLytIh08e3rnHvKOdc5yc8CA1mhs4gNnUVs6CxiRG8RuiSrNaqI/EZE1jnnHjkub3vcZteJyJrshwfUH51FbOgsYkNnESN6ixgkWa2xu4j8QUT+U0SOFuL7RaS/HLv860TkbyJye+FGyxM9FivbIJWEK9/RWQSDziJCSVZrpLMISdLVGultlflWcRw7dqzJfvCDH5isY8eOJlu7dm02A6uCNKs1vikivp3/Pe2ggHKgs4gNnUVs6CxiRG8Rg3qt1ggAAAAAKA8mZwAAAAAQACZnAAAAABCAOhcEyfRg3DyJlJIsrpAlOou06CwilGhxhazQWWSgop0VobdIr9jrA66cAQAAAEAAmJwBAAAAQACYnAEAAABAAJicAQAAAEAA6vwl1BnbKSKbCm+3LryfB3k6F5Fwz+e8KhyTzsYh1POhs9nJ07mIhH0+le5tXjsrkq/zCflcqvm5NuTnpRR5Op+Qz6VoZyu6WuOnDqy6otIr65RLns5FJH/nk5U8PS95OheR/J1PVvL0vOTpXETydz5ZydvzkqfzydO5ZClvz0uezifWc+HHGgEAAAAgAEzOAAAAACAA1ZycPVXFY2ctT+cikr/zyUqenpc8nYtI/s4nK3l6XvJ0LiL5O5+s5O15ydP55OlcspS35yVP5xPluVTtnjMAAAAAwL/wY40AAAAAEICKT85U9SpV3aCq76rqfZU+flqqOkVVd6jqmuOyVqr6uqpuLPx9WjXHmJSqnqOqC1R1raq+rarDC3mU51MudDYcdDYZOhsOOptczL3NU2dF6G1SMXdWJF+9zVNnKzo5U9XGIvKYiPwPEblIRPqr6kWVHEMGponIVZ/J7hORN5xz7UXkjcL7MTgsIvc45y4Ska4ickfh4xHr+WSOzgaHztaBzgaHziaQg95Ok/x0VoTe1ikHnRXJV29z09lKXznrIiLvOufed84dEpEZInJthceQinNusYh8+Jn4WhF5pvD2MyLSt6KDKpFzrtY5t6rw9j4RWSciZ0mk51MmdDYgdDYROhsQOptY1L3NU2dF6G1CUXdWJF+9zVNnKz05O0tE/uu497cUsti1cc7VFt7+u
4i0qeZgSqGq7UTkUhFZJjk4nwzR2UDR2aLobKDo7Anlsbe5+BjT26Ly2FmRHHyMY+8sC4JkzB1b/jKqJTBVtZmIzBSRHzrn9h7/bzGeD+onxo8xnW3YYvwY09mGLdaPMb1t2GL8GOehs5WenG0VkXOOe//sQha77araVkSk8PeOKo8nMVX9nBwr8XTn3KxCHO35lAGdDQydrROdDQydTSSPvY36Y0xv65THzopE/DHOS2crPTlbLiLtVfV8VW0iIjeJyNwKj6Ec5orILYW3bxGROVUcS2KqqiLyGxFZ55x75Lh/ivJ8yoTOBoTOJkJnA0JnE8tjb6P9GNPbRPLYWZFIP8a56qxzrqJ/RORqEXlHRN4TkZ9U+vgZjP8FEakVkf8rx36++DYROV2OrQCzUUT+j4i0qvY4E55Ldzl2efcvIrK68OfqWM+njM8TnQ3kD51N/DzR2UD+0Nl6PVfR9jZPnS2cD71N9jxF29nC+HPT2zx1VgsnBAAAAACoIhYEAQAAAIAAMDkDAAAAgAAwOQMAAACAADA5AwAAAIAAMDkDAAAAgAAwOQMAAACAADA5AwAAAIAAMDkDAAAAgAD8P8a5unNTxUnoAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Browse examples\n", + "fig, axes = plt.subplots(1, 5)\n", + "\n", + "for index in range(5):\n", + " show_image(axes[index], get_sample(index).cpu())" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[[ 23, 23, 23, 23, 23, 23, 23],\n", + " [ 23, 64, 88, 106, 4, 121, 29],\n", + " [ 23, 70, 114, 51, 31, 12, 12],\n", + " [ 23, 23, 46, 117, 22, 23, 23],\n", + " [ 23, 23, 23, 70, 114, 104, 23],\n", + " [ 23, 22, 4, 65, 116, 29, 23],\n", + " [ 99, 116, 51, 110, 23, 23, 23]]], device='cuda:0')\n", + "torch.Size([1, 7, 7])\n" + ] + } + ], + "source": [ + "x = model.encode(get_sample(0)[None])\n", + "print(x)\n", + "print(x.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Browse examples\n", + "fig, axes = plt.subplots(2, 5)\n", + "\n", + "for index in range(5):\n", + " sample = get_sample(index)\n", + " decoded = model(sample[None])[0].detach()\n", + " show_image(axes[0, index], sample.cpu())\n", + " show_image(axes[1, index], decoded.cpu())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "vel", + "language": "python", + "name": "vel" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/vel/model/latent/vq_vae.py b/vel/model/latent/vq_vae.py new file mode 100644 index 00000000..5f3d4677 --- /dev/null +++ b/vel/model/latent/vq_vae.py @@ -0,0 +1,306 @@ +""" +VQ-VAE implementation with Vector Quantization functions taken from +https://github.com/ritheshkumar95/pytorch-vqvae/blob/master/functions.py +""" +import itertools as it + +import torch +import torch.autograd as autograd +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init + +import vel.util.network as net_util + +from vel.api import GradientModel +from vel.metric import AveragingNamedMetric +from vel.metric.loss_metric import Loss + + +class VectorQuantization(autograd.Function): + @staticmethod + def forward(ctx, inputs, codebook): + with torch.no_grad(): + embedding_size = codebook.size(1) + inputs_size = inputs.size() + inputs_flatten = inputs.view(-1, embedding_size) + + codebook_sqr = torch.sum(codebook ** 2, dim=1) + inputs_sqr = torch.sum(inputs_flatten ** 2, dim=1, keepdim=True) + + # Compute the distances to the codebook + distances = torch.addmm(codebook_sqr + inputs_sqr, inputs_flatten, codebook.t(), alpha=-2.0, beta=1.0) + + _, 
indices_flatten = torch.min(distances, dim=1) + indices = indices_flatten.view(*inputs_size[:-1]) + ctx.mark_non_differentiable(indices) + + return indices + + @staticmethod + def backward(ctx, grad_output): + raise RuntimeError( + 'Trying to call `.grad()` on graph containing ' + '`VectorQuantization`. The function `VectorQuantization` ' + 'is not differentiable. Use `VectorQuantizationStraightThrough` ' + 'if you want a straight-through estimator of the gradient.' + ) + + +class VectorQuantizationStraightThrough(autograd.Function): + @staticmethod + def forward(ctx, inputs, codebook): + indices = vector_quantization(inputs, codebook) + indices_flatten = indices.view(-1) + ctx.save_for_backward(indices_flatten, codebook) + ctx.mark_non_differentiable(indices_flatten) + + codes_flatten = torch.index_select(codebook, dim=0, index=indices_flatten) + codes = codes_flatten.view_as(inputs) + + return codes, indices_flatten + + @staticmethod + def backward(ctx, grad_output, grad_indices): + grad_inputs, grad_codebook = None, None + + if ctx.needs_input_grad[0]: + # Straight-through estimator + grad_inputs = grad_output.clone() + + if ctx.needs_input_grad[1]: + # Gradient wrt. 
the codebook + indices, codebook = ctx.saved_tensors + embedding_size = codebook.size(1) + + grad_output_flatten = (grad_output.contiguous().view(-1, embedding_size)) + grad_codebook = torch.zeros_like(codebook) + grad_codebook.index_add_(0, indices, grad_output_flatten) + + return grad_inputs, grad_codebook + + +vector_quantization = VectorQuantization.apply +vector_quantization_straight_through = VectorQuantizationStraightThrough.apply + + +class VQEmbedding(nn.Module): + """ Vector-Quantised code embedding for the latent variables """ + + def __init__(self, k: int, d: int): + super().__init__() + self.k = k + self.d = d + self.embedding = nn.Parameter(torch.empty((self.k, self.d))) + + def reset_weights(self): + """ Initialize weights of the embedding """ + self.embedding.data.uniform_(-1.0/self.k, 1.0/self.k) + + def extra_repr(self) -> str: + return f"k={self.k}, d={self.d}" + + def forward(self, z_e_x): + z_e_x_ = z_e_x.permute(0, 2, 3, 1).contiguous() + latents = vector_quantization(z_e_x_, self.embedding) + return latents + + def straight_through(self, z_e_x): + z_e_x_ = z_e_x.permute(0, 2, 3, 1).contiguous() + z_q_x_, indices = vector_quantization_straight_through(z_e_x_, self.embedding.detach()) + z_q_x = z_q_x_.permute(0, 3, 1, 2).contiguous() + + z_q_x_bar_flatten = torch.index_select(self.embedding, dim=0, index=indices) + z_q_x_bar_ = z_q_x_bar_flatten.view_as(z_e_x_) + z_q_x_bar = z_q_x_bar_.permute(0, 3, 1, 2).contiguous() + + return z_q_x, z_q_x_bar + + +class VQVAE(GradientModel): + """ + Implementation of Neural Discrete Representation Learning (https://arxiv.org/abs/1711.00937) + Vector-Quantised Variational-AutoEncoder (VQ-VAE) + """ + + def __init__(self, img_rows, img_cols, img_channels, channels=None, k: int = 512, d: int = 256, + beta: float = 1.0): + super().__init__() + + if channels is None: + channels = [16, 32, 32] + + layer_series = [ + (3, 1, 1), + (3, 1, 2), + (3, 1, 2), + ] + + self.codebook = VQEmbedding(k, d) + + self.final_width 
= net_util.convolutional_layer_series(img_rows, layer_series) + self.final_height = net_util.convolutional_layer_series(img_cols, layer_series) + self.channels = channels + + self.beta = beta + self.k = k + self.d = d + + self.encoder = nn.Sequential( + nn.Conv2d(in_channels=img_channels, out_channels=channels[0], kernel_size=(3, 3), padding=1), + nn.SELU(True), + nn.LayerNorm([ + channels[0], + net_util.convolutional_layer_series(img_rows, layer_series[:1]), + net_util.convolutional_layer_series(img_cols, layer_series[:1]), + ]), + nn.Conv2d(in_channels=channels[0], out_channels=channels[1], kernel_size=(3, 3), stride=2, padding=1), + nn.SELU(True), + nn.LayerNorm([ + channels[1], + net_util.convolutional_layer_series(img_rows, layer_series[:2]), + net_util.convolutional_layer_series(img_cols, layer_series[:2]), + ]), + nn.Conv2d(in_channels=channels[1], out_channels=channels[2], kernel_size=(3, 3), stride=2, padding=1), + nn.SELU(True), + nn.LayerNorm([ + channels[2], + net_util.convolutional_layer_series(img_rows, layer_series), + net_util.convolutional_layer_series(img_cols, layer_series), + ]), + nn.Conv2d(in_channels=channels[2], out_channels=self.d, kernel_size=(3, 3), stride=1, padding=1), + nn.SELU(True), + nn.LayerNorm([ + self.d, + net_util.convolutional_layer_series(img_rows, layer_series), + net_util.convolutional_layer_series(img_cols, layer_series), + ]), + ) + + self.decoder = nn.Sequential( + nn.ConvTranspose2d(in_channels=self.d, out_channels=channels[2], kernel_size=(3, 3), stride=1, padding=1), + # nn.Linear(d, self.final_width * self.final_height * channels[2]), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[2], + net_util.convolutional_layer_series(img_rows, layer_series), + net_util.convolutional_layer_series(img_cols, layer_series), + ]), + nn.ConvTranspose2d( + in_channels=channels[2], out_channels=channels[1], kernel_size=3, stride=2, padding=1, output_padding=1 + ), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + 
channels[1], + net_util.convolutional_layer_series(img_rows, layer_series[:2]), + net_util.convolutional_layer_series(img_cols, layer_series[:2]), + ]), + nn.ConvTranspose2d( + in_channels=channels[1], out_channels=channels[0], kernel_size=3, stride=2, padding=1, output_padding=1 + ), + # nn.ReLU(True), + nn.SELU(True), + nn.LayerNorm([ + channels[0], + net_util.convolutional_layer_series(img_rows, layer_series[:1]), + net_util.convolutional_layer_series(img_cols, layer_series[:1]), + ]), + nn.ConvTranspose2d(in_channels=channels[0], out_channels=img_channels, kernel_size=3, padding=1), + nn.Sigmoid() + ) + + def reset_weights(self): + self.codebook.reset_weights() + + for m in it.chain(self.encoder, self.decoder): + if isinstance(m, nn.Conv2d): + self._weight_initializer(m) + elif isinstance(m, nn.ConvTranspose2d): + self._weight_initializer(m) + elif isinstance(m, nn.Linear): + self._weight_initializer(m) + + @staticmethod + def _weight_initializer(tensor): + init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('relu')) + init.constant_(tensor.bias, 0.0) + + def encode(self, x): + z_e_x = self.encoder(x) + latents = self.codebook(z_e_x) + return latents + + def decode(self, latents): + z_q_x = self.codebook.embedding(latents).permute(0, 3, 1, 2) # (B, D, H, W) + x_tilde = self.decoder(z_q_x) + return x_tilde + + def forward(self, x): + z_e_x = self.encoder(x) + z_q_x_st, z_q_x = self.codebook.straight_through(z_e_x) + x_tilde = self.decoder(z_q_x_st) + return x_tilde + + def calculate_gradient(self, data: dict) -> dict: + """ + Calculate gradient for given batch of supervised learning. 
+ Returns a dictionary of metrics + """ + input_data = data['x'] + target_data = data['y'] + + # x_tilde, z_e_x, z_q_x = self(input_data) + z_e_x = self.encoder(input_data) + z_q_x_st, z_q_x = self.codebook.straight_through(z_e_x) + x_tilde = self.decoder(z_q_x_st) + + # Reconstruction loss + loss_recons = F.mse_loss(x_tilde, target_data) + + # Vector quantization objective + loss_vq = F.mse_loss(z_q_x, z_e_x.detach()) + + # Commitment objective + loss_commit = F.mse_loss(z_e_x, z_q_x.detach()) + + loss = loss_recons + loss_vq + self.beta * loss_commit + + if self.training: + loss.backward() + + return { + 'loss': loss.item(), + + 'grad_norm': grad_norm, + 'reconstruction': loss_recons.item(), + 'loss_vq': loss_vq.item(), + 'loss_commit': loss_commit.item() + } + + def metrics(self): + """ Set of metrics for this model """ + return [ + Loss(), + AveragingNamedMetric('reconstruction', scope="train"), + AveragingNamedMetric('loss_vq', scope="train"), + AveragingNamedMetric('loss_commit', scope="train"), + AveragingNamedMetric('grad_norm', scope="train") + ] + + +def create(img_rows, img_cols, img_channels, channels=None, k: int = 512, d: int = 256, + beta: float = 1.0): + """ Vel factory function """ + from vel.api import ModelFactory + + if channels is None: + channels = [16, 32, 32] + + def instantiate(**_): + return VQVAE( + img_rows, img_cols, img_channels, channels=channels, k=k, d=d, beta=beta + ) + + return ModelFactory.generic(instantiate) From 6934cb6ff4be48c2ba001237716c584d2524b0e0 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 3 Oct 2019 18:39:28 -0700 Subject: [PATCH 119/162] Implemented simple MNIST-GAN --- docs/Bibliography.md | 1 + examples-configs/gan/mnist/mnist_gan.yaml | 61 ++++++++ vel/api/info.py | 9 ++ vel/api/model.py | 1 + vel/api/optimizer.py | 83 ++++++++++- vel/model/gan/__init__.py | 0 vel/model/gan/simple_gan.py | 162 ++++++++++++++++++++++ vel/train/trainer.py | 3 +- 8 files changed, 317 insertions(+), 3 deletions(-) create 
mode 100644 examples-configs/gan/mnist/mnist_gan.yaml create mode 100644 vel/model/gan/__init__.py create mode 100644 vel/model/gan/simple_gan.py diff --git a/docs/Bibliography.md b/docs/Bibliography.md index 2490513a..31cfef82 100644 --- a/docs/Bibliography.md +++ b/docs/Bibliography.md @@ -153,6 +153,7 @@ in the following repositories (in alphabetical order): - https://github.com/Kaixhin/Rainbow - https://github.com/Khrylx/PyTorch-RL - https://github.com/LiyuanLucasLiu/RAdam +- https://github.com/eriklindernoren/PyTorch-GAN - https://github.com/fastai/fastai - https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer - https://github.com/openai/baselines diff --git a/examples-configs/gan/mnist/mnist_gan.yaml b/examples-configs/gan/mnist/mnist_gan.yaml new file mode 100644 index 00000000..4de4ecce --- /dev/null +++ b/examples-configs/gan/mnist/mnist_gan.yaml @@ -0,0 +1,61 @@ +name: 'mnist_gan' + + +model: + name: vel.model.gan.simple_gan + img_rows: 28 + img_cols: 28 + img_channels: 1 + latent_dim: 128 + + +source: + name: vel.data.source.vision.mnist + + +loader: + name: vel.data.dataset_loader + batch_size: 128 +# num_workers: 4 +# pin_memory: true + + transformations: + - name: vel.data.transformation.to_array + # - name: vel.data.augmentation.random_scale + # tags: train + # size: 28 + # max_zoom: 1.1 + # - name: vel.data.augmentation.random_rotate + # tags: train + # deg: 15.0 + # - name: vel.data.augmentation.random_crop + # tags: train + # width: 28 + # height: 28 + # padding: 3 + # padding_mode: 'constant' + - name: vel.data.transformation.image_to_tensor + - name: vel.data.transformation.unsupervised + + +optimizer: + name: vel.optimizer.radam + lr: 2.0e-4 + eps: 1.0e-4 + + +#scheduler: +# name: vel.scheduler.multi_step +# gamma: 0.71968 # 10 * (-1/7) +# milestones: [ 1, 4, 13, 40, 121, 364, 1093, 3280] + + +commands: + augvis: + name: vel.command.augvis_command + samples: 10 + cases: 5 + + train: + name: vel.command.train_command + epochs: 200 diff 
--git a/vel/api/info.py b/vel/api/info.py index 1f0a765b..e76cec24 100644 --- a/vel/api/info.py +++ b/vel/api/info.py @@ -99,6 +99,15 @@ def __init__(self, global_epoch_idx, metrics): self._reset_metrics() self.metrics_by_name = {m.name: m for m in self.metrics} + def __contains__(self, metric): + if ':' in metric: + # TODO(jerry) There's got to be a better way to do it + metric_name = metric.split(':')[-1] + else: + metric_name = metric + + return metric_name in self.metrics_by_name + @torch.no_grad() def calculate(self, batch_info): """ Calculate metric values """ diff --git a/vel/api/model.py b/vel/api/model.py index 693d6a46..4946b7bb 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -76,6 +76,7 @@ def validate(self, data: dict) -> dict: raise NotImplementedError + class GradientModel(ValidatedModel): """ Model that calculates a single gradient and optimizes it """ diff --git a/vel/api/optimizer.py b/vel/api/optimizer.py index 82f56825..bed2a75b 100644 --- a/vel/api/optimizer.py +++ b/vel/api/optimizer.py @@ -1,11 +1,15 @@ -import typing +import collections import itertools as it +import typing from torch.nn.utils import clip_grad_norm_ from torch.optim.optimizer import Optimizer -from vel.metric import DefaultAveragingNamedMetric + from vel.api.callback import Callback from vel.api.scheduler import SchedulerFactory +from vel.exception import VelException +from vel.metric import DefaultAveragingNamedMetric +from vel.util.datastructure import flatten_dict class VelOptimizer: @@ -101,6 +105,72 @@ def create_scheduler(self, scheduler_factory: SchedulerFactory, last_epoch: int return [scheduler_factory.instantiate(self.optimizer, last_epoch=last_epoch)] +class VelMultiOptimizer(VelOptimizer): + """ Optimizer that wraps several individual optimizers """ + + def __init__(self, optimizers: typing.Dict[str, VelOptimizer], canonical_name: typing.Optional[str] = None): + self.optimizers = optimizers + + # Canonical, chosen optimizer + self.canonical_name = 
list(optimizers.keys())[0] + + self.initial_lrs = { + name: optimizer.get_lr() + for name, optimizer in self.optimizers.items() + } + + def __getitem__(self, item): + return self.optimizers[item] + + def get_lr(self) -> float: + return self.optimizers[self.canonical_name].get_lr() + + def set_lr(self, lr: float): + canonical_lr = self.initial_lrs[self.canonical_name] + + for name, optimizer in self.optimizers.items(): + opt_lr = self.initial_lrs[name] / canonical_lr * lr + optimizer.set_lr(opt_lr) + + def state_dict(self) -> dict: + output = {} + + for name, optimizer in self.optimizers.items(): + output[name] = optimizer.state_dict() + + def load_state_dict(self, state_dict: dict) -> None: + for name, optimizer in self.optimizers.items(): + optimizer.load_state_dict(state_dict[name]) + + def zero_grad(self) -> None: + for optimizer in self.optimizers.values(): + optimizer.zero_grad() + + def step(self, closure=None) -> dict: + output = {} + + for name, optimizer in self.optimizers.items(): + metrics = optimizer.step() + flatten_dict(metrics, output, name) + + return output + + def create_scheduler(self, scheduler_factory: SchedulerFactory, last_epoch: int = -1) -> [Callback]: + """ Create a scheduler instance for this optimizer """ + return [ + scheduler_factory.instantiate(optimizer, last_epoch=last_epoch) + for optimizer in self.optimizers.values() + ] + + def add_param_group(self, param_group: dict) -> None: + raise VelException("Unsupported operation") + + def metrics(self) -> list: + """ Set of metrics for this model """ + # TODO(jerry): aggregate metrics + return [] + + class OptimizerFactory: """ Base class for optimizer factories """ @@ -109,3 +179,12 @@ def instantiate(self, parameters) -> VelOptimizer: def instantiate_parameter_groups(self, parameters) -> VelOptimizer: raise NotImplementedError + + def instantiate_multi(self, parameter_dict: dict) -> VelMultiOptimizer: + od = collections.OrderedDict() + + for name, value in parameter_dict.items(): + 
od[name] = self.instantiate(value) + + return VelMultiOptimizer(od) + diff --git a/vel/model/gan/__init__.py b/vel/model/gan/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/model/gan/simple_gan.py b/vel/model/gan/simple_gan.py new file mode 100644 index 00000000..78d516eb --- /dev/null +++ b/vel/model/gan/simple_gan.py @@ -0,0 +1,162 @@ +""" +Simple GAN code is based on https://github.com/eriklindernoren/PyTorch-GAN/blob/master/implementations/gan/gan.py +""" +import collections +import numpy as np +import torch +import torch.nn as nn + +from vel.api import OptimizedModel, ModelFactory, VelOptimizer, OptimizerFactory +from vel.api.optimizer import VelMultiOptimizer +from vel.metric import AveragingNamedMetric + + +class Generator(nn.Module): + def __init__(self, img_shape, latent_dim): + super(Generator, self).__init__() + + self.img_shape = img_shape + self.latent_dim = latent_dim + + def block(in_feat, out_feat, normalize=True): + layers = [nn.Linear(in_feat, out_feat)] + if normalize: + layers.append(nn.BatchNorm1d(out_feat, 0.8)) + layers.append(nn.LeakyReLU(0.2, inplace=True)) + return layers + + self.model = nn.Sequential( + *block(latent_dim, 128, normalize=False), + *block(128, 256), + *block(256, 512), + *block(512, 1024), + nn.Linear(1024, int(np.prod(img_shape))), + nn.Tanh() + ) + + def forward(self, z): + img = self.model(z) + img = img.view(img.size(0), *self.img_shape) + return img + + +class Discriminator(nn.Module): + def __init__(self, img_shape, latent_dim): + super(Discriminator, self).__init__() + + self.model = nn.Sequential( + nn.Linear(int(np.prod(img_shape)), 512), + nn.LeakyReLU(0.2, inplace=True), + nn.Linear(512, 256), + nn.LeakyReLU(0.2, inplace=True), + nn.Linear(256, 1), + nn.Sigmoid(), + ) + + def forward(self, img): + img_flat = img.view(img.size(0), -1) + validity = self.model(img_flat) + + return validity + + +class SimpleGAN(OptimizedModel): + """ + Implements simple Generative Adversarial Network in 
the spirit of the original paper + "Generative Adversarial Networks" https://arxiv.org/abs/1406.2661 + """ + + def __init__(self, img_rows, img_cols, img_channels, latent_dim): + super().__init__() + + self.image_shape = (img_channels, img_rows, img_cols) + self.latent_dim = latent_dim + + self.generator = Generator(img_shape=self.image_shape, latent_dim=self.latent_dim) + self.discriminator = Discriminator(img_shape=self.image_shape, latent_dim=self.latent_dim) + + self.adversarial_loss = nn.BCELoss() + + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelMultiOptimizer: + """ Create optimizer for the purpose of optimizing this model """ + gen_parameters = filter(lambda p: p.requires_grad, self.generator.parameters()) + disc_parameters = filter(lambda p: p.requires_grad, self.discriminator.parameters()) + + return optimizer_factory.instantiate_multi(collections.OrderedDict([ + ('generator', gen_parameters), + ('discriminator', disc_parameters) + ])) + + def optimize(self, data: dict, optimizer: VelMultiOptimizer) -> dict: + """ + Perform one step of optimization of the model + :returns a dictionary of metrics + """ + optimizer_G = optimizer['generator'] + optimizer_D = optimizer['discriminator'] + + input_data = data['x'] + + # Adversarial ground truths + valid = torch.ones(input_data.size(0), 1).to(input_data.device) + fake = torch.zeros(input_data.size(0), 1).to(input_data.device) + + optimizer_G.zero_grad() + + # Sample noise as generator input + z = torch.randn(input_data.size(0), self.latent_dim).to(input_data.device) + + # Generate a batch of images + gen_imgs = self.generator(z) + + # Loss measures generator's ability to fool the discriminator + g_loss = self.adversarial_loss(self.discriminator(gen_imgs), valid) + + g_loss.backward() + optimizer_G.step() + + # --------------------- + # Train Discriminator + # --------------------- + + optimizer_D.zero_grad() + + # Measure discriminator's ability to classify real from generated samples + 
real_loss = self.adversarial_loss(self.discriminator(input_data), valid) + fake_loss = self.adversarial_loss(self.discriminator(gen_imgs.detach()), fake) + d_loss = (real_loss + fake_loss) / 2 + + d_loss.backward() + optimizer_D.step() + + return { + 'gen_loss': g_loss.item(), + 'disc_loss': d_loss.item() + } + + def validate(self, data: dict) -> dict: + """ + Perform one step of model inference without optimization + :returns a dictionary of metrics + """ + return { + 'gen_loss': 0.0, + 'disc_loss': 0.0 + } + + def metrics(self): + """ Set of metrics for this model """ + return [ + AveragingNamedMetric('gen_loss', scope="train"), + AveragingNamedMetric('disc_loss', scope="train"), + ] + + +def create(img_rows, img_cols, img_channels, latent_dim): + """ Vel factory function """ + def instantiate(**_): + return SimpleGAN( + img_rows, img_cols, img_channels, latent_dim=latent_dim + ) + + return ModelFactory.generic(instantiate) diff --git a/vel/train/trainer.py b/vel/train/trainer.py index 6222067c..7d151b18 100644 --- a/vel/train/trainer.py +++ b/vel/train/trainer.py @@ -73,7 +73,8 @@ def train_epoch(self, epoch_info, loader: DatasetLoader, interactive=True): batch_info.update(metrics) batch_info.on_batch_end('train') - iterator.set_postfix(loss=epoch_info.result_accumulator.intermediate_value('loss')) + if 'loss' in epoch_info.result_accumulator: + iterator.set_postfix(loss=epoch_info.result_accumulator.intermediate_value('loss')) def validation_epoch(self, epoch_info, loader: DatasetLoader, interactive=True): """ Run a single evaluation epoch """ From 9d35c4f17dc6d99182d4077612a4bd493b7faf88 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 3 Oct 2019 22:20:15 -0700 Subject: [PATCH 120/162] Requirements update. 
--- requirements.txt | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9629fe23..8a6ce769 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ # atari-py==0.2.6 # via gym atomicwrites==1.3.0 # via pytest -attrs==19.1.0 +attrs==19.2.0 box2d-py==2.3.8 # via gym certifi==2019.9.11 # via requests chardet==3.0.4 # via requests @@ -17,7 +17,7 @@ future==0.17.1 # via pyglet gym[atari,box2d,classic_control]==0.14.0 idna==2.8 # via requests importlib-metadata==0.23 # via pluggy, pytest -joblib==0.13.2 # via scikit-learn +joblib==0.14.0 # via scikit-learn jsonpatch==1.24 # via visdom jsonpointer==2.0 # via jsonpatch kiwisolver==1.1.0 # via matplotlib @@ -27,13 +27,13 @@ numpy==1.17.2 opencv-python==4.1.1.26 packaging==19.2 # via pytest pandas==0.25.1 -pillow==6.1.0 # via gym, torchvision, visdom +pillow==6.2.0 # via gym, torchvision, visdom pluggy==0.13.0 # via pytest py==1.8.0 # via pytest pyglet==1.3.2 # via gym pymongo==3.9.0 pyparsing==2.4.2 # via matplotlib, packaging -pytest==5.1.2 +pytest==5.2.0 python-dateutil==2.8.0 # via matplotlib, pandas pytz==2019.2 # via pandas pyyaml==5.1.2 @@ -48,11 +48,8 @@ torchtext==0.4.0 torchvision==0.4.0 tornado==6.0.3 # via visdom tqdm==4.36.1 -urllib3==1.25.4 # via requests +urllib3==1.25.6 # via requests visdom==0.1.8.9 wcwidth==0.1.7 # via pytest websocket-client==0.56.0 # via visdom zipp==0.6.0 # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# setuptools==41.2.0 # via kiwisolver From 23e0b21de1b6783b48c87084e9913f4b5cccc702 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 5 Oct 2019 20:33:26 -0700 Subject: [PATCH 121/162] Updated roadmap slightly. 
--- README.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 60563e3f..57237231 100644 --- a/README.md +++ b/README.md @@ -206,14 +206,17 @@ Very likely to be included: Possible to be included: -- Popart reward normalization -- Parameter Space Noise for Exploration -- Hindsight experience replay - Generative adversarial networks +For version 0.5 I'll again be looking to expand widely on the spectrum of available models in the framework, +as well as I'll try to support **multi-gpu** training by data parallelism. + +Work in progress roadmap: -Code quality: -- Factor action noise back into the policy +- Popart reward normalization +- PixelCNN +- MADE: Masked Autoencoder for Distribution Estimation +- Variational AutoEncoder with Inverse Autoregressive Flow # Directories From f9f594274325c867ac831bb6110c02e30d73ac11 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 6 Oct 2019 21:35:44 -0700 Subject: [PATCH 122/162] Refactored RNN language modelling examples. 
--- .velproject.yaml | 2 + ...kespeare_gru.yaml => gen_shakespeare.yaml} | 38 ++++-- .../gen_shakespeare_gru_embedding.yaml | 49 ------- .../nlp/generation/gen_shakespeare_lstm.yaml | 49 ------- .../gen_shakespeare_lstm_embedding.yaml | 49 ------- requirements.in | 10 +- requirements.txt | 4 +- vel/api/model.py | 26 ++-- vel/api/size_hint.py | 8 ++ vel/command/rnn/generate_text.py | 5 +- vel/command/summary_command.py | 19 +-- vel/data/text_character_loader.py | 10 +- vel/model/nlp/__init__.py | 0 vel/model/nlp/language_model.py | 79 +++++++++++ .../rnn/multilayer_rnn_sequence_model.py | 2 +- vel/module/input/embedding.py | 43 ------ vel/module/input/flatten.py | 4 +- vel/module/input/one_hot_encoding.py | 16 +-- vel/module/input/sequence.py | 21 --- vel/net/layer/arch/parallel.py | 2 +- vel/net/layer/dropout.py | 54 ++++++++ vel/net/layer/input/image_to_tensor.py | 2 +- vel/net/layer/input/normalize.py | 2 +- vel/net/layer/mlp.py | 19 ++- vel/net/layer/nlp/__init__.py | 0 vel/net/layer/nlp/alphabet_embedding.py | 52 ++++++++ vel/net/layer/nlp/alphabet_one_hot_encode.py | 47 +++++++ vel/net/layer/rnn.py | 126 ++++++++++++++++++ vel/net/layer/util/concat.py | 2 +- vel/net/layer/util/repeat.py | 2 +- vel/net/layer_base.py | 10 +- vel/net/modular.py | 37 ++++- vel/rl/layer/double_nature_cnn.py | 2 +- vel/rl/layer/double_noisy_nature_cnn.py | 2 +- vel/rl/layer/nature_cnn.py | 2 +- vel/rl/layer/nature_cnn_small.py | 2 +- vel/rl/layer/rnn_cell.py | 2 +- vel/util/summary.py | 87 ------------ 38 files changed, 487 insertions(+), 399 deletions(-) rename examples-configs/nlp/generation/{gen_shakespeare_gru.yaml => gen_shakespeare.yaml} (52%) delete mode 100644 examples-configs/nlp/generation/gen_shakespeare_gru_embedding.yaml delete mode 100644 examples-configs/nlp/generation/gen_shakespeare_lstm.yaml delete mode 100644 examples-configs/nlp/generation/gen_shakespeare_lstm_embedding.yaml create mode 100644 vel/model/nlp/__init__.py create mode 100644 
vel/model/nlp/language_model.py delete mode 100644 vel/module/input/embedding.py delete mode 100644 vel/module/input/sequence.py create mode 100644 vel/net/layer/dropout.py create mode 100644 vel/net/layer/nlp/__init__.py create mode 100644 vel/net/layer/nlp/alphabet_embedding.py create mode 100644 vel/net/layer/nlp/alphabet_one_hot_encode.py create mode 100644 vel/net/layer/rnn.py delete mode 100644 vel/util/summary.py diff --git a/.velproject.yaml b/.velproject.yaml index 8127a339..5e25ba22 100644 --- a/.velproject.yaml +++ b/.velproject.yaml @@ -29,4 +29,6 @@ visdom_settings: global_commands: list: name: vel.command.list_command + summary: + name: vel.command.summary_command diff --git a/examples-configs/nlp/generation/gen_shakespeare_gru.yaml b/examples-configs/nlp/generation/gen_shakespeare.yaml similarity index 52% rename from examples-configs/nlp/generation/gen_shakespeare_gru.yaml rename to examples-configs/nlp/generation/gen_shakespeare.yaml index 2ae82918..ac7bd121 100644 --- a/examples-configs/nlp/generation/gen_shakespeare_gru.yaml +++ b/examples-configs/nlp/generation/gen_shakespeare.yaml @@ -1,4 +1,4 @@ -name: 'gen_shakespeare_gru' +name: 'gen_shakespeare' source: @@ -15,16 +15,30 @@ loader: model: - name: vel.model.rnn.multilayer_rnn_sequence_model - - input_block: - name: vel.module.input.one_hot_encoding - alphabet_size: 68 # Size of the alphabet + 1 - - hidden_layers: [512, 512, 512] - output_dim: 68 # Size of the alphabet + 1 - dropout: 0.5 - rnn_type: 'gru' + name: vel.model.nlp.language_model + + net: + name: vel.net.modular + layers: + - name: vel.net.layer.nlp.alphabet_embedding + dim: 512 + - name: vel.net.layer.rnn + hidden_size: 512 + rnn_type: 'lstm' + - name: vel.net.layer.dropout + p: 0.3 + - name: vel.net.layer.rnn + hidden_size: 512 + rnn_type: 'lstm' + dropout: 0.5 + - name: vel.net.layer.dropout + p: 0.3 + - name: vel.net.layer.rnn + hidden_size: 512 + rnn_type: 'lstm' + dropout: 0.5 + - name: vel.net.layer.dropout + p: 0.3 
optimizer: @@ -46,5 +60,3 @@ commands: temperature: !param temperature = 0.8 - - diff --git a/examples-configs/nlp/generation/gen_shakespeare_gru_embedding.yaml b/examples-configs/nlp/generation/gen_shakespeare_gru_embedding.yaml deleted file mode 100644 index 70e1a961..00000000 --- a/examples-configs/nlp/generation/gen_shakespeare_gru_embedding.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: 'gen_shakespeare_gru_embedding' - - -source: - name: vel.data.source.nlp.text_url - # Andrej Karpathy built a small (4.4mb) file with combined all works of Shakespeare - url: 'https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt' - local_dir: './rnn_shakespeare' - - -loader: - name: vel.data.text_character_loader - sequence_length: 128 - batch_size: 64 - - -model: - name: vel.model.rnn.multilayer_rnn_sequence_model - - input_block: - name: vel.module.input.embedding - alphabet_size: 68 # Size of the alphabet + 1 - output_dim: 512 # Embedding dimension - - hidden_layers: [512, 512, 512] - output_dim: 68 # Size of the alphabet + 1 - dropout: 0.5 - rnn_type: 'gru' - - -optimizer: - name: vel.optimizer.adam - lr: 1.0e-3 - epsilon: 1.0e-5 - - -commands: - train: - name: vel.command.train_command - max_grad_norm: 0.5 - epochs: 20 - - generate: - name: vel.command.rnn.generate_text - start_letter: !param start_letter = 'A' - length: !param length = 500 - temperature: !param temperature = 0.8 - - diff --git a/examples-configs/nlp/generation/gen_shakespeare_lstm.yaml b/examples-configs/nlp/generation/gen_shakespeare_lstm.yaml deleted file mode 100644 index 0f06d487..00000000 --- a/examples-configs/nlp/generation/gen_shakespeare_lstm.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: 'gen_shakespeare_lstm' - - -source: - name: vel.data.source.nlp.text_url - # Andrej Karpathy built a small (4.4mb) file with combined all works of Shakespeare - url: 'https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt' - local_dir: './rnn_shakespeare' - - -loader: - name: 
vel.data.text_character_loader - sequence_length: 128 - batch_size: 64 - - -model: - name: vel.model.rnn.multilayer_rnn_sequence_model - - input_block: - name: vel.module.input.one_hot_encoding - alphabet_size: 68 # Size of the alphabet + 1 - - hidden_layers: [512, 512, 512] - output_dim: 68 # Size of the alphabet + 1 - dropout: 0.5 - rnn_type: 'lstm' - - -optimizer: - name: vel.optimizer.adam - lr: 1.0e-3 - epsilon: 1.0e-5 - - -commands: - train: - name: vel.command.train_command - max_grad_norm: 0.5 - epochs: 20 - - generate: - name: vel.command.rnn.generate_text - start_letter: !param start_letter = 'A' - length: !param length = 500 - temperature: !param temperature = 0.8 - - - diff --git a/examples-configs/nlp/generation/gen_shakespeare_lstm_embedding.yaml b/examples-configs/nlp/generation/gen_shakespeare_lstm_embedding.yaml deleted file mode 100644 index c84af0d0..00000000 --- a/examples-configs/nlp/generation/gen_shakespeare_lstm_embedding.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: 'gen_shakespeare_lstm_embedding' - - -source: - name: vel.data.source.nlp.text_url - # Andrej Karpathy built a small (4.4mb) file with combined all works of Shakespeare - url: 'https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt' - local_dir: './rnn_shakespeare' - - -loader: - name: vel.data.text_character_loader - sequence_length: 128 - batch_size: 64 - - -model: - name: vel.model.rnn.multilayer_rnn_sequence_model - - input_block: - name: vel.module.input.embedding - alphabet_size: 68 # Size of the alphabet + 1 - output_dim: 512 # Embedding dimension - - hidden_layers: [512, 512, 512] - output_dim: 68 # Size of the alphabet + 1 - dropout: 0.5 - rnn_type: 'lstm' - - -optimizer: - name: vel.optimizer.adam - lr: 1.0e-3 - epsilon: 1.0e-5 - - -commands: - train: - name: vel.command.train_command - max_grad_norm: 0.5 - epochs: 20 - - generate: - name: vel.command.rnn.generate_text - start_letter: !param start_letter = 'A' - length: !param length = 500 - temperature: 
!param temperature = 0.8 - - diff --git a/requirements.in b/requirements.in index 6ffb91bd..70eebbb8 100644 --- a/requirements.in +++ b/requirements.in @@ -1,17 +1,17 @@ attrs cloudpickle +dnspython +gym[atari,box2d,classic_control] matplotlib numpy opencv-python pandas +pymongo +pytest pyyaml scikit-learn -torch~=1.2 torchtext torchvision +torch~=1.2 tqdm visdom -pymongo -dnspython -gym[atari,box2d,classic_control] -pytest diff --git a/requirements.txt b/requirements.txt index 9629fe23..508db5cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile # To update, run: # -# pip-compile +# pip-compile requirements.in # atari-py==0.2.6 # via gym atomicwrites==1.3.0 # via pytest @@ -55,4 +55,4 @@ websocket-client==0.56.0 # via visdom zipp==0.6.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -# setuptools==41.2.0 # via kiwisolver +# setuptools==41.4.0 # via kiwisolver diff --git a/vel/api/model.py b/vel/api/model.py index 4946b7bb..052bd1c3 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -1,11 +1,10 @@ import torch -import torch.nn as nn import vel.util.module_util as mu from vel.api.optimizer import VelOptimizer, OptimizerFactory from vel.metric.loss_metric import Loss -from vel.util.summary import summary + from .network import Network @@ -36,17 +35,13 @@ def train(self, mode=True): return self - def summary(self, input_size=None): + def summary(self): """ Print a model summary """ - - if input_size is None: - print(self) - print("-" * 100) - number = sum(p.numel() for p in self.parameters()) - print("Number of model parameters: {:,}".format(number)) - print("-" * 100) - else: - summary(self, input_size) + print(self) + print("-" * 100) + number = sum(p.numel() for p in self.parameters()) + print("Number of model parameters: {:,}".format(number)) + print("-" * 100) class OptimizedModel(Model): @@ -76,7 +71,6 @@ def validate(self, data: dict) -> 
dict: raise NotImplementedError - class GradientModel(ValidatedModel): """ Model that calculates a single gradient and optimizes it """ @@ -120,7 +114,11 @@ def metrics(self) -> list: return [Loss()] def calculate_gradient(self, data: dict) -> dict: - y_hat = self(data['x']) + if self.is_stateful: + y_hat, _ = self(data['x']) + else: + y_hat = self(data['x']) + loss_value = self.loss_value(data['x'], data['y'], y_hat) if self.training: diff --git a/vel/api/size_hint.py b/vel/api/size_hint.py index 8263c0a5..b1e4fecb 100644 --- a/vel/api/size_hint.py +++ b/vel/api/size_hint.py @@ -19,6 +19,14 @@ def shape(self, idx=1) -> typing.Tuple[int]: """ Get shape of size hint, except for a number of dimensions (batch dimensions """ return self[idx:] + def append(self, element: int) -> 'SizeHint': + """ Return a copy of this size hint, with new element added """ + return SizeHint(*(list(self) + [element])) + + def drop_last(self) -> 'SizeHint': + """ Return a copy of this size hint, with last element dropped """ + return SizeHint(*self[:-1]) + def __repr__(self): internal = ", ".join([self._inner_repr(s) for s in self]) return f"{self.__class__.__name__}({internal})" diff --git a/vel/command/rnn/generate_text.py b/vel/command/rnn/generate_text.py index 99b56647..bf14f074 100644 --- a/vel/command/rnn/generate_text.py +++ b/vel/command/rnn/generate_text.py @@ -6,6 +6,7 @@ import torch.distributions as dist from vel.api import TrainingInfo +from vel.util.tensor_util import to_device class GenerateTextCommand: @@ -41,12 +42,12 @@ def run(self): generated_text = [current_char] - state = model.zero_state(1).to(device) + state = to_device(model.zero_state(1), device) char_tensor = torch.from_numpy(np.array([current_char_encoded])).view(1, 1).to(device) for _ in tqdm.trange(self.length): - prob_logits, state = model.forward_state(char_tensor, state) + prob_logits, state = model(char_tensor, state) # Apply temperature to the logits prob_logits = 
F.log_softmax(prob_logits.view(-1).div(self.temperature), dim=0) diff --git a/vel/command/summary_command.py b/vel/command/summary_command.py index 37393b84..51ab11f7 100644 --- a/vel/command/summary_command.py +++ b/vel/command/summary_command.py @@ -1,21 +1,14 @@ -from vel.api import Source - - class ModelSummary: """ Just print model summary """ - def __init__(self, model, source: Source): - self.model = model - self.source = source + def __init__(self, model): + self.model_factory = model def run(self, *args): """ Print model summary """ - if self.source is None: - self.model.summary() - else: - x_data, y_data = next(iter(self.source.train_loader)) - self.model.summary(input_size=x_data.shape[1:]) + model = self.model_factory.instantiate() + model.summary() -def create(model, source=None): +def create(model): """ Vel factory function """ - return ModelSummary(model, source) + return ModelSummary(model) diff --git a/vel/data/text_character_loader.py b/vel/data/text_character_loader.py index 92f9d405..5eafc95b 100644 --- a/vel/data/text_character_loader.py +++ b/vel/data/text_character_loader.py @@ -109,6 +109,11 @@ def __init__(self, source, sequence_length: int, batch_size: int): def __getitem__(self, item): return self._loaders[item] + @property + def alphabet_size(self): + """ Size of the text alphabet """ + return len(self.alphabet) + @property def loader(self): """ Get a dict of loaders """ @@ -127,8 +132,3 @@ def create(source: Source, sequence_length: int = 64, batch_size: int = 64): sequence_length=sequence_length, batch_size=batch_size ) - - - - - diff --git a/vel/model/nlp/__init__.py b/vel/model/nlp/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/model/nlp/language_model.py b/vel/model/nlp/language_model.py new file mode 100644 index 00000000..55b96adb --- /dev/null +++ b/vel/model/nlp/language_model.py @@ -0,0 +1,79 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from vel.api import 
LossFunctionModel, ModelFactory, Network, BackboneNetwork, SizeHints, SizeHint + + +class LanguageModel(LossFunctionModel): + """ Language model - autoregressive generative model for text """ + + def __init__(self, alphabet_size: int, net: BackboneNetwork): + super().__init__() + + self.net = net + self.alphabet_size = alphabet_size + self.output_dim = self.alphabet_size + 1 + + self.net = net + self.output_layer = nn.Linear( + in_features=self.net.size_hints().assert_single().last(), + out_features=self.alphabet_size+1 + ) + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return self.net.is_stateful + + def zero_state(self, batch_size): + """ Potential state for the model """ + return self.net.zero_state(batch_size) + + def forward(self, input_data: torch.Tensor, state=None) -> torch.Tensor: + r"""Defines the computation performed at every call. + + Should be overridden by all subclasses. + + .. note:: + Although the recipe for forward pass needs to be defined within + this function, one should call the :class:`Module` instance afterwards + instead of this since the former takes care of running the + registered hooks while the latter silently ignores them. 
+ """ + if self.net.is_stateful: + output, new_state = self.net(input_data, state=state) + else: + output = self.net(input_data) + new_state = state + + return F.log_softmax(self.output_layer(output), dim=-1), new_state + + def loss_value(self, x_data, y_true, y_pred) -> torch.tensor: + """ Calculate a value of loss function """ + y_pred = y_pred.view(-1, y_pred.size(2)) + y_true = y_true.view(-1).to(torch.long) + return F.nll_loss(y_pred, y_true) + + +class LanguageModelFactory(ModelFactory): + def __init__(self, alphabet_size: int, net_factory: ModelFactory): + self.alphabet_size = alphabet_size + self.net_factory = net_factory + + def instantiate(self, **extra_args) -> Network: + size_hint = SizeHints(SizeHint(None, None)) + net = self.net_factory.instantiate(alphabet_size=self.alphabet_size, size_hint=size_hint) + + return LanguageModel( + alphabet_size=self.alphabet_size, + net=net + ) + + +def create(loader, net: ModelFactory): + """ Vel factory function """ + return LanguageModelFactory( + alphabet_size=loader.alphabet_size, + net_factory=net + ) diff --git a/vel/model/rnn/multilayer_rnn_sequence_model.py b/vel/model/rnn/multilayer_rnn_sequence_model.py index 959741a5..3f5c332a 100644 --- a/vel/model/rnn/multilayer_rnn_sequence_model.py +++ b/vel/model/rnn/multilayer_rnn_sequence_model.py @@ -4,7 +4,7 @@ import torch.nn.functional as F import torch.nn as nn -from vel.api import LossFunctionModel, ModelFactory, LinearBackboneModel +from vel.api import LossFunctionModel, ModelFactory from vel.module.rnn_layer import RnnLayer diff --git a/vel/module/input/embedding.py b/vel/module/input/embedding.py deleted file mode 100644 index 97927ff1..00000000 --- a/vel/module/input/embedding.py +++ /dev/null @@ -1,43 +0,0 @@ -import torch.nn as nn - -from vel.api import Network, LanguageSource, ModelFactory - - -class EmbeddingInput(Network): - """ Learnable Embedding input layer """ - - def __init__(self, alphabet_size: int, output_dim: int, pretrained: bool = False, 
frozen: bool = False, - source: LanguageSource = None): - super().__init__() - - self._output_dim = output_dim - self._alphabet_size = alphabet_size - self._pretrained = pretrained - self._frozen = frozen - self._source = source - - self.layer = nn.Embedding(self._alphabet_size, self._output_dim) - - def reset_weights(self): - if self._pretrained: - self.layer.weight.data.copy_(self._source.fields['text'].vocab.vectors) - - if self._frozen: - self.layer.weight.requires_grad = False - - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - return self._output_dim - - def forward(self, input_data): - return self.layer(input_data) - - -def create(alphabet_size: int, output_dim: int, pretrained: bool = False, frozen: bool = False, - source: LanguageSource = None): - """ Vel factory function """ - def instantiate(**_): - return EmbeddingInput(alphabet_size, output_dim, pretrained=pretrained, frozen=frozen, source=source) - - return ModelFactory.generic(instantiate) diff --git a/vel/module/input/flatten.py b/vel/module/input/flatten.py index 5e140cc0..c69e9f02 100644 --- a/vel/module/input/flatten.py +++ b/vel/module/input/flatten.py @@ -1,10 +1,10 @@ from vel.module.layers import Flatten -from vel.api import ModelFactory, BackboneNetwork +from vel.api import Network -class FlattenInput(BackboneNetwork): +class FlattenInput(Network): """ Sequence input """ def __init__(self): super().__init__() diff --git a/vel/module/input/one_hot_encoding.py b/vel/module/input/one_hot_encoding.py index 125bdb47..15f4e961 100644 --- a/vel/module/input/one_hot_encoding.py +++ b/vel/module/input/one_hot_encoding.py @@ -1,8 +1,8 @@ -from vel.api import LinearBackboneModel, ModelFactory +from vel.api import Network from vel.module.layers import OneHotEncode -class OneHotEncodingInput(LinearBackboneModel): +class OneHotEncodingInput(Network): """ One-hot encoding input layer """ def __init__(self, alphabet_size: int): @@ -12,18 +12,6 @@ def __init__(self, 
alphabet_size: int): self.layer = OneHotEncode(self._alphabet_size) - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - return self._alphabet_size - def forward(self, input_data): return self.layer(input_data) - -def create(alphabet_size: int): - """ Vel factory function """ - def instantiate(**_): - return OneHotEncodingInput(alphabet_size) - - return ModelFactory.generic(instantiate) diff --git a/vel/module/input/sequence.py b/vel/module/input/sequence.py deleted file mode 100644 index 51c50c82..00000000 --- a/vel/module/input/sequence.py +++ /dev/null @@ -1,21 +0,0 @@ -import torch.nn as nn - -from vel.api import ModelFactory, BackboneModel - - -class SequenceInput(BackboneModel): - """ Sequence input """ - def __init__(self, modules): - super().__init__() - self.model = nn.Sequential(*modules) - - def forward(self, input_data): - return self.model(input_data) - - -def create(modules): - """ Vel factory function """ - def instantiate(**_): - return SequenceInput([f.instantiate() for f in modules]) - - return ModelFactory.generic(instantiate) diff --git a/vel/net/layer/arch/parallel.py b/vel/net/layer/arch/parallel.py index de592f2a..c25cc0ec 100644 --- a/vel/net/layer/arch/parallel.py +++ b/vel/net/layer/arch/parallel.py @@ -34,7 +34,7 @@ def name_base(self) -> str: """ Base of layer name """ return "parallel" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: hints = direct_input.assert_tuple(len(self.layers)) layers = [] diff --git a/vel/net/layer/dropout.py b/vel/net/layer/dropout.py new file mode 100644 index 00000000..3d6df1f9 --- /dev/null +++ b/vel/net/layer/dropout.py @@ -0,0 +1,54 @@ +import torch.nn.functional as F +from vel.api import SizeHints +from vel.net.layer_base import Layer, LayerFactory + + +class DropoutLayer(Layer): + """ + During training, randomly zeroes some of the 
elements of the input + tensor with probability :attr:`p` using samples from a Bernoulli + distribution. + + See :class:`~torch.nn.Dropout` for details. + """ + def __init__(self, name: str, input_size: SizeHints, p: float): + super().__init__(name) + + self.p = p + self.input_size = input_size + + def forward(self, direct, state: dict = None, context: dict = None): + return F.dropout(direct, p=self.p, training=self.training) + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + return self.input_size + + def extra_repr(self) -> str: + """Set the extra representation of the module""" + return "p={:.2f}".format(self.p) + + +class DropoutLayerFactory(LayerFactory): + """ Factory class for the Dropout layer """ + + def __init__(self, p: float): + self.p = p + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "dropout" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + """ Create a given layer object """ + return DropoutLayer( + name=name, + input_size=direct_input, + p=self.p + ) + + +def create(p: float): + """ Vel factory function """ + return DropoutLayerFactory(p) diff --git a/vel/net/layer/input/image_to_tensor.py b/vel/net/layer/input/image_to_tensor.py index 4924c33f..1f5adb9c 100644 --- a/vel/net/layer/input/image_to_tensor.py +++ b/vel/net/layer/input/image_to_tensor.py @@ -34,7 +34,7 @@ def name_base(self) -> str: """ Base of layer name """ return "image_to_tensor" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: """ Create a given layer object """ if self.shape is None: shape = direct_input.assert_single().shape() diff --git a/vel/net/layer/input/normalize.py b/vel/net/layer/input/normalize.py index 8da64fa7..91766a38 100644 --- a/vel/net/layer/input/normalize.py +++ b/vel/net/layer/input/normalize.py @@ -33,7 
+33,7 @@ def name_base(self) -> str: """ Base of layer name """ return "image_to_tensor" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: """ Create a given layer object """ # Potential improvement here is to use either direct input or size parameter if self.shape is None: diff --git a/vel/net/layer/mlp.py b/vel/net/layer/mlp.py index 1be0d57b..d2f57b49 100644 --- a/vel/net/layer/mlp.py +++ b/vel/net/layer/mlp.py @@ -18,17 +18,18 @@ class MLP(Layer): """ Simple Multi-Layer-Perceptron network """ - def __init__(self, name: str, input_length: int, hidden_layers: typing.List[int], activation: str = 'tanh', + def __init__(self, name: str, input_size: SizeHints, hidden_layers: typing.List[int], activation: str = 'tanh', normalization: typing.Optional[str] = None): super().__init__(name) - self.input_length = input_length + self.input_size = input_size + self.input_length = input_size.assert_single().last() self.hidden_layers = hidden_layers self.activation = activation self.normalization = normalization layer_objects = [] - layer_sizes = zip([input_length] + hidden_layers, hidden_layers) + layer_sizes = zip([self.input_length] + hidden_layers, hidden_layers) for input_size, output_size in layer_sizes: layer_objects.append(nn.Linear(input_size, output_size)) @@ -39,9 +40,12 @@ def __init__(self, name: str, input_length: int, hidden_layers: typing.List[int] layer_objects.append(net_util.activation(activation)()) self.model = nn.Sequential(*layer_objects) - self.hidden_units = hidden_layers[-1] if hidden_layers else input_length + self.hidden_units = hidden_layers[-1] if hidden_layers else self.input_length + + self.output_size = input_size.assert_single().drop_last().append(self.hidden_units) def reset_weights(self): + """ Call proper initializers for the weights """ for m in self.modules(): if isinstance(m, nn.Linear): # 
init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') @@ -52,7 +56,8 @@ def forward(self, direct, state: dict = None, context: dict = None): return self.model(direct.float()) def size_hints(self) -> SizeHints: - return SizeHints(SizeHint(None, self.hidden_units)) + """ Size hints for this network """ + return SizeHints(self.output_size) class MLPFactory(LayerFactory): @@ -67,11 +72,11 @@ def name_base(self) -> str: """ Base of layer name """ return "mlp" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: """ Create a given layer object """ return MLP( name=name, - input_length=direct_input.assert_single().last(), + input_size=direct_input, hidden_layers=self.hidden_layers, activation=self.activation, normalization=self.normalization diff --git a/vel/net/layer/nlp/__init__.py b/vel/net/layer/nlp/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/net/layer/nlp/alphabet_embedding.py b/vel/net/layer/nlp/alphabet_embedding.py new file mode 100644 index 00000000..2fec003a --- /dev/null +++ b/vel/net/layer/nlp/alphabet_embedding.py @@ -0,0 +1,52 @@ +import torch.nn as nn + +from vel.api import SizeHints +from vel.net.layer_base import Layer, LayerFactory + + +class AlphabetEmbeddingLayer(Layer): + """ + Map incoming tensor of alphabet-encoded indices to embedding vectors of size dim + """ + def __init__(self, name: str, alphabet_size: int, dim: int, input_shape: SizeHints): + super().__init__(name) + + self.alphabet_size = alphabet_size + self.dim = dim + self.output_size = SizeHints(input_shape.assert_single().append(self.dim)) + + self.layer = nn.Embedding(self.alphabet_size + 1, self.dim) + + def forward(self, direct, state: dict = None, context: dict = None): + return self.layer(direct) + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + return self.output_size + + +class 
AlphabetEmbeddingLayerFactory(LayerFactory): + """ Factory class for the AlphabetEmbedding layer """ + + def __init__(self, dim: int): + self.dim = dim + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "alphabet_embedding" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + alphabet_size = extra_args['alphabet_size'] + + return AlphabetEmbeddingLayer( + name=name, + alphabet_size=alphabet_size, + dim=self.dim, + input_shape=direct_input + ) + + +def create(dim: int): + """ Vel factory function """ + return AlphabetEmbeddingLayerFactory(dim) diff --git a/vel/net/layer/nlp/alphabet_one_hot_encode.py b/vel/net/layer/nlp/alphabet_one_hot_encode.py new file mode 100644 index 00000000..b26745c5 --- /dev/null +++ b/vel/net/layer/nlp/alphabet_one_hot_encode.py @@ -0,0 +1,47 @@ +from vel.api import SizeHints +from vel.net.layer_base import Layer, LayerFactory + +from vel.util.tensor_util import one_hot_encoding + + +class AlphabetOneHotEncodeLayer(Layer): + """ + Encode incoming tensor encoded using certain alphabet into one-hot encoding + """ + def __init__(self, name: str, alphabet_size: int, input_shape: SizeHints): + super().__init__(name) + + self.alphabet_size = alphabet_size + self.output_size = SizeHints(input_shape.assert_single().append(self.alphabet_size + 1)) + + def forward(self, direct, state: dict = None, context: dict = None): + return one_hot_encoding(direct, num_labels=self.alphabet_size + 1) + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + return self.output_size + + +class AlphabetOneHotEncodeLayerFactory(LayerFactory): + """ Factory class for the AlphabetOneHotEncode layer """ + + def __init__(self): + pass + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "alphabet_one_hot_encode" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + alphabet_size = 
extra_args['alphabet_size'] + return AlphabetOneHotEncodeLayer( + name=name, + alphabet_size=alphabet_size, + input_shape=direct_input + ) + + +def create(): + """ Vel factory function """ + return AlphabetOneHotEncodeLayerFactory() diff --git a/vel/net/layer/rnn.py b/vel/net/layer/rnn.py new file mode 100644 index 00000000..7cbf9e13 --- /dev/null +++ b/vel/net/layer/rnn.py @@ -0,0 +1,126 @@ +import torch +import torch.nn as nn +import torch.nn.init as init + +from vel.api import SizeHints +from vel.net.layer_base import Layer, LayerFactory + + +class RnnLayer(Layer): + """ Single Recurrent Layer """ + def __init__(self, name: str, input_size: SizeHints, hidden_size: int, rnn_type: str, + bias: bool = True, bidirectional: bool = False, nonlinearity: str = 'tanh'): + super().__init__(name) + + self.input_size = input_size + self.input_length = input_size.assert_single().last() + self.hidden_size = hidden_size + self.rnn_type = rnn_type + + self.bias = bias + self.bidirectional = bidirectional + self.nonlinearity = nonlinearity + + if self.rnn_type == 'rnn': + self.rnn_cell = nn.RNN( + input_size=self.input_length, hidden_size=hidden_size, bias=bias, nonlinearity=nonlinearity, + bidirectional=bidirectional, batch_first=True + ) + elif self.rnn_type == 'lstm': + self.rnn_cell = nn.LSTM( + input_size=self.input_length, hidden_size=hidden_size, bias=bias, + bidirectional=bidirectional, batch_first=True + ) + elif self.rnn_type == 'gru': + self.rnn_cell = nn.GRU( + input_size=self.input_length, hidden_size=hidden_size, bias=bias, + bidirectional=bidirectional, batch_first=True + ) + + self.output_size = input_size.assert_single().drop_last().append(self.hidden_size) + + def reset_weights(self): + """ Call proper initializers for the weights """ + init.xavier_normal_(self.rnn_cell.weight_hh) + init.xavier_normal_(self.rnn_cell.weight_ih) + init.zeros_(self.rnn_cell.bias_ih) + init.zeros_(self.rnn_cell.bias_hh) + + @property + def is_stateful(self) -> bool: + """ If the 
model has a state that needs to be fed between individual observations """ + return True + + @property + def state_dim(self) -> int: + """ Dimension of model state """ + if self.rnn_type == 'lstm': + return 2 * self.hidden_size + else: + return self.hidden_size + + def zero_state(self, batch_size): + """ Potential state for the model """ + return {self.name: torch.zeros(batch_size, self.state_dim)} + + def forward(self, input_data, state: dict, context: dict = None): + """ Forward propagation of a single layer """ + + if self.rnn_type == 'lstm': + state_tensor = state[self.name].unsqueeze(0) + hidden_state, cell_state = torch.split(state_tensor, self.hidden_size, dim=2) + output, (hidden_state, cell_state) = self.rnn_cell( + input_data, (hidden_state.contiguous(), cell_state.contiguous()) + ) + new_state = torch.cat([hidden_state, cell_state], dim=2) + return output, {self.name: new_state[0]} + else: + state_tensor = state[self.name].unsqueeze(0) + output, new_state = self.rnn_cell(input_data, state_tensor) + return output, {self.name: new_state[0]} + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + return SizeHints(self.output_size) + + +class RnnLayerFactory(LayerFactory): + """ Factory class for the RnnLayer """ + + def __init__(self, hidden_size: int, rnn_type: str, bias: bool = True, bidirectional: bool = False, + nonlinearity: str = 'tanh'): + self.hidden_size = hidden_size + self.rnn_type = rnn_type + + self.bias = bias + self.bidirectional = bidirectional + self.nonlinearity = nonlinearity + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "rnn" + + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + """ Create instance of 'RnnLayer' """ + return RnnLayer( + name=name, + input_size=direct_input, + hidden_size=self.hidden_size, + rnn_type=self.rnn_type, + bias=self.bias, + bidirectional=self.bidirectional, + nonlinearity=self.nonlinearity + ) + + 
+def create(hidden_size: int, rnn_type: str, bias: bool = True, bidirectional: bool = False, + nonlinearity: str = 'tanh'): + """ Vel factory function """ + return RnnLayerFactory( + hidden_size=hidden_size, + rnn_type=rnn_type, + bias=bias, + bidirectional=bidirectional, + nonlinearity=nonlinearity + ) diff --git a/vel/net/layer/util/concat.py b/vel/net/layer/util/concat.py index d80e2d57..7d2d7b57 100644 --- a/vel/net/layer/util/concat.py +++ b/vel/net/layer/util/concat.py @@ -29,7 +29,7 @@ def name_base(self) -> str: """ Base of layer name """ return "concat" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: inputs = direct_input.assert_tuple() result = [] diff --git a/vel/net/layer/util/repeat.py b/vel/net/layer/util/repeat.py index 32ca7ede..9fda8050 100644 --- a/vel/net/layer/util/repeat.py +++ b/vel/net/layer/util/repeat.py @@ -26,7 +26,7 @@ def name_base(self) -> str: """ Base of layer name """ return "repeat_tensor" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: return RepeatTensor( name=name, times=self.times, diff --git a/vel/net/layer_base.py b/vel/net/layer_base.py index 62dcabcb..d1be235f 100644 --- a/vel/net/layer_base.py +++ b/vel/net/layer_base.py @@ -1,6 +1,4 @@ -import typing - -from vel.api import BackboneNetwork, SizeHints, SizeHint +from vel.api import BackboneNetwork, SizeHints class Layer(BackboneNetwork): @@ -8,10 +6,6 @@ def __init__(self, name: str): super().__init__() self.name = name - def state_size_hints(self) -> typing.Dict[str, SizeHint]: - """ Size hints for state part of this network """ - return {} - def forward(self, direct, state: dict = None, context: dict = None): """ Forward propagation of a single layer """ raise NotImplementedError @@ -25,7 +19,7 @@ 
def name_base(self) -> str: """ Base of layer name """ raise NotImplementedError - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: """ Create a given layer object """ raise NotImplementedError diff --git a/vel/net/modular.py b/vel/net/modular.py index c23bf0a6..517d2c2c 100644 --- a/vel/net/modular.py +++ b/vel/net/modular.py @@ -3,10 +3,34 @@ import torch.nn as nn from vel.api import BackboneNetwork, ModelFactory, SizeHints +from vel.util.tensor_util import to_device from .layer_base import LayerFactory -def instantiate_layers(layers: [LayerFactory], size_hint: SizeHints) -> nn.Module: +class ModularSequential(nn.Module): + """ Modification of nn.Sequential for the purpose of modular networks """ + def __init__(self, layers: collections.OrderedDict): + super().__init__() + + self._layers = [] + + for key, module in layers.items(): + self.add_module(key, module) + self._layers.append(module) + + def __len__(self): + return len(self._layers) + + def __getitem__(self, item): + return self._layers[item] + + def forward(self, direct, state: dict = None, context: dict = None): + for layer in self._layers: + direct = layer(direct, state=state, context=context) + return direct + + +def instantiate_layers(layers: [LayerFactory], size_hint: SizeHints, extra_args: dict) -> nn.Module: """ Instantiate list of layer factories into PyTorch Module """ module_dict = collections.OrderedDict() context = {} @@ -15,12 +39,12 @@ def instantiate_layers(layers: [LayerFactory], size_hint: SizeHints) -> nn.Modul counter = idx + 1 name = "{}_{:04d}".format(layer_factory.name_base, counter) - layer = layer_factory.instantiate(name=name, direct_input=size_hint, context=context) + layer = layer_factory.instantiate(name=name, direct_input=size_hint, context=context, extra_args=extra_args) size_hint = layer.size_hints() module_dict[name] = layer - return 
nn.Sequential(module_dict) + return ModularSequential(module_dict) class ModularNetwork(BackboneNetwork): @@ -94,12 +118,15 @@ def reset_state(self, state, dones): """ Reset the state after the episode has been terminated """ raise NotImplementedError - def forward(self, input_data, state): + def forward(self, input_data, state=None): data = input_data context = {} output_state = {} + if state is None: + state = to_device(self.zero_state(input_data.size(0)), input_data.device) + for layer in self.layers: if layer.is_stateful: data, new_state = layer(data, state=state, context=context) @@ -120,7 +147,7 @@ def instantiate(self, size_hint=None, **extra_args) -> BackboneNetwork: if size_hint is None: size_hint = SizeHints() - layers = instantiate_layers(self.layers, size_hint=size_hint) + layers = instantiate_layers(self.layers, size_hint=size_hint, extra_args=extra_args) is_stateful = any(l.is_stateful for l in layers) if is_stateful: diff --git a/vel/rl/layer/double_nature_cnn.py b/vel/rl/layer/double_nature_cnn.py index 2e269783..3f78db01 100644 --- a/vel/rl/layer/double_nature_cnn.py +++ b/vel/rl/layer/double_nature_cnn.py @@ -110,7 +110,7 @@ def name_base(self) -> str: """ Base of layer name """ return "double_nature_cnn" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: (b, c, w, h) = direct_input.assert_single(4) return DoubleNatureCnn( diff --git a/vel/rl/layer/double_noisy_nature_cnn.py b/vel/rl/layer/double_noisy_nature_cnn.py index acade064..f0740be8 100644 --- a/vel/rl/layer/double_noisy_nature_cnn.py +++ b/vel/rl/layer/double_noisy_nature_cnn.py @@ -128,7 +128,7 @@ def name_base(self) -> str: """ Base of layer name """ return "double_noisy_nature_cnn" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: 
dict) -> Layer: (b, c, w, h) = direct_input.assert_single(4) return DoubleNoisyNatureCnn( diff --git a/vel/rl/layer/nature_cnn.py b/vel/rl/layer/nature_cnn.py index f2503a62..b9845a16 100644 --- a/vel/rl/layer/nature_cnn.py +++ b/vel/rl/layer/nature_cnn.py @@ -94,7 +94,7 @@ def name_base(self) -> str: """ Base of layer name """ return "nature_cnn" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: (b, c, w, h) = direct_input.assert_single(4) return NatureCnn( diff --git a/vel/rl/layer/nature_cnn_small.py b/vel/rl/layer/nature_cnn_small.py index c9ac77bb..e8bc8928 100644 --- a/vel/rl/layer/nature_cnn_small.py +++ b/vel/rl/layer/nature_cnn_small.py @@ -88,7 +88,7 @@ def name_base(self) -> str: """ Base of layer name """ return "nature_cnn_small" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: (b, c, w, h) = direct_input.assert_single(4) return NatureCnnSmall( diff --git a/vel/rl/layer/rnn_cell.py b/vel/rl/layer/rnn_cell.py index 678b003f..a509072d 100644 --- a/vel/rl/layer/rnn_cell.py +++ b/vel/rl/layer/rnn_cell.py @@ -82,7 +82,7 @@ def __init__(self, hidden_size: int, rnn_type: str, bias: bool = True, nonlinear def name_base(self) -> str: return "rnn_cell" - def instantiate(self, name: str, direct_input: SizeHints, context: dict) -> Layer: + def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: input_size = direct_input.assert_single().last() return RnnCell( diff --git a/vel/util/summary.py b/vel/util/summary.py deleted file mode 100644 index d7608601..00000000 --- a/vel/util/summary.py +++ /dev/null @@ -1,87 +0,0 @@ -""" -Code based on: https://github.com/sksq96/pytorch-summary/blob/master/torchsummary/torchsummary.py -""" -import torch -import 
torch.nn as nn -from torch.autograd import Variable - -from collections import OrderedDict - - -def summary(model, input_size): - """ Print summary of the model """ - def register_hook(module): - def hook(module, input, output): - class_name = str(module.__class__).split('.')[-1].split("'")[0] - module_idx = len(summary) - - m_key = '%s-%i' % (class_name, module_idx + 1) - summary[m_key] = OrderedDict() - summary[m_key]['input_shape'] = list(input[0].size()) - summary[m_key]['input_shape'][0] = -1 - if isinstance(output, (list, tuple)): - summary[m_key]['output_shape'] = [[-1] + list(o.size())[1:] for o in output] - else: - summary[m_key]['output_shape'] = list(output.size()) - summary[m_key]['output_shape'][0] = -1 - - params = 0 - if hasattr(module, 'weight') and hasattr(module.weight, 'size'): - params += torch.prod(torch.LongTensor(list(module.weight.size()))) - summary[m_key]['trainable'] = module.weight.requires_grad - if hasattr(module, 'bias') and hasattr(module.bias, 'size'): - params += torch.prod(torch.LongTensor(list(module.bias.size()))) - summary[m_key]['nb_params'] = params - - if (not isinstance(module, nn.Sequential) and - not isinstance(module, nn.ModuleList) and - not (module == model)): - hooks.append(module.register_forward_hook(hook)) - - if torch.cuda.is_available(): - dtype = torch.cuda.FloatTensor - model = model.cuda() - else: - dtype = torch.FloatTensor - model = model.cpu() - - # check if there are multiple inputs to the network - if isinstance(input_size[0], (list, tuple)): - x = [Variable(torch.rand(2, *in_size)).type(dtype) for in_size in input_size] - else: - x = Variable(torch.rand(2, *input_size)).type(dtype) - - # print(type(x[0])) - # create properties - summary = OrderedDict() - hooks = [] - # register hook - model.apply(register_hook) - # make a forward pass - # print(x.shape) - model(x) - # remove these hooks - for h in hooks: - h.remove() - - print('----------------------------------------------------------------') - line_new 
= '{:>20} {:>25} {:>15}'.format('Layer (type)', 'Output Shape', 'Param #') - print(line_new) - print('================================================================') - total_params = 0 - trainable_params = 0 - for layer in summary: - # input_shape, output_shape, trainable, nb_params - line_new = '{:>20} {:>25} {:>15}'.format(layer, str(summary[layer]['output_shape']), - '{0:,}'.format(summary[layer]['nb_params'])) - total_params += summary[layer]['nb_params'] - if 'trainable' in summary[layer]: - if summary[layer]['trainable']: - trainable_params += summary[layer]['nb_params'] - print(line_new) - print('================================================================') - print('Total params: {0:,}'.format(total_params)) - print('Trainable params: {0:,}'.format(trainable_params)) - print('Non-trainable params: {0:,}'.format(total_params - trainable_params)) - print('----------------------------------------------------------------') - # return summary From 0d5e6b153547a0615ac9f85b3cd63e86f032a640 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 6 Oct 2019 21:38:56 -0700 Subject: [PATCH 123/162] Deleting abandoned files. 
--- vel/data/augmentation/tta/__init__.py | 0 vel/data/augmentation/tta/train_tta.py | 107 ---- vel/rl/test/__init__.py | 0 vel/rl/test/test_integration.py | 679 ------------------------- 4 files changed, 786 deletions(-) delete mode 100644 vel/data/augmentation/tta/__init__.py delete mode 100644 vel/data/augmentation/tta/train_tta.py delete mode 100644 vel/rl/test/__init__.py delete mode 100644 vel/rl/test/test_integration.py diff --git a/vel/data/augmentation/tta/__init__.py b/vel/data/augmentation/tta/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/vel/data/augmentation/tta/train_tta.py b/vel/data/augmentation/tta/train_tta.py deleted file mode 100644 index 78621428..00000000 --- a/vel/data/augmentation/tta/train_tta.py +++ /dev/null @@ -1,107 +0,0 @@ -# import torch -# import torch.utils.data as tdata -# -# import vel.api.data as wdata -# -# -# class TrainTTALoader: -# def __init__(self, n_augmentations, batch_size, data_source, augmentations, num_workers): -# self.n_augmentations = n_augmentations -# self.data_source = data_source -# self.augmentations = augmentations -# -# self.val_ds = wdata.DataFlow(self.data_source, augmentations, tag='val') -# self.train_ds = wdata.DataFlow(self.data_source, augmentations, tag='train') -# -# self.val_loader = tdata.DataLoader( -# self.val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers -# ) -# -# self.train_loader = tdata.DataLoader( -# self.train_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers -# ) -# -# def __len__(self): -# return (1 + self.n_augmentations) * len(self.val_loader) -# -# def __iter__(self): -# iterlist = [iter(self.val_loader)] -# -# for _ in range(self.n_augmentations): -# iterlist.append(iter(self.train_loader)) -# -# for _ in range(len(self.val_loader)): -# for iterator in iterlist: -# yield next(iterator) -# -# -# class TrainTTAAccumulator: -# def __init__(self, metric_accumulator, n_augmentations, data_source): -# self.metric_accumulator 
= metric_accumulator -# -# self.source_elements = len(data_source) -# self.n_augmentations = n_augmentations -# -# self.data = None -# self.target = None -# -# self.accumulated_output = [] -# self.accumulated_context = [] -# -# self.index = 0 -# -# # def calculate(self, data, target, output, context): -# def calculate(self, data_dict): -# """ Accumulate results """ -# data = data_dict['data'] -# target = data_dict['target'] -# output = data_dict['output'] -# -# if self.index == 0: -# self.data = data -# -# self.target = target -# -# self.accumulated_output.append(output) -# self.accumulated_context.append(context) -# -# self.index += 1 -# -# if self.index == (1 + self.n_augmentations): -# new_output = torch.mean(torch.stack(self.accumulated_output, dim=-1), dim=-1) -# new_context = { -# k: torch.mean(torch.stack([c[k] for c in self.accumulated_context], dim=-1), dim=-1) -# for k in context.keys() -# } -# -# self.metric_accumulator.calculate(self.data, self.target, new_output, new_context) -# -# self.index = 0 -# self.data = None -# self.target = None -# self.accumulated_output = [] -# self.accumulated_context = [] -# -# -# class TrainTTA: -# """ Test time augmentation that generates additional samples according to the training set augmentations """ -# def __init__(self, n_augmentations): -# self.n_augmentations = n_augmentations -# -# def loader(self, data_source, augmentations, batch_size, num_workers): -# """ Return loader for the test-time-augmentation set """ -# return TrainTTALoader( -# n_augmentations=self.n_augmentations, -# batch_size=batch_size, -# data_source=data_source, -# augmentations=augmentations, -# num_workers=num_workers -# ) -# -# def accumulator(self, metric_accumulator, val_source): -# """ Reset internal state """ -# return TrainTTAAccumulator(metric_accumulator, self.n_augmentations, val_source) -# -# -# def create(n_augmentations): -# return TrainTTA(n_augmentations) diff --git a/vel/rl/test/__init__.py b/vel/rl/test/__init__.py deleted file 
mode 100644 index e69de29b..00000000 diff --git a/vel/rl/test/test_integration.py b/vel/rl/test/test_integration.py deleted file mode 100644 index cc488751..00000000 --- a/vel/rl/test/test_integration.py +++ /dev/null @@ -1,679 +0,0 @@ -# import torch -# import torch.optim as optim -# -# from vel.module.input.image_to_tensor import ImageToTensorFactory -# from vel.module.input.normalize_observations import NormalizeObservationsFactory -# from vel.rl.buffer.circular_replay_buffer import CircularReplayBuffer -# from vel.rl.buffer.prioritized_circular_replay_buffer import PrioritizedCircularReplayBuffer -# from vel.rl.command.rl_train_command import FrameTracker -# from vel.rl.env_roller.step_env_roller import StepEnvRoller -# from vel.rl.env_roller.trajectory_replay_env_roller import TrajectoryReplayEnvRoller -# from vel.rl.env_roller.transition_replay_env_roller import TransitionReplayEnvRoller -# from vel.rl.metrics import EpisodeRewardMetric -# from vel.rl.module.noise.eps_greedy import EpsGreedy -# from vel.rl.module.noise.ou_noise import OuNoise -# from vel.function.linear import LinearSchedule -# from vel.function.linear_and_constant import LinearAndConstantSchedule -# from vel.util.random import set_seed -# -# from vel.rl.env.classic_atari import ClassicAtariEnv -# from vel.rl.env.mujoco import MujocoEnv -# from vel.rl.vecenv.subproc import SubprocVecEnvWrapper -# from vel.rl.vecenv.dummy import DummyVecEnvWrapper -# -# from vel.rl.policy.stochastic_policy import StochasticPolicyFactory -# # from vel.rl.model.q_stochastic_policy_model import QStochasticPolicyModelFactory -# # from vel.rl.model.q_model import QModelFactory -# # from vel.rl.model.deterministic_policy_model import DeterministicPolicyModelFactory -# # from vel.rl.model.stochastic_policy_model_separate import StochasticPolicyModelSeparateFactory -# -# from vel.rl.backbone.nature_cnn import NatureCnnFactory -# from vel.rl.backbone.mlp import MLPFactory -# -# from 
vel.rl.reinforcer.on_policy_iteration_reinforcer import ( -# OnPolicyIterationReinforcer, OnPolicyIterationReinforcerSettings -# ) -# -# from vel.rl.reinforcer.buffered_off_policy_iteration_reinforcer import ( -# BufferedOffPolicyIterationReinforcer, BufferedOffPolicyIterationReinforcerSettings -# ) -# -# from vel.rl.reinforcer.buffered_mixed_policy_iteration_reinforcer import ( -# BufferedMixedPolicyIterationReinforcer, BufferedMixedPolicyIterationReinforcerSettings -# ) -# -# from vel.rl.algo.dqn import DeepQLearning -# from vel.rl.algo.policy_gradient.a2c import A2CPolicyGradient -# from vel.rl.algo.policy_gradient.ppo import PpoPolicyGradient -# from vel.rl.algo.policy_gradient.trpo import TrpoPolicyGradient -# from vel.rl.algo.policy_gradient.acer import AcerPolicyGradient -# from vel.rl.algo.policy_gradient.ddpg import DeepDeterministicPolicyGradient -# -# from vel.api.info import TrainingInfo, EpochInfo -# -# -# CPU_DEVICE = torch.device('cpu') -# -# -# def test_a2c_breakout(): -# """ -# Simple 1 iteration of a2c breakout -# """ -# seed = 1001 -# -# # Set random seed in python std lib, numpy and pytorch -# set_seed(seed) -# -# # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers -# # These are just helper functions for that -# vec_env = SubprocVecEnvWrapper( -# ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 -# ).instantiate(parallel_envs=16, seed=seed) -# -# # Again, use a helper to create a model -# # But because model is owned by the reinforcer, model should not be accessed using this variable -# # but from reinforcer.model property -# policy = StochasticPolicyFactory( -# input_block=ImageToTensorFactory(), -# backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) -# ).instantiate(action_space=vec_env.action_space) -# -# # Reinforcer - an object managing the learning process -# reinforcer = OnPolicyIterationReinforcer( -# device=CPU_DEVICE, -# 
settings=OnPolicyIterationReinforcerSettings( -# batch_size=256, -# number_of_steps=5 -# ), -# policy=policy, -# algo=A2CPolicyGradient( -# entropy_coefficient=0.01, -# value_coefficient=0.5, -# discount_factor=0.99, -# max_grad_norm=0.5 -# ), -# env_roller=StepEnvRoller( -# environment=vec_env, -# policy=policy, -# device=CPU_DEVICE -# ) -# ) -# -# # Model optimizer -# optimizer = optim.RMSprop(reinforcer.policy.parameters(), lr=7.0e-4, eps=1e-3) -# -# # Overall information store for training information -# training_info = TrainingInfo( -# metrics=[ -# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode -# ], -# callbacks=[] # Print live metrics every epoch to standard output -# ) -# -# # A bit of training initialization bookkeeping... -# training_info.initialize() -# reinforcer.initialize_training(training_info) -# training_info.on_train_begin() -# -# # Let's make 100 batches per epoch to average metrics nicely -# num_epochs = 1 -# -# # Normal handrolled training loop -# for i in range(1, num_epochs+1): -# epoch_info = EpochInfo( -# training_info=training_info, -# global_epoch_idx=i, -# batches_per_epoch=1, -# optimizer=optimizer -# ) -# -# reinforcer.train_epoch(epoch_info, interactive=False) -# -# training_info.on_train_end() -# -# -# def test_ppo_breakout(): -# """ -# Simple 1 iteration of ppo breakout -# """ -# device = torch.device('cpu') -# seed = 1001 -# -# # Set random seed in python std lib, numpy and pytorch -# set_seed(seed) -# -# # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers -# # These are just helper functions for that -# vec_env = SubprocVecEnvWrapper( -# ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 -# ).instantiate(parallel_envs=8, seed=seed) -# -# # Again, use a helper to create a model -# # But because model is owned by the reinforcer, model should not be accessed using this variable -# # but from reinforcer.model property -# policy = 
StochasticPolicyFactory( -# input_block=ImageToTensorFactory(), -# backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) -# ).instantiate(action_space=vec_env.action_space) -# -# # Reinforcer - an object managing the learning process -# reinforcer = OnPolicyIterationReinforcer( -# device=device, -# settings=OnPolicyIterationReinforcerSettings( -# number_of_steps=12, -# batch_size=4, -# experience_replay=2, -# ), -# policy=policy, -# algo=PpoPolicyGradient( -# entropy_coefficient=0.01, -# value_coefficient=0.5, -# max_grad_norm=0.5, -# cliprange=LinearSchedule(0.1, 0.0), -# discount_factor=0.99, -# normalize_advantage=True -# ), -# env_roller=StepEnvRoller( -# environment=vec_env, -# policy=policy, -# device=device, -# ) -# ) -# -# # Model optimizer -# # optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=7.0e-4, eps=1e-3) -# optimizer = optim.Adam(reinforcer.policy.parameters(), lr=2.5e-4, eps=1e-5) -# -# # Overall information store for training information -# training_info = TrainingInfo( -# metrics=[ -# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode -# ], -# callbacks=[ -# FrameTracker(100_000) -# ] # Print live metrics every epoch to standard output -# ) -# -# # A bit of training initialization bookkeeping... 
-# training_info.initialize() -# reinforcer.initialize_training(training_info) -# training_info.on_train_begin() -# -# # Let's make 100 batches per epoch to average metrics nicely -# num_epochs = 1 -# -# # Normal handrolled training loop -# for i in range(1, num_epochs+1): -# epoch_info = EpochInfo( -# training_info=training_info, -# global_epoch_idx=i, -# batches_per_epoch=1, -# optimizer=optimizer -# ) -# -# reinforcer.train_epoch(epoch_info, interactive=False) -# -# training_info.on_train_end() - - -# def test_dqn_breakout(): -# """ -# Simple 1 iteration of DQN breakout -# """ -# device = torch.device('cpu') -# seed = 1001 -# -# # Set random seed in python std lib, numpy and pytorch -# set_seed(seed) -# -# # Only single environment for DQN -# vec_env = DummyVecEnvWrapper( -# ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 -# ).instantiate(parallel_envs=1, seed=seed) -# -# # Again, use a helper to create a model -# # But because model is owned by the reinforcer, model should not be accessed using this variable -# # but from reinforcer.model property -# model_factory = QModelFactory( -# input_block=ImageToTensorFactory(), -# backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) -# ) -# -# # Reinforcer - an object managing the learning process -# reinforcer = BufferedOffPolicyIterationReinforcer( -# device=device, -# settings=BufferedOffPolicyIterationReinforcerSettings( -# rollout_steps=4, -# training_steps=1, -# ), -# environment=vec_env, -# algo=DeepQLearning( -# model_factory=model_factory, -# double_dqn=False, -# target_update_frequency=10_000, -# discount_factor=0.99, -# max_grad_norm=0.5 -# ), -# model=model_factory.instantiate(action_space=vec_env.action_space), -# env_roller=TransitionReplayEnvRoller( -# environment=vec_env, -# device=device, -# replay_buffer=CircularReplayBuffer( -# buffer_capacity=100, -# buffer_initial_size=100, -# num_envs=vec_env.num_envs, -# observation_space=vec_env.observation_space, -# 
action_space=vec_env.action_space, -# frame_stack_compensation=True, -# frame_history=4 -# ), -# action_noise=EpsGreedy( -# epsilon=LinearAndConstantSchedule( -# initial_value=1.0, final_value=0.1, end_of_interpolation=0.1 -# ), -# environment=vec_env -# ) -# ) -# ) -# -# # Model optimizer -# optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=2.5e-4, alpha=0.95, momentum=0.95, eps=1e-3) -# -# # Overall information store for training information -# training_info = TrainingInfo( -# metrics=[ -# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode -# ], -# callbacks=[ -# FrameTracker(100_000) -# ] # Print live metrics every epoch to standard output -# ) -# -# # A bit of training initialization bookkeeping... -# training_info.initialize() -# reinforcer.initialize_training(training_info) -# training_info.on_train_begin() -# -# # Let's make 100 batches per epoch to average metrics nicely -# num_epochs = 1 -# -# # Normal handrolled training loop -# for i in range(1, num_epochs+1): -# epoch_info = EpochInfo( -# training_info=training_info, -# global_epoch_idx=i, -# batches_per_epoch=1, -# optimizer=optimizer -# ) -# -# reinforcer.train_epoch(epoch_info, interactive=False) -# -# training_info.on_train_end() -# -# -# def test_prioritized_dqn_breakout(): -# """ -# Simple 1 iteration of DQN prioritized replay breakout -# """ -# device = torch.device('cpu') -# seed = 1001 -# -# # Set random seed in python std lib, numpy and pytorch -# set_seed(seed) -# -# # Only single environment for DQN -# vec_env = DummyVecEnvWrapper( -# ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 -# ).instantiate(parallel_envs=1, seed=seed) -# -# # Again, use a helper to create a model -# # But because model is owned by the reinforcer, model should not be accessed using this variable -# # but from reinforcer.model property -# model_factory = QModelFactory( -# input_block=ImageToTensorFactory(), -# backbone=NatureCnnFactory(input_width=84, input_height=84, 
input_channels=4) -# ) -# -# # Reinforcer - an object managing the learning process -# reinforcer = BufferedOffPolicyIterationReinforcer( -# device=device, -# settings=BufferedOffPolicyIterationReinforcerSettings( -# rollout_steps=4, -# training_steps=1, -# ), -# environment=vec_env, -# algo=DeepQLearning( -# model_factory=model_factory, -# double_dqn=False, -# target_update_frequency=10_000, -# discount_factor=0.99, -# max_grad_norm=0.5 -# ), -# model=model_factory.instantiate(action_space=vec_env.action_space), -# env_roller=TransitionReplayEnvRoller( -# environment=vec_env, -# device=device, -# replay_buffer=PrioritizedCircularReplayBuffer( -# buffer_capacity=100, -# buffer_initial_size=100, -# num_envs=vec_env.num_envs, -# observation_space=vec_env.observation_space, -# action_space=vec_env.action_space, -# priority_exponent=0.6, -# priority_weight=LinearSchedule( -# initial_value=0.4, -# final_value=1.0 -# ), -# priority_epsilon=1.0e-6, -# frame_stack_compensation=True, -# frame_history=4 -# ), -# action_noise=EpsGreedy( -# epsilon=LinearAndConstantSchedule( -# initial_value=1.0, final_value=0.1, end_of_interpolation=0.1 -# ), -# environment=vec_env -# ) -# ) -# ) -# -# # Model optimizer -# optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=2.5e-4, alpha=0.95, momentum=0.95, eps=1e-3) -# -# # Overall information store for training information -# training_info = TrainingInfo( -# metrics=[ -# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode -# ], -# callbacks=[ -# FrameTracker(100_000) -# ] # Print live metrics every epoch to standard output -# ) -# -# # A bit of training initialization bookkeeping... 
-# training_info.initialize() -# reinforcer.initialize_training(training_info) -# training_info.on_train_begin() -# -# # Let's make 100 batches per epoch to average metrics nicely -# num_epochs = 1 -# -# # Normal handrolled training loop -# for i in range(1, num_epochs+1): -# epoch_info = EpochInfo( -# training_info=training_info, -# global_epoch_idx=i, -# batches_per_epoch=1, -# optimizer=optimizer -# ) -# -# reinforcer.train_epoch(epoch_info, interactive=False) -# -# training_info.on_train_end() -# -# -# def test_ddpg_bipedal_walker(): -# """ -# 1 iteration of DDPG bipedal walker environment -# """ -# device = torch.device('cpu') -# seed = 1001 -# -# # Set random seed in python std lib, numpy and pytorch -# set_seed(seed) -# -# # Only single environment for DDPG -# -# vec_env = DummyVecEnvWrapper( -# MujocoEnv('BipedalWalker-v2') -# ).instantiate(parallel_envs=1, seed=seed) -# -# # Again, use a helper to create a model -# # But because model is owned by the reinforcer, model should not be accessed using this variable -# # but from reinforcer.model property -# model_factory = DeterministicPolicyModelFactory( -# input_block=NormalizeObservationsFactory(input_shape=24), -# policy_backbone=MLPFactory(input_length=24, hidden_layers=[64, 64], normalization='layer'), -# value_backbone=MLPFactory(input_length=28, hidden_layers=[64, 64], normalization='layer') -# ) -# -# # Reinforcer - an object managing the learning process -# reinforcer = BufferedOffPolicyIterationReinforcer( -# device=device, -# settings=BufferedOffPolicyIterationReinforcerSettings( -# rollout_steps=4, -# training_steps=1, -# ), -# environment=vec_env, -# algo=DeepDeterministicPolicyGradient( -# model_factory=model_factory, -# tau=0.01, -# discount_factor=0.99, -# max_grad_norm=0.5 -# ), -# model=model_factory.instantiate(action_space=vec_env.action_space), -# env_roller=TransitionReplayEnvRoller( -# environment=vec_env, -# device=device, -# action_noise=OuNoise(std_dev=0.2, environment=vec_env), -# 
replay_buffer=CircularReplayBuffer( -# buffer_capacity=100, -# buffer_initial_size=100, -# num_envs=vec_env.num_envs, -# observation_space=vec_env.observation_space, -# action_space=vec_env.action_space -# ), -# normalize_returns=True, -# discount_factor=0.99 -# ), -# ) -# -# # Model optimizer -# optimizer = optim.Adam(reinforcer.model.parameters(), lr=2.5e-4, eps=1e-4) -# -# # Overall information store for training information -# training_info = TrainingInfo( -# metrics=[ -# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode -# ], -# callbacks=[ -# FrameTracker(100_000) -# ] # Print live metrics every epoch to standard output -# ) -# -# # A bit of training initialization bookkeeping... -# training_info.initialize() -# reinforcer.initialize_training(training_info) -# training_info.on_train_begin() -# -# # Let's make 100 batches per epoch to average metrics nicely -# num_epochs = 1 -# -# # Normal handrolled training loop -# for i in range(1, num_epochs+1): -# epoch_info = EpochInfo( -# training_info=training_info, -# global_epoch_idx=i, -# batches_per_epoch=1, -# optimizer=optimizer -# ) -# -# reinforcer.train_epoch(epoch_info, interactive=False) -# -# training_info.on_train_end() -# -# -# def test_trpo_bipedal_walker(): -# """ -# 1 iteration of TRPO on bipedal walker -# """ -# device = torch.device('cpu') -# seed = 1001 -# -# # Set random seed in python std lib, numpy and pytorch -# set_seed(seed) -# -# vec_env = DummyVecEnvWrapper( -# MujocoEnv('BipedalWalker-v2', normalize_returns=True), -# ).instantiate(parallel_envs=8, seed=seed) -# -# # Again, use a helper to create a model -# # But because model is owned by the reinforcer, model should not be accessed using this variable -# # but from reinforcer.model property -# model_factory = StochasticPolicyModelSeparateFactory( -# input_block=NormalizeObservationsFactory(input_shape=24), -# policy_backbone=MLPFactory(input_length=24, hidden_layers=[32, 32]), -# 
value_backbone=MLPFactory(input_length=24, hidden_layers=[32]) -# ) -# -# # Reinforcer - an object managing the learning process -# reinforcer = OnPolicyIterationReinforcer( -# device=device, -# settings=OnPolicyIterationReinforcerSettings( -# number_of_steps=12, -# ), -# model=model_factory.instantiate(action_space=vec_env.action_space), -# algo=TrpoPolicyGradient( -# max_kl=0.01, -# cg_iters=10, -# line_search_iters=10, -# improvement_acceptance_ratio=0.1, -# cg_damping=0.1, -# vf_iters=5, -# entropy_coef=0.0, -# discount_factor=0.99, -# max_grad_norm=0.5, -# gae_lambda=1.0 -# ), -# env_roller=StepEnvRoller( -# environment=vec_env, -# device=device, -# ) -# ) -# -# # Model optimizer -# optimizer = optim.Adam(reinforcer.model.parameters(), lr=1.0e-3, eps=1e-4) -# -# # Overall information store for training information -# training_info = TrainingInfo( -# metrics=[ -# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode -# ], -# callbacks=[ -# FrameTracker(100_000) -# ] # Print live metrics every epoch to standard output -# ) -# -# # A bit of training initialization bookkeeping... 
-# training_info.initialize() -# reinforcer.initialize_training(training_info) -# training_info.on_train_begin() -# -# # Let's make 100 batches per epoch to average metrics nicely -# num_epochs = 1 -# -# # Normal handrolled training loop -# for i in range(1, num_epochs+1): -# epoch_info = EpochInfo( -# training_info=training_info, -# global_epoch_idx=i, -# batches_per_epoch=1, -# optimizer=optimizer -# ) -# -# reinforcer.train_epoch(epoch_info, interactive=False) -# -# training_info.on_train_end() -# -# -# def test_acer_breakout(): -# """ -# 1 iteration of ACER on breakout environment -# """ -# device = torch.device('cpu') -# seed = 1001 -# -# # Set random seed in python std lib, numpy and pytorch -# set_seed(seed) -# -# # Create 16 environments evaluated in parallel in sub processess with all usual DeepMind wrappers -# # These are just helper functions for that -# vec_env = SubprocVecEnvWrapper( -# ClassicAtariEnv('BreakoutNoFrameskip-v4'), frame_history=4 -# ).instantiate(parallel_envs=16, seed=seed) -# -# # Again, use a helper to create a model -# # But because model is owned by the reinforcer, model should not be accessed using this variable -# # but from reinforcer.model property -# model_factory = QStochasticPolicyModelFactory( -# input_block=ImageToTensorFactory(), -# backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4) -# ) -# -# # Reinforcer - an object managing the learning process -# reinforcer = BufferedMixedPolicyIterationReinforcer( -# device=device, -# settings=BufferedMixedPolicyIterationReinforcerSettings( -# experience_replay=2, -# number_of_steps=12, -# stochastic_experience_replay=False -# ), -# model=model_factory.instantiate(action_space=vec_env.action_space), -# env=vec_env, -# algo=AcerPolicyGradient( -# model_factory=model_factory, -# entropy_coefficient=0.01, -# q_coefficient=0.5, -# rho_cap=10.0, -# retrace_rho_cap=1.0, -# trust_region=True, -# trust_region_delta=1.0, -# discount_factor=0.99, -# max_grad_norm=10.0, 
-# ), -# env_roller=TrajectoryReplayEnvRoller( -# environment=vec_env, -# device=device, -# replay_buffer=CircularReplayBuffer( -# buffer_capacity=100, -# buffer_initial_size=100, -# num_envs=vec_env.num_envs, -# action_space=vec_env.action_space, -# observation_space=vec_env.observation_space, -# frame_stack_compensation=True, -# frame_history=4, -# ) -# ), -# ) -# -# # Model optimizer -# optimizer = optim.RMSprop(reinforcer.model.parameters(), lr=7.0e-4, eps=1e-3, alpha=0.99) -# -# # Overall information store for training information -# training_info = TrainingInfo( -# metrics=[ -# EpisodeRewardMetric('episode_rewards'), # Calculate average reward from episode -# ], -# callbacks=[] # Print live metrics every epoch to standard output -# ) -# -# # A bit of training initialization bookkeeping... -# training_info.initialize() -# reinforcer.initialize_training(training_info) -# training_info.on_train_begin() -# -# # Let's make 100 batches per epoch to average metrics nicely -# num_epochs = 1 -# -# # Normal handrolled training loop -# for i in range(1, num_epochs+1): -# epoch_info = EpochInfo( -# training_info=training_info, -# global_epoch_idx=i, -# batches_per_epoch=1, -# optimizer=optimizer -# ) -# -# reinforcer.train_epoch(epoch_info, interactive=False) -# -# training_info.on_train_end() From dfaef137289c51509a8d04851a8a41d287852f77 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 6 Oct 2019 21:40:37 -0700 Subject: [PATCH 124/162] Renaming, Network -> VModule. 
--- vel/api/__init__.py | 2 +- vel/api/model.py | 4 ++-- vel/api/model_factory.py | 4 ++-- vel/api/{network.py => vmodule.py} | 4 ++-- vel/model/nlp/language_model.py | 6 +++--- vel/module/input/flatten.py | 4 ++-- vel/module/input/image_to_tensor.py | 4 ++-- vel/module/input/normalize_observations.py | 4 ++-- vel/module/input/one_hot_encoding.py | 4 ++-- vel/net/layer_base.py | 4 ++-- vel/net/modular.py | 8 ++++---- vel/rl/module/actor_critic_policy.py | 8 ++++---- vel/rl/module/noise/eps_greedy.py | 4 ++-- vel/rl/module/noise/ou_noise.py | 4 ++-- vel/rl/module/q_policy.py | 6 +++--- vel/rl/module/q_stochastic_policy.py | 6 +++--- vel/rl/module/rainbow_policy.py | 6 +++--- vel/rl/module/stochastic_policy.py | 6 +++--- vel/rl/module/stochastic_rnn_policy.py | 6 +++--- vel/rl/policy/a2c.py | 4 ++-- vel/rl/policy/a2c_rnn.py | 4 ++-- vel/rl/policy/acer.py | 4 ++-- vel/rl/policy/ddpg.py | 4 ++-- vel/rl/policy/dqn.py | 4 ++-- vel/rl/policy/ppo.py | 4 ++-- vel/rl/policy/ppo_rnn.py | 4 ++-- vel/rl/policy/rainbow.py | 4 ++-- vel/rl/policy/trpo.py | 6 +++--- 28 files changed, 66 insertions(+), 66 deletions(-) rename vel/api/{network.py => vmodule.py} (92%) diff --git a/vel/api/__init__.py b/vel/api/__init__.py index 9ddf09d8..ceb95211 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -1,7 +1,7 @@ from .callback import Callback from .info import BatchInfo, EpochInfo, TrainingInfo from .size_hint import SizeHint, SizeHints -from .network import Network, BackboneNetwork +from .vmodule import VModule, BackboneModule from .model import ( Model, ValidatedModel, OptimizedModel, GradientModel, LossFunctionModel ) diff --git a/vel/api/model.py b/vel/api/model.py index 052bd1c3..ac3fe547 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -6,10 +6,10 @@ from vel.metric.loss_metric import Loss -from .network import Network +from .vmodule import VModule -class Model(Network): +class Model(VModule): """ Class representing full neural network model, generally used to 
solve some problem """ def metrics(self) -> list: diff --git a/vel/api/model_factory.py b/vel/api/model_factory.py index 0015f006..a79ad097 100644 --- a/vel/api/model_factory.py +++ b/vel/api/model_factory.py @@ -1,11 +1,11 @@ -from .network import Network +from .vmodule import VModule from vel.internal.generic_factory import GenericFactory class ModelFactory: """ Factory class for models """ - def instantiate(self, **extra_args) -> Network: + def instantiate(self, **extra_args) -> VModule: raise NotImplementedError @staticmethod diff --git a/vel/api/network.py b/vel/api/vmodule.py similarity index 92% rename from vel/api/network.py rename to vel/api/vmodule.py index b6c8b97d..1374258e 100644 --- a/vel/api/network.py +++ b/vel/api/vmodule.py @@ -3,7 +3,7 @@ from .size_hint import SizeHints -class Network(nn.Module): +class VModule(nn.Module): """ Vel wrapper over nn.Module offering a few internally useful utilities """ def reset_weights(self): @@ -20,7 +20,7 @@ def zero_state(self, batch_size): return None -class BackboneNetwork(Network): +class BackboneModule(VModule): """ Network, whose output feeds into other models. Needs to provide size hints. 
""" def size_hints(self) -> SizeHints: diff --git a/vel/model/nlp/language_model.py b/vel/model/nlp/language_model.py index 55b96adb..bd295a3b 100644 --- a/vel/model/nlp/language_model.py +++ b/vel/model/nlp/language_model.py @@ -2,13 +2,13 @@ import torch.nn as nn import torch.nn.functional as F -from vel.api import LossFunctionModel, ModelFactory, Network, BackboneNetwork, SizeHints, SizeHint +from vel.api import LossFunctionModel, ModelFactory, VModule, BackboneModule, SizeHints, SizeHint class LanguageModel(LossFunctionModel): """ Language model - autoregressive generative model for text """ - def __init__(self, alphabet_size: int, net: BackboneNetwork): + def __init__(self, alphabet_size: int, net: BackboneModule): super().__init__() self.net = net @@ -61,7 +61,7 @@ def __init__(self, alphabet_size: int, net_factory: ModelFactory): self.alphabet_size = alphabet_size self.net_factory = net_factory - def instantiate(self, **extra_args) -> Network: + def instantiate(self, **extra_args) -> VModule: size_hint = SizeHints(SizeHint(None, None)) net = self.net_factory.instantiate(alphabet_size=self.alphabet_size, size_hint=size_hint) diff --git a/vel/module/input/flatten.py b/vel/module/input/flatten.py index c69e9f02..9462f689 100644 --- a/vel/module/input/flatten.py +++ b/vel/module/input/flatten.py @@ -1,10 +1,10 @@ from vel.module.layers import Flatten -from vel.api import Network +from vel.api import VModule -class FlattenInput(Network): +class FlattenInput(VModule): """ Sequence input """ def __init__(self): super().__init__() diff --git a/vel/module/input/image_to_tensor.py b/vel/module/input/image_to_tensor.py index b02a3229..3ca95d3c 100644 --- a/vel/module/input/image_to_tensor.py +++ b/vel/module/input/image_to_tensor.py @@ -1,6 +1,6 @@ import torch -from vel.api import Network +from vel.api import VModule def image_to_tensor(image: torch.Tensor) -> torch.Tensor: @@ -15,7 +15,7 @@ def image_to_tensor(image: torch.Tensor) -> torch.Tensor: return result 
-class ImageToTensor(Network): +class ImageToTensor(VModule): """ Convert simple image to tensor. diff --git a/vel/module/input/normalize_observations.py b/vel/module/input/normalize_observations.py index 52dc8de9..a1965a0e 100644 --- a/vel/module/input/normalize_observations.py +++ b/vel/module/input/normalize_observations.py @@ -1,9 +1,9 @@ import torch -from vel.api import Network +from vel.api import VModule -class NormalizeObservations(Network): +class NormalizeObservations(VModule): """ Normalize a vector of observations """ def __init__(self, input_shape, epsilon=1e-6): diff --git a/vel/module/input/one_hot_encoding.py b/vel/module/input/one_hot_encoding.py index 15f4e961..6cc83b74 100644 --- a/vel/module/input/one_hot_encoding.py +++ b/vel/module/input/one_hot_encoding.py @@ -1,8 +1,8 @@ -from vel.api import Network +from vel.api import VModule from vel.module.layers import OneHotEncode -class OneHotEncodingInput(Network): +class OneHotEncodingInput(VModule): """ One-hot encoding input layer """ def __init__(self, alphabet_size: int): diff --git a/vel/net/layer_base.py b/vel/net/layer_base.py index d1be235f..1738e694 100644 --- a/vel/net/layer_base.py +++ b/vel/net/layer_base.py @@ -1,7 +1,7 @@ -from vel.api import BackboneNetwork, SizeHints +from vel.api import BackboneModule, SizeHints -class Layer(BackboneNetwork): +class Layer(BackboneModule): def __init__(self, name: str): super().__init__() self.name = name diff --git a/vel/net/modular.py b/vel/net/modular.py index 517d2c2c..d448d628 100644 --- a/vel/net/modular.py +++ b/vel/net/modular.py @@ -2,7 +2,7 @@ import torch.nn as nn -from vel.api import BackboneNetwork, ModelFactory, SizeHints +from vel.api import BackboneModule, ModelFactory, SizeHints from vel.util.tensor_util import to_device from .layer_base import LayerFactory @@ -47,7 +47,7 @@ def instantiate_layers(layers: [LayerFactory], size_hint: SizeHints, extra_args: return ModularSequential(module_dict) -class ModularNetwork(BackboneNetwork): 
+class ModularNetwork(BackboneModule): """ Network that is built from layers """ def __init__(self, layers: nn.Module): @@ -82,7 +82,7 @@ def forward(self, input_data, state=None): return self.layers(input_data, context=context) -class StatefulModularNetwork(BackboneNetwork): +class StatefulModularNetwork(BackboneModule): """ Modular network handling the state between the episodes """ def __init__(self, layers: nn.Module): @@ -142,7 +142,7 @@ class ModularNetworkFactory(ModelFactory): def __init__(self, layers: [LayerFactory]): self.layers = layers - def instantiate(self, size_hint=None, **extra_args) -> BackboneNetwork: + def instantiate(self, size_hint=None, **extra_args) -> BackboneModule: """ Create either stateful or not modular network instance """ if size_hint is None: size_hint = SizeHints() diff --git a/vel/rl/module/actor_critic_policy.py b/vel/rl/module/actor_critic_policy.py index dc6ae23f..bbfc45de 100644 --- a/vel/rl/module/actor_critic_policy.py +++ b/vel/rl/module/actor_critic_policy.py @@ -3,18 +3,18 @@ import gym import torch -from vel.api import Network, BackboneNetwork +from vel.api import VModule, BackboneModule from vel.rl.module.head.deterministic_action_head import DeterministicActionHead from vel.rl.module.head.deterministic_critic_head import DeterministicCriticHead -class ActorCriticPolicy(Network): +class ActorCriticPolicy(VModule): """ Deterministic Policy Gradient - model """ - def __init__(self, input_net: BackboneNetwork, policy_net: BackboneNetwork, - value_net: BackboneNetwork, action_space: gym.Space): + def __init__(self, input_net: BackboneModule, policy_net: BackboneModule, + value_net: BackboneModule, action_space: gym.Space): super().__init__() self.input_net = input_net diff --git a/vel/rl/module/noise/eps_greedy.py b/vel/rl/module/noise/eps_greedy.py index becf13e1..328c140e 100644 --- a/vel/rl/module/noise/eps_greedy.py +++ b/vel/rl/module/noise/eps_greedy.py @@ -3,12 +3,12 @@ import torch -from vel.api import Schedule, 
Network +from vel.api import Schedule, VModule from vel.internal.generic_factory import GenericFactory from vel.function.constant import ConstantSchedule -class EpsGreedy(Network): +class EpsGreedy(VModule): """ Epsilon-greedy action selection """ def __init__(self, action_space: gym.Space): super().__init__() diff --git a/vel/rl/module/noise/ou_noise.py b/vel/rl/module/noise/ou_noise.py index 721b8772..10c154f0 100644 --- a/vel/rl/module/noise/ou_noise.py +++ b/vel/rl/module/noise/ou_noise.py @@ -2,12 +2,12 @@ import numpy as np import torch -from vel.api import Network +from vel.api import VModule from vel.util.process import OrnsteinUhlenbeckNoiseProcess from vel.internal.generic_factory import GenericFactory -class OuNoise(Network): +class OuNoise(VModule): """ Ornstein–Uhlenbeck noise process for action noise """ def __init__(self, std_dev: float, action_space: gym.Space): diff --git a/vel/rl/module/q_policy.py b/vel/rl/module/q_policy.py index 7e6e8925..6c18a235 100644 --- a/vel/rl/module/q_policy.py +++ b/vel/rl/module/q_policy.py @@ -1,17 +1,17 @@ import gym -from vel.api import Network, BackboneNetwork +from vel.api import VModule, BackboneModule from vel.rl.module.head.q_head import QHead from vel.rl.module.head.q_dueling_head import QDuelingHead -class QPolicy(Network): +class QPolicy(VModule): """ Simple deterministic greedy action-value model. 
Supports only discrete action spaces (ones that can be enumerated) """ - def __init__(self, net: BackboneNetwork, action_space: gym.Space, dueling_dqn=False): + def __init__(self, net: BackboneModule, action_space: gym.Space, dueling_dqn=False): super().__init__() self.dueling_dqn = dueling_dqn diff --git a/vel/rl/module/q_stochastic_policy.py b/vel/rl/module/q_stochastic_policy.py index d98b1ac3..29f12107 100644 --- a/vel/rl/module/q_stochastic_policy.py +++ b/vel/rl/module/q_stochastic_policy.py @@ -1,17 +1,17 @@ import gym -from vel.api import BackboneNetwork, Network +from vel.api import BackboneModule, VModule from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head from vel.rl.module.head.q_head import QHead -class QStochasticPolicy(Network): +class QStochasticPolicy(VModule): """ A policy model with an action-value critic head (instead of more common state-value critic head). Supports only discrete action spaces (ones that can be enumerated) """ - def __init__(self, net: BackboneNetwork, action_space: gym.Space): + def __init__(self, net: BackboneModule, action_space: gym.Space): super().__init__() assert isinstance(action_space, gym.spaces.Discrete) diff --git a/vel/rl/module/rainbow_policy.py b/vel/rl/module/rainbow_policy.py index 8e709758..2b4fe914 100644 --- a/vel/rl/module/rainbow_policy.py +++ b/vel/rl/module/rainbow_policy.py @@ -1,11 +1,11 @@ import gym import torch -from vel.api import Network, BackboneNetwork +from vel.api import VModule, BackboneModule from vel.rl.module.head.q_distributional_noisy_dueling_head import QDistributionalNoisyDuelingHead -class RainbowPolicy(Network): +class RainbowPolicy(VModule): """ A deterministic greedy action-value model. 
Includes following commonly known modifications: @@ -14,7 +14,7 @@ class RainbowPolicy(Network): - Noisy Nets """ - def __init__(self, net: BackboneNetwork, action_space: gym.Space, vmin: float, vmax: float, + def __init__(self, net: BackboneModule, action_space: gym.Space, vmin: float, vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): super().__init__() diff --git a/vel/rl/module/stochastic_policy.py b/vel/rl/module/stochastic_policy.py index 47a22e3f..d11ac0a6 100644 --- a/vel/rl/module/stochastic_policy.py +++ b/vel/rl/module/stochastic_policy.py @@ -1,17 +1,17 @@ import gym -from vel.api import Network, BackboneNetwork +from vel.api import VModule, BackboneModule from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head from vel.rl.module.head.value_head import ValueHead -class StochasticPolicy(Network): +class StochasticPolicy(VModule): """ Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone """ - def __init__(self, net: BackboneNetwork, action_space: gym.Space): + def __init__(self, net: BackboneModule, action_space: gym.Space): super().__init__() self.net = net diff --git a/vel/rl/module/stochastic_rnn_policy.py b/vel/rl/module/stochastic_rnn_policy.py index ae2e17b2..94410c63 100644 --- a/vel/rl/module/stochastic_rnn_policy.py +++ b/vel/rl/module/stochastic_rnn_policy.py @@ -1,19 +1,19 @@ import gym -from vel.api import Network, BackboneNetwork +from vel.api import VModule, BackboneModule from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head from vel.rl.module.head.value_head import ValueHead from vel.util.tensor_util import to_device -class StochasticRnnPolicy(Network): +class StochasticRnnPolicy(VModule): """ Most generic policy gradient model class with a set of common actor-critic heads that share a single backbone RNN version """ - def __init__(self, net: BackboneNetwork, action_space: gym.Space): + def 
__init__(self, net: BackboneModule, action_space: gym.Space): super().__init__() self.net = net diff --git a/vel/rl/policy/a2c.py b/vel/rl/policy/a2c.py index 68589fdd..8a96c277 100644 --- a/vel/rl/policy/a2c.py +++ b/vel/rl/policy/a2c.py @@ -5,7 +5,7 @@ from vel.metric.base import AveragingNamedMetric from vel.util.situational import gym_space_to_size_hint from vel.util.stats import explained_variance -from vel.api import ModelFactory, BatchInfo, BackboneNetwork +from vel.api import ModelFactory, BatchInfo, BackboneModule from vel.rl.api import RlPolicy, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae @@ -14,7 +14,7 @@ class A2C(RlPolicy): """ Simplest policy gradient - calculate loss as an advantage of an actor versus value function """ - def __init__(self, net: BackboneNetwork, action_space: gym.Space, + def __init__(self, net: BackboneModule, action_space: gym.Space, entropy_coefficient, value_coefficient, discount_factor: float, gae_lambda=1.0): super().__init__(discount_factor) diff --git a/vel/rl/policy/a2c_rnn.py b/vel/rl/policy/a2c_rnn.py index 84b4ef50..8b3a6654 100644 --- a/vel/rl/policy/a2c_rnn.py +++ b/vel/rl/policy/a2c_rnn.py @@ -2,7 +2,7 @@ import torch import torch.nn.functional as F -from vel.api import ModelFactory, BatchInfo, BackboneNetwork +from vel.api import ModelFactory, BatchInfo, BackboneModule from vel.metric.base import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae @@ -13,7 +13,7 @@ class A2CRnn(RlPolicy): """ Simplest policy gradient - calculate loss as an advantage of an actor versus value function """ - def __init__(self, net: BackboneNetwork, action_space: gym.Space, + def __init__(self, net: BackboneModule, action_space: gym.Space, entropy_coefficient, value_coefficient, discount_factor: float, gae_lambda=1.0): super().__init__(discount_factor) diff --git a/vel/rl/policy/acer.py b/vel/rl/policy/acer.py index 
3d30f733..dbed10ce 100644 --- a/vel/rl/policy/acer.py +++ b/vel/rl/policy/acer.py @@ -3,7 +3,7 @@ import torch import torch.nn.functional as F -from vel.api import BackboneNetwork, ModelFactory, BatchInfo, OptimizerFactory, VelOptimizer +from vel.api import BackboneModule, ModelFactory, BatchInfo, OptimizerFactory, VelOptimizer from vel.metric.base import AveragingNamedMetric from vel.rl.api import Trajectories, RlPolicy, Rollout from vel.rl.module.q_stochastic_policy import QStochasticPolicy @@ -18,7 +18,7 @@ def select_indices(tensor, indices): class ACER(RlPolicy): """ Actor-Critic with Experience Replay - policy gradient calculations """ - def __init__(self, net: BackboneNetwork, target_net: typing.Optional[BackboneNetwork], action_space: gym.Space, + def __init__(self, net: BackboneModule, target_net: typing.Optional[BackboneModule], action_space: gym.Space, discount_factor: float, trust_region: bool = True, entropy_coefficient: float = 0.01, q_coefficient: float = 0.5, rho_cap: float = 10.0, retrace_rho_cap: float = 1.0, average_model_alpha: float = 0.99, trust_region_delta: float = 1.0): diff --git a/vel/rl/policy/ddpg.py b/vel/rl/policy/ddpg.py index 0e011ecb..915fdc3e 100644 --- a/vel/rl/policy/ddpg.py +++ b/vel/rl/policy/ddpg.py @@ -8,7 +8,7 @@ import vel.util.module_util as mu -from vel.api import BackboneNetwork, BatchInfo, ModelFactory, OptimizerFactory, VelOptimizer, SizeHints +from vel.api import BackboneModule, BatchInfo, ModelFactory, OptimizerFactory, VelOptimizer, SizeHints from vel.metric.base import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout from vel.rl.module.actor_critic_policy import ActorCriticPolicy @@ -19,7 +19,7 @@ class DDPG(RlPolicy): """ Deep Deterministic Policy Gradient (DDPG) - policy gradient calculations """ - def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_space: gym.Space, + def __init__(self, net: BackboneModule, target_net: BackboneModule, action_space: gym.Space, 
discount_factor: float, tau: float, noise_std_dev: float): super().__init__(discount_factor) diff --git a/vel/rl/policy/dqn.py b/vel/rl/policy/dqn.py index 9bc8dd64..c93bf013 100644 --- a/vel/rl/policy/dqn.py +++ b/vel/rl/policy/dqn.py @@ -6,7 +6,7 @@ import torch.nn.functional as F import torch.nn.utils -from vel.api import ModelFactory, BackboneNetwork, BatchInfo, Schedule, OptimizerFactory, VelOptimizer +from vel.api import ModelFactory, BackboneModule, BatchInfo, Schedule, OptimizerFactory, VelOptimizer from vel.function.constant import ConstantSchedule from vel.metric import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout @@ -18,7 +18,7 @@ class DQN(RlPolicy): """ Deep Q-Learning algorithm """ - def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_space: gym.Space, + def __init__(self, net: BackboneModule, target_net: BackboneModule, action_space: gym.Space, epsilon: typing.Union[float, Schedule], discount_factor: float, double_dqn: bool, dueling_dqn: bool, target_update_frequency: int): super().__init__(discount_factor) diff --git a/vel/rl/policy/ppo.py b/vel/rl/policy/ppo.py index 4e68445b..9ae38ba8 100644 --- a/vel/rl/policy/ppo.py +++ b/vel/rl/policy/ppo.py @@ -3,7 +3,7 @@ import numbers -from vel.api import BatchInfo, ModelFactory, BackboneNetwork +from vel.api import BatchInfo, ModelFactory, BackboneModule from vel.util.situational import gym_space_to_size_hint from vel.util.stats import explained_variance from vel.function.constant import ConstantSchedule @@ -16,7 +16,7 @@ class PPO(RlPolicy): """ Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 """ - def __init__(self, net: BackboneNetwork, action_space: gym.Space, + def __init__(self, net: BackboneModule, action_space: gym.Space, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): super().__init__(discount_factor) diff --git a/vel/rl/policy/ppo_rnn.py 
b/vel/rl/policy/ppo_rnn.py index fc28e2f1..0ac60339 100644 --- a/vel/rl/policy/ppo_rnn.py +++ b/vel/rl/policy/ppo_rnn.py @@ -3,7 +3,7 @@ import gym import torch -from vel.api import BatchInfo, ModelFactory, BackboneNetwork +from vel.api import BatchInfo, ModelFactory, BackboneModule from vel.function.constant import ConstantSchedule from vel.metric.base import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout, Trajectories @@ -15,7 +15,7 @@ class PPORnn(RlPolicy): """ Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 """ - def __init__(self, net: BackboneNetwork, action_space: gym.Space, + def __init__(self, net: BackboneModule, action_space: gym.Space, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): super().__init__(discount_factor) diff --git a/vel/rl/policy/rainbow.py b/vel/rl/policy/rainbow.py index f8693131..ed9be961 100644 --- a/vel/rl/policy/rainbow.py +++ b/vel/rl/policy/rainbow.py @@ -2,7 +2,7 @@ import torch import torch.nn.utils -from vel.api import ModelFactory, BackboneNetwork, BatchInfo, OptimizerFactory, VelOptimizer +from vel.api import ModelFactory, BackboneModule, BatchInfo, OptimizerFactory, VelOptimizer from vel.metric import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout from vel.rl.module.rainbow_policy import RainbowPolicy @@ -12,7 +12,7 @@ class Rainbow(RlPolicy): """ Deep Q-Learning algorithm """ - def __init__(self, net: BackboneNetwork, target_net: BackboneNetwork, action_space: gym.Space, + def __init__(self, net: BackboneModule, target_net: BackboneModule, action_space: gym.Space, discount_factor: float, target_update_frequency: int, vmin: float, vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): super().__init__(discount_factor) diff --git a/vel/rl/policy/trpo.py b/vel/rl/policy/trpo.py index 0e4cba3a..c1fae215 100644 --- a/vel/rl/policy/trpo.py +++ 
b/vel/rl/policy/trpo.py @@ -8,7 +8,7 @@ import torch.nn.functional as F import torch.nn.utils -from vel.api import BatchInfo, VelOptimizer, OptimizerFactory, ModelFactory, BackboneNetwork +from vel.api import BatchInfo, VelOptimizer, OptimizerFactory, ModelFactory, BackboneModule from vel.util.stats import explained_variance from vel.metric.base import AveragingNamedMetric @@ -59,10 +59,10 @@ def conjugate_gradient_method(matrix_vector_operator, loss_gradient, nsteps, rdo class TRPO(RlPolicy): """ Trust Region Policy Optimization - https://arxiv.org/abs/1502.05477 """ - def __init__(self, policy_net: BackboneNetwork, value_net: BackboneNetwork, action_space: gym.Space, + def __init__(self, policy_net: BackboneModule, value_net: BackboneModule, action_space: gym.Space, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, discount_factor, gae_lambda, improvement_acceptance_ratio, - input_net: typing.Optional[BackboneNetwork] = None, + input_net: typing.Optional[BackboneModule] = None, ): super().__init__(discount_factor) From 0c3bd10f2a6a61acbb2ebc61b572060c2371d7a7 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 6 Oct 2019 21:42:34 -0700 Subject: [PATCH 125/162] Renaming ModelFactory -> ModuleFactory. 
--- vel/api/__init__.py | 2 +- vel/api/model_factory.py | 8 ++++---- vel/command/phase_train_command.py | 2 +- vel/command/train_command.py | 2 +- vel/model/autoencoder/cnn_autoencoder.py | 4 ++-- vel/model/gan/simple_gan.py | 4 ++-- vel/model/imagenet/resnet34.py | 4 ++-- vel/model/latent/cnn_iwae.py | 4 ++-- vel/model/latent/cnn_vae.py | 4 ++-- vel/model/latent/fc_iwae.py | 4 ++-- vel/model/latent/fc_vae.py | 4 ++-- vel/model/latent/vq_vae.py | 4 ++-- vel/model/nlp/language_model.py | 8 ++++---- .../rnn/multilayer_rnn_sequence_classification.py | 6 +++--- vel/model/rnn/multilayer_rnn_sequence_model.py | 6 +++--- vel/model/vision/cifar10_cnn_01.py | 4 ++-- vel/model/vision/cifar_resnet_v1.py | 4 ++-- vel/model/vision/cifar_resnet_v2.py | 4 ++-- vel/model/vision/cifar_resnext.py | 4 ++-- vel/model/vision/mnist_cnn_01.py | 4 ++-- vel/net/modular.py | 4 ++-- vel/rl/command/enjoy.py | 4 ++-- vel/rl/command/evaluate_env_command.py | 6 +++--- vel/rl/command/record_movie_command.py | 4 ++-- vel/rl/layer/nature_cnn_rnn.py | 4 ++-- vel/rl/policy/a2c.py | 6 +++--- vel/rl/policy/a2c_rnn.py | 6 +++--- vel/rl/policy/acer.py | 4 ++-- vel/rl/policy/ddpg.py | 12 ++++++------ vel/rl/policy/dqn.py | 8 ++++---- vel/rl/policy/ppo.py | 6 +++--- vel/rl/policy/ppo_rnn.py | 6 +++--- vel/rl/policy/rainbow.py | 8 ++++---- vel/rl/policy/trpo.py | 12 ++++++------ .../buffered_mixed_policy_iteration_reinforcer.py | 4 ++-- .../buffered_off_policy_iteration_reinforcer.py | 4 ++-- vel/rl/reinforcer/on_policy_iteration_reinforcer.py | 4 ++-- 37 files changed, 94 insertions(+), 94 deletions(-) diff --git a/vel/api/__init__.py b/vel/api/__init__.py index ceb95211..4df70bd3 100644 --- a/vel/api/__init__.py +++ b/vel/api/__init__.py @@ -6,7 +6,7 @@ Model, ValidatedModel, OptimizedModel, GradientModel, LossFunctionModel ) from .model_config import ModelConfig -from .model_factory import ModelFactory +from .model_factory import ModuleFactory from .optimizer import OptimizerFactory, VelOptimizer, 
VelOptimizerProxy from .schedule import Schedule from .scheduler import SchedulerFactory diff --git a/vel/api/model_factory.py b/vel/api/model_factory.py index a79ad097..2920877c 100644 --- a/vel/api/model_factory.py +++ b/vel/api/model_factory.py @@ -1,15 +1,15 @@ -from .vmodule import VModule from vel.internal.generic_factory import GenericFactory +from .vmodule import VModule -class ModelFactory: - """ Factory class for models """ +class ModuleFactory: + """ Factory for modules """ def instantiate(self, **extra_args) -> VModule: raise NotImplementedError @staticmethod - def generic(closure, **kwargs) -> 'ModelFactory': + def generic(closure, **kwargs) -> 'ModuleFactory': """ Return a generic model factory """ # noinspection PyTypeChecker return GenericFactory(closure, kwargs) diff --git a/vel/command/phase_train_command.py b/vel/command/phase_train_command.py index 0fb91197..2670fcac 100644 --- a/vel/command/phase_train_command.py +++ b/vel/command/phase_train_command.py @@ -14,7 +14,7 @@ class PhaseTrainCommand: """ Training command - learn according to a set of phases """ - def __init__(self, model_config: api.ModelConfig, model_factory: api.ModelFactory, loader: data.DatasetLoader, + def __init__(self, model_config: api.ModelConfig, model_factory: api.ModuleFactory, loader: data.DatasetLoader, storage: api.Storage, phases: typing.List[train.TrainPhase], callbacks=None, restart=True): self.model_config = model_config diff --git a/vel/command/train_command.py b/vel/command/train_command.py index a504f9c7..9e94450c 100644 --- a/vel/command/train_command.py +++ b/vel/command/train_command.py @@ -12,7 +12,7 @@ class SimpleTrainCommand: """ Very simple training command - just run the supplied generators """ - def __init__(self, epochs: int, model_config: api.ModelConfig, model_factory: api.ModelFactory, + def __init__(self, epochs: int, model_config: api.ModelConfig, model_factory: api.ModuleFactory, optimizer_factory: api.OptimizerFactory, scheduler_factory: 
typing.Optional[api.SchedulerFactory], loader: data.DatasetLoader, storage: api.Storage, callbacks: typing.Optional[typing.List[api.Callback]]): diff --git a/vel/model/autoencoder/cnn_autoencoder.py b/vel/model/autoencoder/cnn_autoencoder.py index 0bb3197e..51ca0907 100644 --- a/vel/model/autoencoder/cnn_autoencoder.py +++ b/vel/model/autoencoder/cnn_autoencoder.py @@ -6,7 +6,7 @@ import vel.util.network as net_util -from vel.api import LossFunctionModel, ModelFactory +from vel.api import LossFunctionModel, ModuleFactory from vel.metric.loss_metric import Loss from vel.module.layers import Flatten, Reshape @@ -104,4 +104,4 @@ def instantiate(**_): img_rows, img_cols, img_channels, channels=channels, representation_length=representation_length ) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/gan/simple_gan.py b/vel/model/gan/simple_gan.py index 78d516eb..0823888b 100644 --- a/vel/model/gan/simple_gan.py +++ b/vel/model/gan/simple_gan.py @@ -6,7 +6,7 @@ import torch import torch.nn as nn -from vel.api import OptimizedModel, ModelFactory, VelOptimizer, OptimizerFactory +from vel.api import OptimizedModel, ModuleFactory, VelOptimizer, OptimizerFactory from vel.api.optimizer import VelMultiOptimizer from vel.metric import AveragingNamedMetric @@ -159,4 +159,4 @@ def instantiate(**_): img_rows, img_cols, img_channels, latent_dim=latent_dim ) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/imagenet/resnet34.py b/vel/model/imagenet/resnet34.py index f9901eba..3537a226 100644 --- a/vel/model/imagenet/resnet34.py +++ b/vel/model/imagenet/resnet34.py @@ -5,7 +5,7 @@ import vel.module.layers as layers import vel.util.module_util as mu -from vel.api import LossFunctionModel, ModelFactory, OptimizerFactory, VelOptimizer +from vel.api import LossFunctionModel, ModuleFactory, OptimizerFactory, VelOptimizer # Because of concat pooling it's 2x 512 @@ -108,4 
+108,4 @@ def create(fc_layers=None, dropout=None, pretrained=True): def instantiate(**_): return Resnet34(fc_layers, dropout, pretrained) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/latent/cnn_iwae.py b/vel/model/latent/cnn_iwae.py index a6ee1d7f..6827b0af 100644 --- a/vel/model/latent/cnn_iwae.py +++ b/vel/model/latent/cnn_iwae.py @@ -8,7 +8,7 @@ import vel.util.network as net_util -from vel.api import ModelFactory +from vel.api import ModuleFactory from vel.module.layers import Flatten, Reshape from vel.model.latent.iwae import IWAE @@ -155,4 +155,4 @@ def instantiate(**_): img_rows, img_cols, img_channels, k=k, channels=channels, representation_length=representation_length ) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/latent/cnn_vae.py b/vel/model/latent/cnn_vae.py index 491cdb70..71582449 100644 --- a/vel/model/latent/cnn_vae.py +++ b/vel/model/latent/cnn_vae.py @@ -8,7 +8,7 @@ import vel.util.network as net_util -from vel.api import ModelFactory +from vel.api import ModuleFactory from vel.module.layers import Flatten, Reshape from vel.model.latent.vae_base import VaeBase @@ -155,4 +155,4 @@ def instantiate(**_): img_rows, img_cols, img_channels, channels=channels, representation_length=representation_length ) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/latent/fc_iwae.py b/vel/model/latent/fc_iwae.py index 7e7a44da..43e47d5e 100644 --- a/vel/model/latent/fc_iwae.py +++ b/vel/model/latent/fc_iwae.py @@ -4,7 +4,7 @@ import torch.nn.functional as F import torch.nn.init as init -from vel.api import ModelFactory +from vel.api import ModuleFactory from vel.module.layers import Flatten, Reshape from vel.model.latent.iwae import IWAE @@ -102,4 +102,4 @@ def instantiate(**_): analytical_kl_div=analytical_kl_div ) - return ModelFactory.generic(instantiate) + return 
ModuleFactory.generic(instantiate) diff --git a/vel/model/latent/fc_vae.py b/vel/model/latent/fc_vae.py index fbad9e29..4ae9323a 100644 --- a/vel/model/latent/fc_vae.py +++ b/vel/model/latent/fc_vae.py @@ -4,7 +4,7 @@ import torch.nn.functional as F import torch.nn.init as init -from vel.api import ModelFactory +from vel.api import ModuleFactory from vel.module.layers import Flatten, Reshape from vel.model.latent.vae_base import VaeBase @@ -103,4 +103,4 @@ def instantiate(**_): analytical_kl_div=analytical_kl_div ) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/latent/vq_vae.py b/vel/model/latent/vq_vae.py index 5f3d4677..f28dbf50 100644 --- a/vel/model/latent/vq_vae.py +++ b/vel/model/latent/vq_vae.py @@ -293,7 +293,7 @@ def metrics(self): def create(img_rows, img_cols, img_channels, channels=None, k: int = 512, d: int = 256, beta: float = 1.0): """ Vel factory function """ - from vel.api import ModelFactory + from vel.api import ModuleFactory if channels is None: channels = [16, 32, 32] @@ -303,4 +303,4 @@ def instantiate(**_): img_rows, img_cols, img_channels, channels=channels, k=k, d=d, beta=beta ) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/nlp/language_model.py b/vel/model/nlp/language_model.py index bd295a3b..e27835b9 100644 --- a/vel/model/nlp/language_model.py +++ b/vel/model/nlp/language_model.py @@ -2,7 +2,7 @@ import torch.nn as nn import torch.nn.functional as F -from vel.api import LossFunctionModel, ModelFactory, VModule, BackboneModule, SizeHints, SizeHint +from vel.api import LossFunctionModel, ModuleFactory, VModule, BackboneModule, SizeHints, SizeHint class LanguageModel(LossFunctionModel): @@ -56,8 +56,8 @@ def loss_value(self, x_data, y_true, y_pred) -> torch.tensor: return F.nll_loss(y_pred, y_true) -class LanguageModelFactory(ModelFactory): - def __init__(self, alphabet_size: int, net_factory: ModelFactory): +class 
LanguageModelFactory(ModuleFactory): + def __init__(self, alphabet_size: int, net_factory: ModuleFactory): self.alphabet_size = alphabet_size self.net_factory = net_factory @@ -71,7 +71,7 @@ def instantiate(self, **extra_args) -> VModule: ) -def create(loader, net: ModelFactory): +def create(loader, net: ModuleFactory): """ Vel factory function """ return LanguageModelFactory( alphabet_size=loader.alphabet_size, diff --git a/vel/model/rnn/multilayer_rnn_sequence_classification.py b/vel/model/rnn/multilayer_rnn_sequence_classification.py index 20f40706..82953b5b 100644 --- a/vel/model/rnn/multilayer_rnn_sequence_classification.py +++ b/vel/model/rnn/multilayer_rnn_sequence_classification.py @@ -6,7 +6,7 @@ import vel.util.module_util as mu -from vel.api import LossFunctionModel, ModelFactory, LinearBackboneModel, OptimizerFactory, VelOptimizer +from vel.api import LossFunctionModel, ModuleFactory, LinearBackboneModel, OptimizerFactory, VelOptimizer from vel.metric.accuracy import Accuracy from vel.metric.loss_metric import Loss from vel.module.rnn_layer import RnnLayer @@ -150,7 +150,7 @@ def metrics(self) -> list: return [Loss(), Accuracy()] -def create(input_block: ModelFactory, rnn_type: str, output_dim: int, +def create(input_block: ModuleFactory, rnn_type: str, output_dim: int, rnn_layers: typing.List[int], rnn_dropout: float = 0.0, bidirectional: bool = False, linear_layers: typing.List[int] = None, linear_dropout: float = 0.0): """ Vel factory function """ @@ -164,4 +164,4 @@ def instantiate(**_): linear_layers=linear_layers, linear_dropout=linear_dropout ) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/rnn/multilayer_rnn_sequence_model.py b/vel/model/rnn/multilayer_rnn_sequence_model.py index 3f5c332a..70f98c75 100644 --- a/vel/model/rnn/multilayer_rnn_sequence_model.py +++ b/vel/model/rnn/multilayer_rnn_sequence_model.py @@ -4,7 +4,7 @@ import torch.nn.functional as F import torch.nn as nn -from 
vel.api import LossFunctionModel, ModelFactory +from vel.api import LossFunctionModel, ModuleFactory from vel.module.rnn_layer import RnnLayer @@ -110,7 +110,7 @@ def loss_value(self, x_data, y_true, y_pred): return F.nll_loss(y_pred, y_true) -def create(input_block: ModelFactory, rnn_type: str, hidden_layers: typing.List[int], +def create(input_block: ModuleFactory, rnn_type: str, hidden_layers: typing.List[int], output_dim: int, dropout=0.0): """ Vel factory function """ def instantiate(**_): @@ -119,4 +119,4 @@ def instantiate(**_): dropout=dropout ) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/vision/cifar10_cnn_01.py b/vel/model/vision/cifar10_cnn_01.py index 3f3551af..da9e2c57 100644 --- a/vel/model/vision/cifar10_cnn_01.py +++ b/vel/model/vision/cifar10_cnn_01.py @@ -8,7 +8,7 @@ import torch.nn.init as init import torch.nn.functional as F -from vel.api import LossFunctionModel, ModelFactory +from vel.api import LossFunctionModel, ModuleFactory from vel.metric.loss_metric import Loss from vel.metric.accuracy import Accuracy @@ -92,4 +92,4 @@ def create(img_rows, img_cols, img_channels, num_classes): """ Vel factory function """ def instantiate(**_): return Net(img_rows, img_cols, img_channels, num_classes) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/vision/cifar_resnet_v1.py b/vel/model/vision/cifar_resnet_v1.py index ab8fac25..4a520c65 100644 --- a/vel/model/vision/cifar_resnet_v1.py +++ b/vel/model/vision/cifar_resnet_v1.py @@ -6,7 +6,7 @@ import torch.nn as nn import torch.nn.functional as F -from vel.api import LossFunctionModel, ModelFactory +from vel.api import LossFunctionModel, ModuleFactory from vel.module.resnet_v1 import Bottleneck, BasicBlock @@ -89,4 +89,4 @@ def create(blocks, mode='basic', inplanes=16, divisor=4, num_classes=1000): def instantiate(**_): return ResNetV1(block_dict[mode], blocks, inplanes=inplanes, 
divisor=divisor, num_classes=num_classes) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/vision/cifar_resnet_v2.py b/vel/model/vision/cifar_resnet_v2.py index 66e96fb6..eef5ab38 100644 --- a/vel/model/vision/cifar_resnet_v2.py +++ b/vel/model/vision/cifar_resnet_v2.py @@ -6,7 +6,7 @@ import torch.nn as nn import torch.nn.functional as F -from vel.api import LossFunctionModel, ModelFactory +from vel.api import LossFunctionModel, ModuleFactory from vel.module.resnet_v2 import Bottleneck, BasicBlock @@ -91,4 +91,4 @@ def create(blocks, mode='basic', inplanes=16, divisor=4, num_classes=1000): def instantiate(**_): return ResNetV2(block_dict[mode], blocks, inplanes=inplanes, divisor=divisor, num_classes=num_classes) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/vision/cifar_resnext.py b/vel/model/vision/cifar_resnext.py index edb6d8a2..d3ce97a3 100644 --- a/vel/model/vision/cifar_resnext.py +++ b/vel/model/vision/cifar_resnext.py @@ -6,7 +6,7 @@ import torch.nn as nn import torch.nn.functional as F -from vel.api import LossFunctionModel, ModelFactory +from vel.api import LossFunctionModel, ModuleFactory from vel.module.resnext import ResNeXtBottleneck @@ -91,4 +91,4 @@ def instantiate(**_): cardinality=cardinality, divisor=divisor, num_classes=num_classes ) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/model/vision/mnist_cnn_01.py b/vel/model/vision/mnist_cnn_01.py index 513f33c0..df8c5ada 100644 --- a/vel/model/vision/mnist_cnn_01.py +++ b/vel/model/vision/mnist_cnn_01.py @@ -9,7 +9,7 @@ import torch.nn.functional as F -from vel.api import LossFunctionModel, ModelFactory +from vel.api import LossFunctionModel, ModuleFactory from vel.metric.loss_metric import Loss from vel.metric.accuracy import Accuracy @@ -77,4 +77,4 @@ def create(img_rows, img_cols, img_channels, num_classes): def 
instantiate(**_): return Net(img_rows, img_cols, img_channels, num_classes) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/net/modular.py b/vel/net/modular.py index d448d628..c17fd81a 100644 --- a/vel/net/modular.py +++ b/vel/net/modular.py @@ -2,7 +2,7 @@ import torch.nn as nn -from vel.api import BackboneModule, ModelFactory, SizeHints +from vel.api import BackboneModule, ModuleFactory, SizeHints from vel.util.tensor_util import to_device from .layer_base import LayerFactory @@ -137,7 +137,7 @@ def forward(self, input_data, state=None): return data, output_state -class ModularNetworkFactory(ModelFactory): +class ModularNetworkFactory(ModuleFactory): """ Factory class for the modular network """ def __init__(self, layers: [LayerFactory]): self.layers = layers diff --git a/vel/rl/command/enjoy.py b/vel/rl/command/enjoy.py index 14da7fa2..2cef2bf2 100644 --- a/vel/rl/command/enjoy.py +++ b/vel/rl/command/enjoy.py @@ -3,14 +3,14 @@ import typing import time -from vel.api import ModelConfig, TrainingInfo, Storage, ModelFactory +from vel.api import ModelConfig, TrainingInfo, Storage, ModuleFactory from vel.rl.api import VecEnvFactory class EnjoyCommand: """ Play render("human") in a loop for a human to enjoy """ - def __init__(self, model_config: ModelConfig, model_factory: ModelFactory, vec_env_factory: VecEnvFactory, + def __init__(self, model_config: ModelConfig, model_factory: ModuleFactory, vec_env_factory: VecEnvFactory, storage: Storage, fps: float, sample_args: typing.Optional[dict]): self.model_config = model_config self.model_factory = model_factory diff --git a/vel/rl/command/evaluate_env_command.py b/vel/rl/command/evaluate_env_command.py index 3bf0eec7..0a27aec6 100644 --- a/vel/rl/command/evaluate_env_command.py +++ b/vel/rl/command/evaluate_env_command.py @@ -4,14 +4,14 @@ import tqdm import typing -from vel.api import ModelConfig, TrainingInfo, Storage, ModelFactory +from vel.api import 
ModelConfig, TrainingInfo, Storage, ModuleFactory from vel.rl.api import VecEnvFactory class EvaluateEnvCommand: """ Record environment playthrough as a game """ - def __init__(self, model_config: ModelConfig, env_factory: VecEnvFactory, model_factory: ModelFactory, - storage: Storage, parallel_envs: int, action_noise: typing.Optional[ModelFactory], takes: int, + def __init__(self, model_config: ModelConfig, env_factory: VecEnvFactory, model_factory: ModuleFactory, + storage: Storage, parallel_envs: int, action_noise: typing.Optional[ModuleFactory], takes: int, sample_args: dict = None): self.model_config = model_config self.model_factory = model_factory diff --git a/vel/rl/command/record_movie_command.py b/vel/rl/command/record_movie_command.py index a7a14d78..79598e30 100644 --- a/vel/rl/command/record_movie_command.py +++ b/vel/rl/command/record_movie_command.py @@ -7,13 +7,13 @@ import tqdm import typing -from vel.api import ModelConfig, TrainingInfo, Storage, ModelFactory +from vel.api import ModelConfig, TrainingInfo, Storage, ModuleFactory from vel.rl.api import VecEnvFactory class RecordMovieCommand: """ Record environment playthrough as a game """ - def __init__(self, model_config: ModelConfig, env_factory: VecEnvFactory, model_factory: ModelFactory, + def __init__(self, model_config: ModelConfig, env_factory: VecEnvFactory, model_factory: ModuleFactory, storage: Storage, videoname: str, takes: int, fps: int, sample_args: typing.Optional[dict] = None): self.model_config = model_config self.model_factory = model_factory diff --git a/vel/rl/layer/nature_cnn_rnn.py b/vel/rl/layer/nature_cnn_rnn.py index 699e7387..fd4864a2 100644 --- a/vel/rl/layer/nature_cnn_rnn.py +++ b/vel/rl/layer/nature_cnn_rnn.py @@ -1,4 +1,4 @@ -from vel.api import LinearBackboneModel, ModelFactory +from vel.api import LinearBackboneModel, ModuleFactory from vel.rl.backbone.nature_cnn import NatureCnn from vel.module.rnn_cell import RnnCell @@ -58,4 +58,4 @@ def instantiate(**_): 
rnn_type=rnn_type, cnn_output_dim=cnn_output_dim, hidden_units=hidden_units ) - return ModelFactory.generic(instantiate) + return ModuleFactory.generic(instantiate) diff --git a/vel/rl/policy/a2c.py b/vel/rl/policy/a2c.py index 8a96c277..ef4cb1d8 100644 --- a/vel/rl/policy/a2c.py +++ b/vel/rl/policy/a2c.py @@ -5,7 +5,7 @@ from vel.metric.base import AveragingNamedMetric from vel.util.situational import gym_space_to_size_hint from vel.util.stats import explained_variance -from vel.api import ModelFactory, BatchInfo, BackboneModule +from vel.api import ModuleFactory, BatchInfo, BackboneModule from vel.rl.api import RlPolicy, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae @@ -112,7 +112,7 @@ def metrics(self) -> list: ] -class A2CFactory(ModelFactory): +class A2CFactory(ModuleFactory): """ Factory class for policy gradient models """ def __init__(self, net_factory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): self.net_factory = net_factory @@ -140,7 +140,7 @@ def instantiate(self, **extra_args): ) -def create(net: ModelFactory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): +def create(net: ModuleFactory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): """ Vel factory function """ return A2CFactory( net_factory=net, diff --git a/vel/rl/policy/a2c_rnn.py b/vel/rl/policy/a2c_rnn.py index 8b3a6654..5ec298de 100644 --- a/vel/rl/policy/a2c_rnn.py +++ b/vel/rl/policy/a2c_rnn.py @@ -2,7 +2,7 @@ import torch import torch.nn.functional as F -from vel.api import ModelFactory, BatchInfo, BackboneModule +from vel.api import ModuleFactory, BatchInfo, BackboneModule from vel.metric.base import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout, Trajectories from vel.rl.discount_bootstrap import discount_bootstrap_gae @@ -153,7 +153,7 @@ def metrics(self) -> list: ] -class A2CRnnFactory(ModelFactory): +class A2CRnnFactory(ModuleFactory): """ Factory class 
for policy gradient models """ def __init__(self, net_factory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): self.net_factory = net_factory @@ -181,7 +181,7 @@ def instantiate(self, **extra_args): ) -def create(net: ModelFactory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): +def create(net: ModuleFactory, entropy_coefficient, value_coefficient, discount_factor, gae_lambda=1.0): """ Vel factory function """ return A2CRnnFactory( net_factory=net, diff --git a/vel/rl/policy/acer.py b/vel/rl/policy/acer.py index dbed10ce..59003e45 100644 --- a/vel/rl/policy/acer.py +++ b/vel/rl/policy/acer.py @@ -3,7 +3,7 @@ import torch import torch.nn.functional as F -from vel.api import BackboneModule, ModelFactory, BatchInfo, OptimizerFactory, VelOptimizer +from vel.api import BackboneModule, ModuleFactory, BatchInfo, OptimizerFactory, VelOptimizer from vel.metric.base import AveragingNamedMetric from vel.rl.api import Trajectories, RlPolicy, Rollout from vel.rl.module.q_stochastic_policy import QStochasticPolicy @@ -247,7 +247,7 @@ def metrics(self) -> list: ] -class ACERFactory(ModelFactory): +class ACERFactory(ModuleFactory): """ Factory class for ACER policies """ def __init__(self, net_factory, trust_region: bool, entropy_coefficient: float, q_coefficient: float, discount_factor: float, rho_cap: float = 10.0, retrace_rho_cap: float = 1.0, diff --git a/vel/rl/policy/ddpg.py b/vel/rl/policy/ddpg.py index 915fdc3e..3f40d317 100644 --- a/vel/rl/policy/ddpg.py +++ b/vel/rl/policy/ddpg.py @@ -8,7 +8,7 @@ import vel.util.module_util as mu -from vel.api import BackboneModule, BatchInfo, ModelFactory, OptimizerFactory, VelOptimizer, SizeHints +from vel.api import BackboneModule, BatchInfo, ModuleFactory, OptimizerFactory, VelOptimizer, SizeHints from vel.metric.base import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout from vel.rl.module.actor_critic_policy import ActorCriticPolicy @@ -127,12 +127,12 @@ def 
metrics(self) -> list: ] -class DDPGFactory(ModelFactory): +class DDPGFactory(ModuleFactory): """ Factory for the DDPG policy """ - def __init__(self, actor_net: ModelFactory, critic_net: ModelFactory, + def __init__(self, actor_net: ModuleFactory, critic_net: ModuleFactory, discount_factor: float, tau: float, noise_std_dev: float, - input_net: typing.Optional[ModelFactory] = None): + input_net: typing.Optional[ModuleFactory] = None): self.actor_net_factory = actor_net self.critic_net_factory = critic_net self.input_net_factory = input_net @@ -182,9 +182,9 @@ def instantiate(self, **extra_args): ) -def create(actor_net: ModelFactory, critic_net: ModelFactory, +def create(actor_net: ModuleFactory, critic_net: ModuleFactory, discount_factor: float, tau: float, noise_std_dev: float, - input_net: typing.Optional[ModelFactory] = None + input_net: typing.Optional[ModuleFactory] = None ): """ Vel factory function """ return DDPGFactory( diff --git a/vel/rl/policy/dqn.py b/vel/rl/policy/dqn.py index c93bf013..b6a27aa0 100644 --- a/vel/rl/policy/dqn.py +++ b/vel/rl/policy/dqn.py @@ -6,7 +6,7 @@ import torch.nn.functional as F import torch.nn.utils -from vel.api import ModelFactory, BackboneModule, BatchInfo, Schedule, OptimizerFactory, VelOptimizer +from vel.api import ModuleFactory, BackboneModule, BatchInfo, Schedule, OptimizerFactory, VelOptimizer from vel.function.constant import ConstantSchedule from vel.metric import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout @@ -133,8 +133,8 @@ def metrics(self) -> list: ] -class DQNFactory(ModelFactory): - def __init__(self, net_factory: ModelFactory, epsilon: typing.Union[float, Schedule], discount_factor: float, +class DQNFactory(ModuleFactory): + def __init__(self, net_factory: ModuleFactory, epsilon: typing.Union[float, Schedule], discount_factor: float, target_update_frequency: int, double_dqn: bool = False, dueling_dqn: bool = False): self.net_factory = net_factory self.epsilon = epsilon @@ -165,7 +165,7 @@ 
def instantiate(self, **extra_args): ) -def create(net: ModelFactory, epsilon: typing.Union[float, Schedule], discount_factor: float, +def create(net: ModuleFactory, epsilon: typing.Union[float, Schedule], discount_factor: float, target_update_frequency: int, double_dqn: bool = False, dueling_dqn: bool = False): """ Vel factory function """ diff --git a/vel/rl/policy/ppo.py b/vel/rl/policy/ppo.py index 9ae38ba8..313407eb 100644 --- a/vel/rl/policy/ppo.py +++ b/vel/rl/policy/ppo.py @@ -3,7 +3,7 @@ import numbers -from vel.api import BatchInfo, ModelFactory, BackboneModule +from vel.api import BatchInfo, ModuleFactory, BackboneModule from vel.util.situational import gym_space_to_size_hint from vel.util.stats import explained_variance from vel.function.constant import ConstantSchedule @@ -152,7 +152,7 @@ def metrics(self) -> list: ] -class PPOFactory(ModelFactory): +class PPOFactory(ModuleFactory): """ Factory class for policy gradient models """ def __init__(self, net_factory, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): @@ -185,7 +185,7 @@ def instantiate(self, **extra_args): ) -def create(net: ModelFactory, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, +def create(net: ModuleFactory, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): """ Vel factory function """ return PPOFactory( diff --git a/vel/rl/policy/ppo_rnn.py b/vel/rl/policy/ppo_rnn.py index 0ac60339..786d4825 100644 --- a/vel/rl/policy/ppo_rnn.py +++ b/vel/rl/policy/ppo_rnn.py @@ -3,7 +3,7 @@ import gym import torch -from vel.api import BatchInfo, ModelFactory, BackboneModule +from vel.api import BatchInfo, ModuleFactory, BackboneModule from vel.function.constant import ConstantSchedule from vel.metric.base import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout, Trajectories @@ -190,7 +190,7 
@@ def metrics(self) -> list: ] -class PPORnnFactory(ModelFactory): +class PPORnnFactory(ModuleFactory): """ Factory class for policy gradient models """ def __init__(self, net_factory, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, @@ -224,7 +224,7 @@ def instantiate(self, **extra_args): ) -def create(net: ModelFactory, +def create(net: ModuleFactory, entropy_coefficient, value_coefficient, cliprange, discount_factor: float, normalize_advantage: bool = True, gae_lambda: float = 1.0): """ Vel factory function """ diff --git a/vel/rl/policy/rainbow.py b/vel/rl/policy/rainbow.py index ed9be961..be7253c9 100644 --- a/vel/rl/policy/rainbow.py +++ b/vel/rl/policy/rainbow.py @@ -2,7 +2,7 @@ import torch import torch.nn.utils -from vel.api import ModelFactory, BackboneModule, BatchInfo, OptimizerFactory, VelOptimizer +from vel.api import ModuleFactory, BackboneModule, BatchInfo, OptimizerFactory, VelOptimizer from vel.metric import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout from vel.rl.module.rainbow_policy import RainbowPolicy @@ -203,8 +203,8 @@ def metrics(self) -> list: ] -class RainbowFactory(ModelFactory): - def __init__(self, net_factory: ModelFactory, discount_factor: float, target_update_frequency: int, +class RainbowFactory(ModuleFactory): + def __init__(self, net_factory: ModuleFactory, discount_factor: float, target_update_frequency: int, vmin: float, vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): self.net_factory = net_factory self.discount_factor = discount_factor @@ -240,7 +240,7 @@ def instantiate(self, **extra_args): ) -def create(net: ModelFactory, discount_factor: float, target_update_frequency: int, +def create(net: ModuleFactory, discount_factor: float, target_update_frequency: int, vmin: float, vmax: float, atoms: int = 1, initial_std_dev: float = 0.4, factorized_noise: bool = True): """ Vel factory function """ return RainbowFactory( diff --git 
a/vel/rl/policy/trpo.py b/vel/rl/policy/trpo.py index c1fae215..8c6a554f 100644 --- a/vel/rl/policy/trpo.py +++ b/vel/rl/policy/trpo.py @@ -8,7 +8,7 @@ import torch.nn.functional as F import torch.nn.utils -from vel.api import BatchInfo, VelOptimizer, OptimizerFactory, ModelFactory, BackboneModule +from vel.api import BatchInfo, VelOptimizer, OptimizerFactory, ModuleFactory, BackboneModule from vel.util.stats import explained_variance from vel.metric.base import AveragingNamedMetric @@ -351,11 +351,11 @@ def metrics(self) -> list: ] -class TRPOFactory(ModelFactory): +class TRPOFactory(ModuleFactory): """ Factory class for policy gradient models """ - def __init__(self, policy_net: ModelFactory, value_net: ModelFactory, + def __init__(self, policy_net: ModuleFactory, value_net: ModuleFactory, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, - discount_factor, gae_lambda, improvement_acceptance_ratio, input_net: typing.Optional[ModelFactory]): + discount_factor, gae_lambda, improvement_acceptance_ratio, input_net: typing.Optional[ModuleFactory]): self.policy_net = policy_net self.value_net = value_net self.input_net = input_net @@ -403,9 +403,9 @@ def instantiate(self, **extra_args): ) -def create(policy_net: ModelFactory, value_net: ModelFactory, +def create(policy_net: ModuleFactory, value_net: ModuleFactory, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, - discount_factor, gae_lambda, improvement_acceptance_ratio, input_net: typing.Optional[ModelFactory]=None): + discount_factor, gae_lambda, improvement_acceptance_ratio, input_net: typing.Optional[ModuleFactory]=None): """ Vel factory function """ return TRPOFactory( diff --git a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py index 92c30a5d..548d1663 100644 --- a/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py +++ 
b/vel/rl/reinforcer/buffered_mixed_policy_iteration_reinforcer.py @@ -4,7 +4,7 @@ import torch import tqdm -from vel.api import TrainingInfo, EpochInfo, BatchInfo, ModelFactory +from vel.api import TrainingInfo, EpochInfo, BatchInfo, ModuleFactory from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import ( Reinforcer, ReinforcerFactory, VecEnvFactory, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, @@ -136,7 +136,7 @@ def off_policy_train_batch(self, batch_info: BatchInfo): class BufferedMixedPolicyIterationReinforcerFactory(ReinforcerFactory): """ Factory class for the PolicyGradientReplayBuffer factory """ - def __init__(self, settings, env_factory: VecEnvFactory, model_factory: ModelFactory, + def __init__(self, settings, env_factory: VecEnvFactory, model_factory: ModuleFactory, env_roller_factory: ReplayEnvRollerFactoryBase, parallel_envs: int, seed: int): self.settings = settings diff --git a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py index 9deeb210..f3dd3310 100644 --- a/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/buffered_off_policy_iteration_reinforcer.py @@ -3,7 +3,7 @@ import torch import tqdm -from vel.api import TrainingInfo, EpochInfo, BatchInfo, Model, ModelFactory +from vel.api import TrainingInfo, EpochInfo, BatchInfo, Model, ModuleFactory from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import ( Reinforcer, ReinforcerFactory, ReplayEnvRollerBase, VecEnvFactory, ReplayEnvRollerFactoryBase, @@ -153,7 +153,7 @@ def train_on_replay_memory(self, batch_info): class BufferedOffPolicyIterationReinforcerFactory(ReinforcerFactory): """ Factory class for the DQN reinforcer """ - def __init__(self, settings, env_factory: VecEnvFactory, model_factory: ModelFactory, + def __init__(self, settings, env_factory: VecEnvFactory, model_factory: ModuleFactory, env_roller_factory: 
ReplayEnvRollerFactoryBase, parallel_envs: int, seed: int): self.settings = settings diff --git a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py index 64af89e5..13b9853f 100644 --- a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py @@ -4,7 +4,7 @@ import torch import tqdm -from vel.api import ModelFactory, TrainingInfo, EpochInfo, BatchInfo +from vel.api import ModuleFactory, TrainingInfo, EpochInfo, BatchInfo from vel.rl.api import ( Reinforcer, ReinforcerFactory, VecEnvFactory, EnvRollerFactoryBase, EnvRollerBase, RlPolicy @@ -146,7 +146,7 @@ def train_batch(self, batch_info: BatchInfo) -> None: class OnPolicyIterationReinforcerFactory(ReinforcerFactory): """ Vel factory class for the PolicyGradientReinforcer """ - def __init__(self, settings, parallel_envs: int, env_factory: VecEnvFactory, model_factory: ModelFactory, + def __init__(self, settings, parallel_envs: int, env_factory: VecEnvFactory, model_factory: ModuleFactory, env_roller_factory: EnvRollerFactoryBase, seed: int): self.settings = settings self.parallel_envs = parallel_envs From 5a7a3f1926793ed0fa47a54a931f4b8a44bf14ae Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 10 Oct 2019 16:59:59 -0700 Subject: [PATCH 126/162] Code lint update. 
--- .flake8 | 2 +- vel/api/optimizer.py | 1 - vel/api/size_hint.py | 2 +- vel/callback/sample_tracker.py | 1 - vel/data/augmentation/scale_min_size.py | 2 - vel/internal/provider.py | 3 +- vel/model/gan/simple_gan.py | 3 +- vel/model/latent/fc_iwae.py | 2 +- vel/model/latent/fc_vae.py | 2 +- vel/model/latent/vq_vae.py | 2 - .../rnn/multilayer_rnn_sequence_model.py | 122 ------------------ vel/module/input/flatten.py | 1 - vel/module/input/normalize_observations.py | 1 - vel/module/input/one_hot_encoding.py | 1 - vel/net/layer/mlp.py | 4 +- vel/net/layer_base.py | 1 - vel/rl/command/rl_train_command.py | 1 - vel/rl/layer/nature_cnn_rnn.py | 61 --------- vel/rl/module/actor_critic_policy.py | 1 - vel/rl/module/noise/eps_greedy.py | 1 - vel/rl/module/stochastic_policy.py | 1 - vel/rl/module/stochastic_rnn_policy.py | 3 - vel/rl/policy/acer.py | 2 +- vel/rl/policy/ppo.py | 1 - vel/rl/policy/ppo_rnn.py | 1 - vel/rl/policy/trpo.py | 8 +- vel/util/dataloader.py | 3 - vel/util/module_util.py | 2 - vel/util/situational.py | 1 - 29 files changed, 15 insertions(+), 221 deletions(-) delete mode 100644 vel/model/rnn/multilayer_rnn_sequence_model.py delete mode 100644 vel/rl/layer/nature_cnn_rnn.py diff --git a/.flake8 b/.flake8 index 25d4293b..d26ea701 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,3 @@ [flake8] max-line-length = 120 -exclude = vel/openai, test, vel/api/__init__.py, vel/rl/api/__init__.py +exclude = vel/openai, test, vel/api/__init__.py, vel/rl/api/__init__.py, vel/data/__init__.py, vel/metric/__init__.py, vel/metric/base/__init__.py, vel/train/__init__.py, vel/optimizer/ranger.py, vel/optimizer/radam.py diff --git a/vel/api/optimizer.py b/vel/api/optimizer.py index bed2a75b..e9f97f83 100644 --- a/vel/api/optimizer.py +++ b/vel/api/optimizer.py @@ -187,4 +187,3 @@ def instantiate_multi(self, parameter_dict: dict) -> VelMultiOptimizer: od[name] = self.instantiate(value) return VelMultiOptimizer(od) - diff --git a/vel/api/size_hint.py b/vel/api/size_hint.py index 
b1e4fecb..c8e687a8 100644 --- a/vel/api/size_hint.py +++ b/vel/api/size_hint.py @@ -65,7 +65,7 @@ def __init__(self, size_hints: typing.Union[SizeHint, SizeTuple, SizeDict] = Non else: raise VelException("Invalid size hints: {}".format(self.size_hints)) - def assert_tuple(self, length : typing.Optional[int] = None) -> SizeTuple: + def assert_tuple(self, length: typing.Optional[int] = None) -> SizeTuple: """ Assert given size hints is a tuple """ assert self.type == self.TYPE_TUPLE, "Network needs to return a tuple" diff --git a/vel/callback/sample_tracker.py b/vel/callback/sample_tracker.py index a1c9d789..aadefd12 100644 --- a/vel/callback/sample_tracker.py +++ b/vel/callback/sample_tracker.py @@ -23,4 +23,3 @@ def write_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict) def load_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict): training_info['samples'] = hidden_state_dict['sample_tracker/samples'] - diff --git a/vel/data/augmentation/scale_min_size.py b/vel/data/augmentation/scale_min_size.py index 88554a09..0fae3bd6 100644 --- a/vel/data/augmentation/scale_min_size.py +++ b/vel/data/augmentation/scale_min_size.py @@ -2,8 +2,6 @@ Code based on: https://github.com/fastai/fastai/blob/master/fastai/transforms.py """ -import PIL.Image as Image - import vel.api as api import vel.data.operation.image_op as op diff --git a/vel/internal/provider.py b/vel/internal/provider.py index 79921125..6a6950d0 100644 --- a/vel/internal/provider.py +++ b/vel/internal/provider.py @@ -8,7 +8,8 @@ class Provider: """ Dependency injection resolver for the configuration file """ - def __init__(self, environment: dict, instances: typing.Optional[dict] = None, parameters: typing.Optional[dict] = None): + def __init__(self, environment: dict, instances: typing.Optional[dict] = None, + parameters: typing.Optional[dict] = None): self.environment = environment self.parameters = parameters if parameters is not None else {} diff --git 
a/vel/model/gan/simple_gan.py b/vel/model/gan/simple_gan.py index 0823888b..a8f56044 100644 --- a/vel/model/gan/simple_gan.py +++ b/vel/model/gan/simple_gan.py @@ -2,11 +2,12 @@ Simple GAN code is based on https://github.com/eriklindernoren/PyTorch-GAN/blob/master/implementations/gan/gan.py """ import collections + import numpy as np import torch import torch.nn as nn -from vel.api import OptimizedModel, ModuleFactory, VelOptimizer, OptimizerFactory +from vel.api import OptimizedModel, ModuleFactory, OptimizerFactory from vel.api.optimizer import VelMultiOptimizer from vel.metric import AveragingNamedMetric diff --git a/vel/model/latent/fc_iwae.py b/vel/model/latent/fc_iwae.py index 43e47d5e..4ba19597 100644 --- a/vel/model/latent/fc_iwae.py +++ b/vel/model/latent/fc_iwae.py @@ -2,7 +2,6 @@ import torch.distributions as dist import torch.nn as nn import torch.nn.functional as F -import torch.nn.init as init from vel.api import ModuleFactory from vel.module.layers import Flatten, Reshape @@ -76,6 +75,7 @@ def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: """ Sample from a decoder distribution - we ignore that since it's so weak in this case """ return decoded +# import torch.nn.init as init # @staticmethod # def _weight_initializer(tensor): # init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('tanh')) diff --git a/vel/model/latent/fc_vae.py b/vel/model/latent/fc_vae.py index 4ae9323a..701c5717 100644 --- a/vel/model/latent/fc_vae.py +++ b/vel/model/latent/fc_vae.py @@ -2,7 +2,6 @@ import torch.distributions as dist import torch.nn as nn import torch.nn.functional as F -import torch.nn.init as init from vel.api import ModuleFactory from vel.module.layers import Flatten, Reshape @@ -76,6 +75,7 @@ def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: """ Sample from a decoder distribution - we ignore that since it's so weak in this case """ return decoded +# import torch.nn.init as init # @staticmethod # def 
_weight_initializer(tensor): # init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('tanh')) diff --git a/vel/model/latent/vq_vae.py b/vel/model/latent/vq_vae.py index f28dbf50..608260f9 100644 --- a/vel/model/latent/vq_vae.py +++ b/vel/model/latent/vq_vae.py @@ -272,8 +272,6 @@ def calculate_gradient(self, data: dict) -> dict: return { 'loss': loss.item(), - - 'grad_norm': grad_norm, 'reconstruction': loss_recons.item(), 'loss_vq': loss_vq.item(), 'loss_commit': loss_commit.item() diff --git a/vel/model/rnn/multilayer_rnn_sequence_model.py b/vel/model/rnn/multilayer_rnn_sequence_model.py deleted file mode 100644 index 70f98c75..00000000 --- a/vel/model/rnn/multilayer_rnn_sequence_model.py +++ /dev/null @@ -1,122 +0,0 @@ -import typing - -import torch -import torch.nn.functional as F -import torch.nn as nn - -from vel.api import LossFunctionModel, ModuleFactory -from vel.module.rnn_layer import RnnLayer - - -class MultilayerRnnSequenceModel(LossFunctionModel): - """ Multilayer RNN network for sequence modeling (n:n) """ - - def __init__(self, input_block: LinearBackboneModel, rnn_type: str, hidden_layers: typing.List[int], - output_dim: int, dropout: float = 0.0): - super().__init__() - - self.output_dim = output_dim - self.hidden_layers = hidden_layers - - self.input_block = input_block - - current_dim = self.input_block.output_dim - - self.recurrent_layers = [] - self.dropout_layers = [] - - for idx, current_layer in enumerate(hidden_layers, 1): - rnn = RnnLayer( - input_size=current_dim, - hidden_size=current_layer, - rnn_type=rnn_type, - ) - - self.add_module('{}{:02}'.format(rnn_type, idx), rnn) - self.recurrent_layers.append(rnn) - - if dropout > 0.0: - dropout_layer = nn.Dropout(p=dropout) - - self.add_module('rnn_dropout{:02}'.format(idx), dropout_layer) - self.dropout_layers.append(dropout_layer) - - current_dim = current_layer - - self.output_layer = nn.Linear(current_dim, output_dim) - self.output_activation = nn.LogSoftmax(dim=2) - - def 
reset_weights(self): - self.input_block.reset_weights() - - def forward(self, sequence): - """ Forward propagate batch of sequences through the network, without accounting for the state """ - data = self.input_block(sequence) - - for idx in range(len(self.recurrent_layers)): - data, _ = self.recurrent_layers[idx](data) - - if self.dropout_layers: - data = self.dropout_layers[idx](data) - - data = self.output_layer(data) - - return self.output_activation(data) - - def forward_state(self, sequence, state=None): - """ Forward propagate a sequence through the network accounting for the state """ - if state is None: - state = self.zero_state(sequence.size(0)) - - data = self.input_block(sequence) - - state_outputs = [] - - # for layer_length, layer in zip(self.hidden_layers, self.recurrent_layers): - for idx in range(len(self.recurrent_layers)): - layer_length = self.recurrent_layers[idx].state_dim - - # Partition hidden state, for each layer we have layer_length of h state and layer_length of c state - current_state = state[:, :, :layer_length] - state = state[:, :, layer_length:] - - # Propagate through the GRU state - data, new_h = self.recurrent_layers[idx](data, current_state) - - if self.dropout_layers: - data = self.dropout_layers[idx](data) - - state_outputs.append(new_h) - - output_data = self.output_activation(self.output_layer(data)) - - concatenated_hidden_output = torch.cat(state_outputs, dim=2) - - return output_data, concatenated_hidden_output - - @property - def state_dim(self) -> int: - """ Dimension of model state """ - return sum(x.state_dim for x in self.recurrent_layers) - - def zero_state(self, batch_size): - """ Initial state of the network """ - return torch.zeros(1, batch_size, self.state_dim) - - def loss_value(self, x_data, y_true, y_pred): - """ Calculate a value of loss function """ - y_pred = y_pred.view(-1, y_pred.size(2)) - y_true = y_true.view(-1).to(torch.long) - return F.nll_loss(y_pred, y_true) - - -def create(input_block: 
ModuleFactory, rnn_type: str, hidden_layers: typing.List[int], - output_dim: int, dropout=0.0): - """ Vel factory function """ - def instantiate(**_): - return MultilayerRnnSequenceModel( - input_block.instantiate(), rnn_type=rnn_type, hidden_layers=hidden_layers, output_dim=output_dim, - dropout=dropout - ) - - return ModuleFactory.generic(instantiate) diff --git a/vel/module/input/flatten.py b/vel/module/input/flatten.py index 9462f689..7a1e5246 100644 --- a/vel/module/input/flatten.py +++ b/vel/module/input/flatten.py @@ -12,4 +12,3 @@ def __init__(self): def forward(self, input_data): return self.model(input_data) - diff --git a/vel/module/input/normalize_observations.py b/vel/module/input/normalize_observations.py index a1965a0e..a7dca4be 100644 --- a/vel/module/input/normalize_observations.py +++ b/vel/module/input/normalize_observations.py @@ -44,4 +44,3 @@ def forward(self, input_vector): self.running_var.copy_(new_var) return (input_vector - self.running_mean.unsqueeze(0)) / torch.sqrt(self.running_var.unsqueeze(0)) - diff --git a/vel/module/input/one_hot_encoding.py b/vel/module/input/one_hot_encoding.py index 6cc83b74..0bbc5f52 100644 --- a/vel/module/input/one_hot_encoding.py +++ b/vel/module/input/one_hot_encoding.py @@ -14,4 +14,3 @@ def __init__(self, alphabet_size: int): def forward(self, input_data): return self.layer(input_data) - diff --git a/vel/net/layer/mlp.py b/vel/net/layer/mlp.py index d2f57b49..551bf7f5 100644 --- a/vel/net/layer/mlp.py +++ b/vel/net/layer/mlp.py @@ -5,14 +5,14 @@ Under MIT license. 
""" import typing -import numpy as np +import numpy as np import torch.nn as nn import torch.nn.init as init import vel.util.network as net_util -from vel.api import SizeHints, SizeHint +from vel.api import SizeHints from vel.net.layer_base import LayerFactory, Layer diff --git a/vel/net/layer_base.py b/vel/net/layer_base.py index 1738e694..cdc90487 100644 --- a/vel/net/layer_base.py +++ b/vel/net/layer_base.py @@ -22,4 +22,3 @@ def name_base(self) -> str: def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: """ Create a given layer object """ raise NotImplementedError - diff --git a/vel/rl/command/rl_train_command.py b/vel/rl/command/rl_train_command.py index a879a0f0..b63807fb 100644 --- a/vel/rl/command/rl_train_command.py +++ b/vel/rl/command/rl_train_command.py @@ -2,7 +2,6 @@ from vel.api import ModelConfig, EpochInfo, TrainingInfo, BatchInfo, OptimizerFactory, Storage, Callback, VelOptimizer from vel.callback.time_tracker import TimeTracker -from vel.metric.samples_per_sec import SamplesPerSec from vel.rl.api import ReinforcerFactory, Reinforcer import vel.openai.baselines.logger as openai_logger diff --git a/vel/rl/layer/nature_cnn_rnn.py b/vel/rl/layer/nature_cnn_rnn.py deleted file mode 100644 index fd4864a2..00000000 --- a/vel/rl/layer/nature_cnn_rnn.py +++ /dev/null @@ -1,61 +0,0 @@ -from vel.api import LinearBackboneModel, ModuleFactory -from vel.rl.backbone.nature_cnn import NatureCnn -from vel.module.rnn_cell import RnnCell - -from vel.api import SizeHint, SizeHints -from vel.net.layer_base import Layer, LayerFactory - - -class NatureCnnRnnBackbone(LinearBackboneModel): - """ - Long-Short-Term Memory rnn cell together with DeepMind-style 'Nature' cnn preprocessing - """ - - def __init__(self, input_width: int, input_height: int, input_channels: int, rnn_type: str = 'lstm', - cnn_output_dim: int = 512, hidden_units: int = 128): - super().__init__() - - self.hidden_units = hidden_units - - self.nature_cnn 
= NatureCnn(input_width, input_height, input_channels, cnn_output_dim) - self.rnn_cell = RnnCell(input_size=self.nature_cnn.output_dim, hidden_size=self.hidden_units, rnn_type=rnn_type) - - def reset_weights(self): - """ Call proper initializers for the weights """ - self.nature_cnn.reset_weights() - self.rnn_cell.reset_weights() - - @property - def output_dim(self) -> int: - return self.rnn_cell.output_dim - - @property - def state_dim(self) -> int: - """ Initial state of the network """ - return self.rnn_cell.state_dim - - @property - def is_stateful(self) -> bool: - """ If the model has a state that needs to be fed between individual observations """ - return True - - def zero_state(self, batch_size): - """ Potential state for the model """ - return self.rnn_cell.zero_state(batch_size) - - def forward(self, input_image, state): - cnn_output = self.nature_cnn(input_image) - hidden_state, new_state = self.rnn_cell(cnn_output, state) - - return hidden_state, new_state - - -def create(input_width, input_height, input_channels=1, rnn_type='lstm', cnn_output_dim=512, hidden_units=128): - """ Vel factory function """ - def instantiate(**_): - return NatureCnnRnnBackbone( - input_width=input_width, input_height=input_height, input_channels=input_channels, - rnn_type=rnn_type, cnn_output_dim=cnn_output_dim, hidden_units=hidden_units - ) - - return ModuleFactory.generic(instantiate) diff --git a/vel/rl/module/actor_critic_policy.py b/vel/rl/module/actor_critic_policy.py index bbfc45de..cb252447 100644 --- a/vel/rl/module/actor_critic_policy.py +++ b/vel/rl/module/actor_critic_policy.py @@ -1,7 +1,6 @@ import itertools as it import gym -import torch from vel.api import VModule, BackboneModule diff --git a/vel/rl/module/noise/eps_greedy.py b/vel/rl/module/noise/eps_greedy.py index 328c140e..e48ace91 100644 --- a/vel/rl/module/noise/eps_greedy.py +++ b/vel/rl/module/noise/eps_greedy.py @@ -5,7 +5,6 @@ from vel.api import Schedule, VModule from vel.internal.generic_factory 
import GenericFactory -from vel.function.constant import ConstantSchedule class EpsGreedy(VModule): diff --git a/vel/rl/module/stochastic_policy.py b/vel/rl/module/stochastic_policy.py index d11ac0a6..ec73101d 100644 --- a/vel/rl/module/stochastic_policy.py +++ b/vel/rl/module/stochastic_policy.py @@ -39,4 +39,3 @@ def forward(self, observation): """ Calculate model outputs """ action_hidden, value_hidden = self.net(observation) return self.action_head(action_hidden), self.value_head(value_hidden) - diff --git a/vel/rl/module/stochastic_rnn_policy.py b/vel/rl/module/stochastic_rnn_policy.py index 94410c63..aa04f927 100644 --- a/vel/rl/module/stochastic_rnn_policy.py +++ b/vel/rl/module/stochastic_rnn_policy.py @@ -1,10 +1,8 @@ import gym from vel.api import VModule, BackboneModule - from vel.rl.module.head.stochastic_action_head import make_stockastic_action_head from vel.rl.module.head.value_head import ValueHead -from vel.util.tensor_util import to_device class StochasticRnnPolicy(VModule): @@ -73,4 +71,3 @@ def reset_state(self, state, dones): return out_state else: return state - diff --git a/vel/rl/policy/acer.py b/vel/rl/policy/acer.py index 59003e45..68792faf 100644 --- a/vel/rl/policy/acer.py +++ b/vel/rl/policy/acer.py @@ -291,7 +291,7 @@ def instantiate(self, **extra_args): ) -def create(net, trust_region: bool , entropy_coefficient: float, q_coefficient: float, discount_factor: float, +def create(net, trust_region: bool, entropy_coefficient: float, q_coefficient: float, discount_factor: float, rho_cap: float = 10.0, retrace_rho_cap: float = 1.0, average_model_alpha: float = 0.99, trust_region_delta: float = 1.0): """ Vel factory function """ diff --git a/vel/rl/policy/ppo.py b/vel/rl/policy/ppo.py index 313407eb..915a1bdf 100644 --- a/vel/rl/policy/ppo.py +++ b/vel/rl/policy/ppo.py @@ -197,4 +197,3 @@ def create(net: ModuleFactory, entropy_coefficient, value_coefficient, cliprange normalize_advantage=normalize_advantage, gae_lambda=gae_lambda ) - diff 
--git a/vel/rl/policy/ppo_rnn.py b/vel/rl/policy/ppo_rnn.py index 786d4825..c09e08cc 100644 --- a/vel/rl/policy/ppo_rnn.py +++ b/vel/rl/policy/ppo_rnn.py @@ -237,4 +237,3 @@ def create(net: ModuleFactory, normalize_advantage=normalize_advantage, gae_lambda=gae_lambda ) - diff --git a/vel/rl/policy/trpo.py b/vel/rl/policy/trpo.py index 8c6a554f..58c44ca6 100644 --- a/vel/rl/policy/trpo.py +++ b/vel/rl/policy/trpo.py @@ -256,8 +256,8 @@ def optimize(self, batch_info: BatchInfo, rollout: Rollout) -> dict: 'explained_variance': explained_variance(returns, rollout.batch_tensor('values')) } - def line_search(self, normalized_observations, rollout, original_policy_loss, original_policy_params, original_parameter_vec, - full_step, expected_improvement_full): + def line_search(self, normalized_observations, rollout, original_policy_loss, original_policy_params, + original_parameter_vec, full_step, expected_improvement_full): """ Find the right stepsize to make sure policy improves """ current_parameter_vec = original_parameter_vec.clone() @@ -405,7 +405,8 @@ def instantiate(self, **extra_args): def create(policy_net: ModuleFactory, value_net: ModuleFactory, max_kl, cg_iters, line_search_iters, cg_damping, entropy_coefficient, vf_iters, - discount_factor, gae_lambda, improvement_acceptance_ratio, input_net: typing.Optional[ModuleFactory]=None): + discount_factor, gae_lambda, improvement_acceptance_ratio, + input_net: typing.Optional[ModuleFactory] = None): """ Vel factory function """ return TRPOFactory( @@ -422,4 +423,3 @@ def create(policy_net: ModuleFactory, value_net: ModuleFactory, gae_lambda=gae_lambda, improvement_acceptance_ratio=improvement_acceptance_ratio, ) - diff --git a/vel/util/dataloader.py b/vel/util/dataloader.py index b6b03fe5..0bdbada2 100644 --- a/vel/util/dataloader.py +++ b/vel/util/dataloader.py @@ -16,6 +16,3 @@ def map_values(self, item): return { name: getattr(item, argument) for name, argument in self.field_mapping.items() } - - - diff --git 
a/vel/util/module_util.py b/vel/util/module_util.py index ae415425..e2dbef9b 100644 --- a/vel/util/module_util.py +++ b/vel/util/module_util.py @@ -95,5 +95,3 @@ def optimizer_parameter_helper(parameters, parameter_dict): out_dict[parameter] = value[0] return out_dict - - diff --git a/vel/util/situational.py b/vel/util/situational.py index 4a4cb402..4ea7140c 100644 --- a/vel/util/situational.py +++ b/vel/util/situational.py @@ -41,4 +41,3 @@ def gym_space_to_size_hint(space: gym.Space) -> SizeHints: def size_hint_from_shape(shape: typing.Tuple[int]) -> SizeHints: """ Convert tensor shape (without batch dimension) into a size hint """ return SizeHints(SizeHint(*([None] + list(shape)))) - From 7fb9375ca85cef761be90058dd3640014137e114 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 10 Oct 2019 17:08:12 -0700 Subject: [PATCH 127/162] Update to PyTorch 1.3 --- README.md | 2 +- requirements.in | 2 +- requirements.txt | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 57237231..22075ae9 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ pip install -e . ``` from the repository root directory. -This project requires Python at least 3.6 and PyTorch 1.2. +This project requires Python at least 3.6 and PyTorch 1.3. If you want to run YAML config examples, you'll also need a **project configuration file** `.velproject.yaml`. An example is included in this repository. 
diff --git a/requirements.in b/requirements.in index 70eebbb8..416b5ede 100644 --- a/requirements.in +++ b/requirements.in @@ -12,6 +12,6 @@ pyyaml scikit-learn torchtext torchvision -torch~=1.2 +torch~=1.3 tqdm visdom diff --git a/requirements.txt b/requirements.txt index 28dbf680..5776b12b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,8 +13,8 @@ chardet==3.0.4 # via requests cloudpickle==1.2.2 cycler==0.10.0 # via matplotlib dnspython==1.16.0 -future==0.17.1 # via pyglet -gym[atari,box2d,classic_control]==0.14.0 +future==0.18.0 # via pyglet +gym[atari,box2d,classic_control]==0.15.3 idna==2.8 # via requests importlib-metadata==0.23 # via pluggy, pytest joblib==0.14.0 # via scikit-learn @@ -42,10 +42,10 @@ requests==2.22.0 # via torchtext, visdom scikit-learn==0.21.3 scipy==1.3.1 # via gym, scikit-learn, visdom six==1.12.0 # via atari-py, cycler, gym, packaging, python-dateutil, torchtext, torchvision, visdom, websocket-client -torch==1.2.0 +torch==1.3.0 torchfile==0.1.0 # via visdom torchtext==0.4.0 -torchvision==0.4.0 +torchvision==0.4.1 tornado==6.0.3 # via visdom tqdm==4.36.1 urllib3==1.25.6 # via requests From 9f36e733ded91ec911ba0e3af8d2d1b876633f90 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Mon, 14 Oct 2019 11:04:41 -0700 Subject: [PATCH 128/162] Another refactoring of remaining examples. 
--- .../cats_vs_dogs_resnet34.yaml | 5 +- .../classification/imdb_sentiment_gru.yaml | 58 ++++-- examples-configs/rl/mujoco/mujoco_a2c.yaml | 2 +- examples-configs/rl/mujoco/mujoco_ddpg.yaml | 14 +- examples-configs/rl/mujoco/mujoco_ppo.yaml | 2 +- examples-configs/rl/mujoco/mujoco_trpo.yaml | 3 +- vel/api/optimizer.py | 88 +++++++-- vel/api/vmodule.py | 4 + vel/data/bucket_loader.py | 5 + vel/data/source/nlp/imdb.py | 18 +- vel/model/imagenet/resnet34.py | 27 ++- vel/model/nlp/language_model.py | 5 +- vel/model/nlp/sequence_classification.py | 91 ++++++++++ .../multilayer_rnn_sequence_classification.py | 167 ------------------ ...observations.py => normalize_expanding.py} | 4 +- vel/module/rnn_layer.py | 159 ++++++++--------- vel/net/layer/arch/parallel.py | 42 +++-- vel/net/layer/dropout.py | 21 +-- vel/net/layer/input/image_to_tensor.py | 18 +- vel/net/layer/input/normalize.py | 49 ----- vel/net/layer/input/normalize_expanding.py | 50 ++++++ vel/net/layer/mlp.py | 35 ++-- vel/net/layer/nlp/alphabet_embedding.py | 21 ++- vel/net/layer/nlp/alphabet_one_hot_encode.py | 22 ++- vel/net/layer/nlp/pretrained_embedding.py | 61 +++++++ vel/net/layer/nlp/select_final_features.py | 65 +++++++ vel/net/layer/rnn.py | 41 +++-- vel/net/layer/util/concat.py | 17 +- vel/net/layer/util/repeat.py | 16 +- vel/net/layer_base.py | 97 +++++++++- vel/net/modular.py | 43 +++-- vel/net/sequence.py | 76 ++++++++ vel/optimizer/adadelta.py | 26 +-- vel/optimizer/adam.py | 28 +-- vel/optimizer/radam.py | 28 +-- vel/optimizer/ranger.py | 69 +------- vel/optimizer/rmsprop.py | 28 +-- vel/optimizer/rmsprop_tf.py | 28 +-- vel/optimizer/sgd.py | 33 +--- vel/rl/layer/double_nature_cnn.py | 15 +- vel/rl/layer/double_noisy_nature_cnn.py | 16 +- vel/{model/rnn => rl/layer/input}/__init__.py | 0 vel/rl/layer/nature_cnn.py | 15 +- vel/rl/layer/nature_cnn_small.py | 15 +- vel/rl/layer/rnn_cell.py | 16 +- vel/rl/module/actor_critic_policy.py | 10 ++ vel/rl/policy/ddpg.py | 3 +- vel/rl/vecenv/dummy.py | 4 
+- vel/train/phase/cycle.py | 7 + vel/train/phase/freeze.py | 8 +- vel/util/module_util.py | 5 + 51 files changed, 984 insertions(+), 696 deletions(-) create mode 100644 vel/model/nlp/sequence_classification.py delete mode 100644 vel/model/rnn/multilayer_rnn_sequence_classification.py rename vel/module/input/{normalize_observations.py => normalize_expanding.py} (93%) delete mode 100644 vel/net/layer/input/normalize.py create mode 100644 vel/net/layer/input/normalize_expanding.py create mode 100644 vel/net/layer/nlp/pretrained_embedding.py create mode 100644 vel/net/layer/nlp/select_final_features.py create mode 100644 vel/net/sequence.py rename vel/{model/rnn => rl/layer/input}/__init__.py (100%) diff --git a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml index a53623fb..da2f8b8f 100644 --- a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml +++ b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml @@ -67,6 +67,7 @@ commands: name: vel.command.phase_train_command phases: - name: vel.train.phase.freeze + groups: ['top', 'mid'] - name: vel.train.phase.cycle init_lr: 0.001 init_iter: 20 @@ -80,8 +81,8 @@ commands: init_lr: 0.001 init_iter: 20 - max_lr: [1.0e-4, 1.0e-3, 1.0e-2] - min_lr: [0.0, 0.0, 0.0] + max_lr: {"top": 1.0e-4, "mid": 1.0e-3, "bottom": 1.0e-2} + min_lr: {"top": 0.0, "mid": 0.0, "bottom": 0.0} interpolate: 'cosine' cycles: 3 cycle_len: 1 diff --git a/examples-configs/nlp/classification/imdb_sentiment_gru.yaml b/examples-configs/nlp/classification/imdb_sentiment_gru.yaml index 3e85dac8..cb3c9e2a 100644 --- a/examples-configs/nlp/classification/imdb_sentiment_gru.yaml +++ b/examples-configs/nlp/classification/imdb_sentiment_gru.yaml @@ -3,7 +3,7 @@ name: 'imdb_sentiment_gru' source: name: vel.data.source.nlp.imdb - vectors: "glove.6B.100d" # precomputed 100-dimensional embeddings + vocab_size: 
25_000 loader: @@ -11,37 +11,57 @@ loader: batch_size: 32 - model: - name: vel.model.rnn.multilayer_rnn_sequence_classification + name: vel.model.nlp.sequence_classification + output_dim: 2 # Positive or negative sentiment - input_block: - name: vel.module.input.embedding - alphabet_size: 25_002 # Size of the alphabet + net: + name: vel.net.modular + layers: + - name: vel.net.layer.nlp.pretrained_embedding + group: "embedding" + vectors: "glove.6B.100d" # precomputed 100-dimensional embeddings - output_dim: 100 # Embedding dimension + - name: vel.net.layer.rnn + group: "rnn" + hidden_size: 256 + rnn_type: 'gru' + bidirectional: true - pretrained: True -# frozen: True + - name: vel.net.layer.dropout + p: 0.3 - rnn_type: 'gru' - rnn_layers: [256, 128] - rnn_dropout: 0.5 - bidirectional: True + - name: vel.net.layer.rnn + group: "rnn" + hidden_size: 128 + rnn_type: 'gru' + bidirectional: true - linear_layers: [64] - linear_dropout: 0.2 + # For sequence classification we only want outputs for the last hidden state + - name: vel.net.layer.nlp.select_final_features + bidirectional: true - output_dim: 2 # Positive or negative sentiment + - name: vel.net.layer.dropout + p: 0.3 + + - name: vel.net.layer.mlp + hidden_layers: [64] + activation: 'relu' + + - name: vel.net.layer.dropout + p: 0.2 optimizer: name: vel.optimizer.adam - lr: [1.0e-4, 1.0e-3, 1.0e-2, 1.0e-2] - weight_decay: [0.0, 0.0001, 0.001, 0.001] + lr: 1.0e-2 + weight_decay: 0.001 epsilon: 1.0e-5 betas: [0.7, 0.99] - layer_groups: true + + parameter_groups: + embedding: {"lr": 1.0e-4, "weight_decay": 0.0} + rnn: {"lr": 1.0e-3, "weight_decay": 0.0001} commands: diff --git a/examples-configs/rl/mujoco/mujoco_a2c.yaml b/examples-configs/rl/mujoco/mujoco_a2c.yaml index 504871e1..9f89431a 100644 --- a/examples-configs/rl/mujoco/mujoco_a2c.yaml +++ b/examples-configs/rl/mujoco/mujoco_a2c.yaml @@ -22,7 +22,7 @@ model: net: name: vel.net.modular layers: - - name: vel.net.layer.input.normalize + - name: 
vel.net.layer.input.normalize_expanding - name: vel.net.layer.mlp hidden_layers: [64, 64] activation: 'tanh' diff --git a/examples-configs/rl/mujoco/mujoco_ddpg.yaml b/examples-configs/rl/mujoco/mujoco_ddpg.yaml index d82f0baf..f75ef7e3 100644 --- a/examples-configs/rl/mujoco/mujoco_ddpg.yaml +++ b/examples-configs/rl/mujoco/mujoco_ddpg.yaml @@ -1,5 +1,6 @@ name: 'mujoco_ddpg' + env: name: vel.rl.env.mujoco game: !param game = 'Reacher-v2' @@ -20,10 +21,11 @@ model: input_net: name: vel.net.modular layers: - - name: vel.net.layer.input.normalize + - name: vel.net.layer.input.normalize_expanding actor_net: name: vel.net.modular + group: 'actor' layers: - name: vel.net.layer.mlp hidden_layers: [64, 64] @@ -31,6 +33,7 @@ model: critic_net: name: vel.net.modular + group: 'critic' layers: - name: vel.net.layer.util.concat # Concatenate observation and action - name: vel.net.layer.mlp @@ -63,9 +66,14 @@ optimizer: name: vel.optimizer.adam # OpenAI has two different optimizers optimizing each network separately. 
# As far as I know it should be equivalent to optimizing two separate networks together with a sum of loss functions - lr: [1.0e-4, 1.0e-3] - weight_decay: [0.0, 0.0] + lr: 1.0e-3 + weight_decay: 0.0 epsilon: 1.0e-4 + parameter_groups: + actor: + lr: 1.0e-4 + critic: + lr: 1.0e-3 commands: diff --git a/examples-configs/rl/mujoco/mujoco_ppo.yaml b/examples-configs/rl/mujoco/mujoco_ppo.yaml index 63de365a..975eabf6 100644 --- a/examples-configs/rl/mujoco/mujoco_ppo.yaml +++ b/examples-configs/rl/mujoco/mujoco_ppo.yaml @@ -25,7 +25,7 @@ model: net: name: vel.net.modular layers: - - name: vel.net.layer.input.normalize + - name: vel.net.layer.input.normalize_expanding - name: vel.net.layer.util.repeat times: 2 # Need to repeat output twice, to consume by the 'parallel' layers - name: vel.net.layer.arch.parallel diff --git a/examples-configs/rl/mujoco/mujoco_trpo.yaml b/examples-configs/rl/mujoco/mujoco_trpo.yaml index 743877f1..f88fc5ba 100644 --- a/examples-configs/rl/mujoco/mujoco_trpo.yaml +++ b/examples-configs/rl/mujoco/mujoco_trpo.yaml @@ -1,5 +1,6 @@ name: 'mujoco_trpo' + env: name: vel.rl.env.mujoco game: !param game = 'Reacher-v2' @@ -27,7 +28,7 @@ model: input_net: name: vel.net.modular layers: - - name: vel.net.layer.input.normalize + - name: vel.net.layer.input.normalize_expanding policy_net: name: vel.net.modular diff --git a/vel/api/optimizer.py b/vel/api/optimizer.py index e9f97f83..e5d8a50c 100644 --- a/vel/api/optimizer.py +++ b/vel/api/optimizer.py @@ -49,22 +49,39 @@ def create_scheduler(self, scheduler_factory: SchedulerFactory, last_epoch: int class VelOptimizerProxy(VelOptimizer): """ Proxy PyTorch optimizer into a Vel optimizer """ - def __init__(self, optimizer: Optimizer, max_grad_norm: typing.Optional[float] = None): + def __init__(self, optimizer: Optimizer, group_names: [str], max_grad_norm: typing.Optional[float] = None): self.optimizer = optimizer + self.group_names = group_names self.max_grad_norm = max_grad_norm + if 'default' in 
self.group_names: + self.main_idx = self.group_names.index('default') + else: + self.main_idx = len(self.group_names) - 1 + + assert len(self.optimizer.param_groups) == len(self.group_names), \ + "There must be equal number of parameter groups and group names" + + self.initial_lrs = [x['lr'] for x in self.optimizer.param_groups] + def get_lr(self) -> float: """ Return current learning rate of the optimizer """ - return self.optimizer.param_groups[-1]['lr'] + return self.optimizer.param_groups[self.main_idx]['lr'] def set_lr(self, lr: float): """ Set current learning rate of the optimizer """ if isinstance(lr, list): for group_lr, param_group in zip(lr, self.optimizer.param_groups): param_group['lr'] = group_lr + elif isinstance(lr, dict): + for idx, name in enumerate(self.group_names): + self.optimizer.param_groups[idx]['lr'] = lr[name] else: - for param_group in self.optimizer.param_groups: - param_group['lr'] = lr + canonical_lr = self.initial_lrs[0] + + for idx, param_group in enumerate(self.optimizer.param_groups): + opt_lr = self.initial_lrs[idx] / canonical_lr * lr + param_group['lr'] = opt_lr def state_dict(self) -> dict: return self.optimizer.state_dict() @@ -112,7 +129,10 @@ def __init__(self, optimizers: typing.Dict[str, VelOptimizer], canonical_name: t self.optimizers = optimizers # Canonical, chosen optimizer - self.canonical_name = list(optimizers.keys())[0] + if canonical_name is None: + self.canonical_name = list(optimizers.keys())[0] + else: + self.canonical_name = canonical_name self.initial_lrs = { name: optimizer.get_lr() @@ -126,11 +146,18 @@ def get_lr(self) -> float: return self.optimizers[self.canonical_name].get_lr() def set_lr(self, lr: float): - canonical_lr = self.initial_lrs[self.canonical_name] + if isinstance(lr, list): + # TODO: implement + raise NotImplementedError + elif isinstance(lr, dict): + # TODO: implement + raise NotImplementedError + else: + canonical_lr = self.initial_lrs[self.canonical_name] - for name, optimizer in 
self.optimizers.items(): - opt_lr = self.initial_lrs[name] / canonical_lr * lr - optimizer.set_lr(opt_lr) + for name, optimizer in self.optimizers.items(): + opt_lr = self.initial_lrs[name] / canonical_lr * lr + optimizer.set_lr(opt_lr) def state_dict(self) -> dict: output = {} @@ -173,11 +200,48 @@ def metrics(self) -> list: class OptimizerFactory: """ Base class for optimizer factories """ + def __init__(self): + self.parameter_groups = None - def instantiate(self, parameters) -> VelOptimizer: - raise NotImplementedError + def with_parameter_groups(self, parameter_groups=None): + """ Set `parameter_groups` for this factory """ + self.parameter_groups = parameter_groups + return self - def instantiate_parameter_groups(self, parameters) -> VelOptimizer: + def preprocess(self, parameters): + """ Preprocess given parameters input into proper optimizer parameter groups, with their names """ + parameters = list(parameters) + + # Make sure parameters have right format + if parameters: + if not isinstance(parameters[0], collections.Sequence) or not isinstance(parameters[0][0], str): + parameters = [("default", parameters)] + + groups = collections.defaultdict(list) + + for name, group in parameters: + group = [x for x in group if x.requires_grad] + if group: # Must have at least 1 element + groups[name].extend(group) + + group_names = [] + sorted_groups = [] + + for name in sorted(groups.keys()): + parameter_group = { + 'params': groups[name] + } + + if self.parameter_groups and name in self.parameter_groups: + parameter_group.update(self.parameter_groups[name]) + + sorted_groups.append(parameter_group) + group_names.append(name) + + return sorted_groups, group_names + + def instantiate(self, parameters) -> VelOptimizer: + """ Instantiate VelOptimizer for iterable of parameters or iterable of (parameter, group) """ raise NotImplementedError def instantiate_multi(self, parameter_dict: dict) -> VelMultiOptimizer: diff --git a/vel/api/vmodule.py b/vel/api/vmodule.py index 
1374258e..4c6d9c50 100644 --- a/vel/api/vmodule.py +++ b/vel/api/vmodule.py @@ -19,6 +19,10 @@ def zero_state(self, batch_size): """ Potential state for the model """ return None + def grouped_parameters(self): + """ Return iterable of parameters (group, parameters) """ + return [("default", self.parameters())] + class BackboneModule(VModule): """ Network, whose output feeds into other models. Needs to provide size hints. """ diff --git a/vel/data/bucket_loader.py b/vel/data/bucket_loader.py index 46f70a4b..740d83c1 100644 --- a/vel/data/bucket_loader.py +++ b/vel/data/bucket_loader.py @@ -58,6 +58,11 @@ def size(self): """ Get a dict of sizes of each loader """ return self._loader_sizes + @property + def alphabet_size(self): + """ Size of the text alphabet """ + return self.source.metadata.get('alphabet_size', 0) + def create(model_config: ModelConfig, source: LanguageSource, batch_size: int): """ Vel factory function """ diff --git a/vel/data/source/nlp/imdb.py b/vel/data/source/nlp/imdb.py index 71168756..badf59c7 100644 --- a/vel/data/source/nlp/imdb.py +++ b/vel/data/source/nlp/imdb.py @@ -45,7 +45,7 @@ def __init__(self, path, text_field, label_field, **kwargs): data.Dataset.__init__(self, examples, fields, **kwargs) -def create(model_config, data_dir='imdb', vectors=None): +def create(model_config, vocab_size: int, data_dir='imdb', vectors=None): """ Create an IMDB dataset """ path = model_config.data_dir(data_dir) @@ -58,7 +58,7 @@ def create(model_config, data_dir='imdb', vectors=None): label_field=label_field ) - text_field.build_vocab(train_source, max_size=25_000, vectors=vectors) + text_field.build_vocab(train_source, max_size=vocab_size, vectors=vectors) label_field.build_vocab(train_source) return LanguageSource( @@ -68,16 +68,8 @@ def create(model_config, data_dir='imdb', vectors=None): mapping={ 'x': 'text', 'y': 'label' + }, + metadata={ + 'alphabet_size': vocab_size+2 } ) - - # train_iterator, test_iterator = data.BucketIterator.splits( - # 
(train_source, test_source), - # batch_size=batch_size, - # device=model_config.torch_device(), - # shuffle=True - # ) - - # return SupervisedTextData( - # train_source, test_source, train_iterator, test_iterator, text_field, label_field - # ) diff --git a/vel/model/imagenet/resnet34.py b/vel/model/imagenet/resnet34.py index 3537a226..6139a44d 100644 --- a/vel/model/imagenet/resnet34.py +++ b/vel/model/imagenet/resnet34.py @@ -63,14 +63,16 @@ def __init__(self, fc_layers=None, dropout=None, pretrained=True): self.model = final_model - def freeze(self, number=None): + def freeze(self, groups=None): """ Freeze given number of layers in the model """ - if number is None: - number = self.head_layers + layer_groups = dict(self.layer_groups()) - for idx, child in enumerate(self.model.children()): - if idx < number: - mu.freeze_layer(child) + if groups is None: + groups = layer_groups.keys() + + for group in groups: + for module in layer_groups[group]: + mu.freeze_layer(module) def unfreeze(self): """ Unfreeze model layers """ @@ -82,11 +84,18 @@ def layer_groups(self): g1 = list(self.model[:self.group_cut_layers[0]]) g2 = list(self.model[self.group_cut_layers[0]:self.group_cut_layers[1]]) g3 = list(self.model[self.group_cut_layers[1]:]) - return [g1, g2, g3] + + return [ + ('top', g1), + ('mid', g2), + ('bottom', g3) + ] + + def parameter_groups(self): + return [(name, mu.module_list_to_param_list(m)) for name, m in self.layer_groups()] def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: - parameters = mu.to_parameter_groups(self.layer_groups()) - return optimizer_factory.instantiate_parameter_groups(parameters) + return optimizer_factory.instantiate(self.parameter_groups()) def forward(self, x): """ Calculate model value """ diff --git a/vel/model/nlp/language_model.py b/vel/model/nlp/language_model.py index e27835b9..def34db5 100644 --- a/vel/model/nlp/language_model.py +++ b/vel/model/nlp/language_model.py @@ -11,7 +11,6 @@ class 
LanguageModel(LossFunctionModel): def __init__(self, alphabet_size: int, net: BackboneModule): super().__init__() - self.net = net self.alphabet_size = alphabet_size self.output_dim = self.alphabet_size + 1 @@ -43,11 +42,11 @@ def forward(self, input_data: torch.Tensor, state=None) -> torch.Tensor: """ if self.net.is_stateful: output, new_state = self.net(input_data, state=state) + return F.log_softmax(self.output_layer(output), dim=-1), new_state else: output = self.net(input_data) - new_state = state + return F.log_softmax(self.output_layer(output), dim=-1) - return F.log_softmax(self.output_layer(output), dim=-1), new_state def loss_value(self, x_data, y_true, y_pred) -> torch.tensor: """ Calculate a value of loss function """ diff --git a/vel/model/nlp/sequence_classification.py b/vel/model/nlp/sequence_classification.py new file mode 100644 index 00000000..1aa6bd3e --- /dev/null +++ b/vel/model/nlp/sequence_classification.py @@ -0,0 +1,91 @@ +import itertools as it + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from vel.api import ( + LossFunctionModel, ModuleFactory, VModule, BackboneModule, SizeHints, SizeHint, OptimizerFactory, + VelOptimizer +) +from vel.metric.accuracy import Accuracy +from vel.metric.loss_metric import Loss + + +class SequenceClassification(LossFunctionModel): + """ NLP (text) sequence classification """ + + def __init__(self, net: BackboneModule, output_size: int): + super().__init__() + + self.net = net + self.output_layer = nn.Linear( + in_features=self.net.size_hints().assert_single().last(), + out_features=output_size + ) + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return self.net.is_stateful + + def zero_state(self, batch_size): + """ Potential state for the model """ + return self.net.zero_state(batch_size) + + def forward(self, input_data: torch.Tensor, state=None) -> torch.Tensor: + r"""Defines the computation 
performed at every call. + + Should be overridden by all subclasses. + + .. note:: + Although the recipe for forward pass needs to be defined within + this function, one should call the :class:`Module` instance afterwards + instead of this since the former takes care of running the + registered hooks while the latter silently ignores them. + """ + if self.net.is_stateful: + output, new_state = self.net(input_data, state=state) + output = F.log_softmax(self.output_layer(output), dim=-1) + return output, new_state + else: + output = self.net(input_data) + output = F.log_softmax(self.output_layer(output), dim=-1) + return output + + def loss_value(self, x_data, y_true, y_pred) -> torch.tensor: + """ Calculate a value of loss function """ + return F.nll_loss(y_pred, y_true) + + def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: + grouped = self.net.grouped_parameters() + parameters = it.chain(grouped, [("output", self.output_layer.parameters())]) + return optimizer_factory.instantiate(parameters) + + def metrics(self) -> list: + """ Set of metrics for this model """ + return [Loss(), Accuracy()] + + +class SequenceClassificationFactory(ModuleFactory): + def __init__(self, net_factory: ModuleFactory, alphabet_size: int, output_dim: int): + self.net_factory = net_factory + self.output_dim = output_dim + self.alphabet_size = alphabet_size + + def instantiate(self, **extra_args) -> VModule: + size_hint = SizeHints(SizeHint(None, None)) + net = self.net_factory.instantiate(alphabet_size=self.alphabet_size, size_hint=size_hint) + + return SequenceClassification( + net=net, output_size=self.output_dim + ) + + +def create(loader, net: ModuleFactory, output_dim: int): + """ Vel factory function """ + return SequenceClassificationFactory( + net_factory=net, + alphabet_size=loader.alphabet_size, + output_dim=output_dim + ) diff --git a/vel/model/rnn/multilayer_rnn_sequence_classification.py b/vel/model/rnn/multilayer_rnn_sequence_classification.py 
deleted file mode 100644 index 82953b5b..00000000 --- a/vel/model/rnn/multilayer_rnn_sequence_classification.py +++ /dev/null @@ -1,167 +0,0 @@ -import typing - -import torch -import torch.nn.functional as F -import torch.nn as nn - -import vel.util.module_util as mu - -from vel.api import LossFunctionModel, ModuleFactory, LinearBackboneModel, OptimizerFactory, VelOptimizer -from vel.metric.accuracy import Accuracy -from vel.metric.loss_metric import Loss -from vel.module.rnn_layer import RnnLayer - - -class MultilayerRnnSequenceClassification(LossFunctionModel): - """ Multilayer RNN network for sequence modeling (n:1) """ - - def __init__(self, input_block: LinearBackboneModel, rnn_type: str, output_dim: int, - rnn_layers: typing.List[int], rnn_dropout: float = 0.0, bidirectional: bool = False, - linear_layers: typing.List[int] = None, linear_dropout: float = 0.0): - super().__init__() - - self.output_dim = output_dim - - self.rnn_layers_sizes = rnn_layers - self.rnn_dropout = rnn_dropout - self.linear_layers_sizes = linear_layers - self.linear_dropout = linear_dropout - - self.bidirectional = bidirectional - self.input_block = input_block - - current_dim = self.input_block.output_dim - - self.rnn_layers = [] - self.rnn_dropout_layers = [] - - bidirectional_multiplier = 1 - - for idx, current_layer in enumerate(rnn_layers, 1): - rnn = RnnLayer( - input_size=current_dim * bidirectional_multiplier, - hidden_size=current_layer, - rnn_type=rnn_type, - bidirectional=bidirectional, - ) - - self.add_module('{}{:02}'.format(rnn_type, idx), rnn) - self.rnn_layers.append(rnn) - - if self.rnn_dropout > 0.0: - dropout_layer = nn.Dropout(p=self.rnn_dropout) - - self.add_module('rnn_dropout{:02}'.format(idx), dropout_layer) - self.rnn_dropout_layers.append(dropout_layer) - - current_dim = current_layer - - if self.bidirectional: - bidirectional_multiplier = 2 - else: - bidirectional_multiplier = 1 - - self.linear_layers = [] - self.linear_dropout_layers = [] - - for idx, 
current_layer in enumerate(linear_layers, 1): - linear_layer = nn.Linear(current_dim * bidirectional_multiplier, current_layer) - - self.add_module('linear{:02}'.format(idx), linear_layer) - self.linear_layers.append(linear_layer) - - if self.linear_dropout > 0.0: - dropout_layer = nn.Dropout(p=self.linear_dropout) - - self.add_module('linear_dropout{:02}'.format(idx), dropout_layer) - self.linear_dropout_layers.append(dropout_layer) - - bidirectional_multiplier = 1 - current_dim = current_layer - - if self.bidirectional: - self.output_layer = nn.Linear(bidirectional_multiplier * current_dim, output_dim) - else: - self.output_layer = nn.Linear(current_dim, output_dim) - - self.output_activation = nn.LogSoftmax(dim=1) - - def reset_weights(self): - self.input_block.reset_weights() - - for layer in self.linear_layers: - nn.init.kaiming_normal_(layer.weight, nonlinearity='relu') - nn.init.zeros_(layer.bias) - - nn.init.kaiming_normal_(self.output_layer.weight, nonlinearity='relu') - nn.init.zeros_(self.output_layer.bias) - - def forward(self, sequence): - """ Forward propagate batch of sequences through the network, without accounting for the state """ - data = self.input_block(sequence) - - for idx in range(len(self.rnn_layers)): - data, _ = self.rnn_layers[idx](data) - - if self.rnn_dropout_layers: - data = self.rnn_dropout_layers[idx](data) - - # We are interested only in the last element of the sequence - if self.bidirectional: - last_hidden_size = self.rnn_layers_sizes[-1] - data = torch.cat([data[:, -1, :last_hidden_size], data[:, 0, last_hidden_size:]], dim=1) - else: - data = data[:, -1] - - for idx in range(len(self.linear_layers_sizes)): - data = F.relu(self.linear_layers[idx](data)) - - if self.linear_dropout_layers: - data = self.linear_dropout_layers[idx](data) - - data = self.output_layer(data) - - return self.output_activation(data) - - def layer_groups(self): - return [ - self.input_block, - self.rnn_layers, - self.linear_layers, - self.output_layer - 
] - - def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: - """ Create optimizer for the purpose of optimizing this model """ - parameters = mu.to_parameter_groups(self.layer_groups()) - return optimizer_factory.instantiate_parameter_groups(parameters) - - @property - def state_dim(self) -> int: - """ Dimension of model state """ - return sum(x.state_dim for x in self.gru_layers) - - def loss_value(self, x_data, y_true, y_pred): - """ Calculate a value of loss function """ - return F.nll_loss(y_pred, y_true) - - def metrics(self) -> list: - """ Set of metrics for this model """ - return [Loss(), Accuracy()] - - -def create(input_block: ModuleFactory, rnn_type: str, output_dim: int, - rnn_layers: typing.List[int], rnn_dropout: float = 0.0, bidirectional: bool = False, - linear_layers: typing.List[int] = None, linear_dropout: float = 0.0): - """ Vel factory function """ - if linear_layers is None: - linear_layers = [] - - def instantiate(**_): - return MultilayerRnnSequenceClassification( - input_block=input_block.instantiate(), rnn_type=rnn_type, output_dim=output_dim, - rnn_layers=rnn_layers, rnn_dropout=rnn_dropout, bidirectional=bidirectional, - linear_layers=linear_layers, linear_dropout=linear_dropout - ) - - return ModuleFactory.generic(instantiate) diff --git a/vel/module/input/normalize_observations.py b/vel/module/input/normalize_expanding.py similarity index 93% rename from vel/module/input/normalize_observations.py rename to vel/module/input/normalize_expanding.py index a7dca4be..b649594f 100644 --- a/vel/module/input/normalize_observations.py +++ b/vel/module/input/normalize_expanding.py @@ -3,8 +3,8 @@ from vel.api import VModule -class NormalizeObservations(VModule): - """ Normalize a vector of observations """ +class NormalizeExpanding(VModule): + """ Normalize a vector of observations - across the batch dim """ def __init__(self, input_shape, epsilon=1e-6): super().__init__() diff --git a/vel/module/rnn_layer.py 
b/vel/module/rnn_layer.py index 0c5b2c85..1e7a8fa4 100644 --- a/vel/module/rnn_layer.py +++ b/vel/module/rnn_layer.py @@ -1,79 +1,80 @@ -import torch -import torch.nn as nn -import torch.nn.init as init - - -from vel.api import LinearBackboneModel - - -class RnnLayer(LinearBackboneModel): - """ Generalization of RNN layer (Simple RNN, LSTM or GRU) """ - - def __init__(self, input_size, hidden_size, rnn_type, bias=True, bidirectional=False, nonlinearity='tanh'): - super().__init__() - - assert rnn_type in {'rnn', 'lstm', 'gru'}, "RNN type {} is not supported".format(rnn_type) - - self.input_size = input_size - self.hidden_size = hidden_size - self.rnn_type = rnn_type - self.bidirectional = bidirectional - - if self.rnn_type == 'rnn': - self.rnn_cell = nn.RNN( - input_size=input_size, hidden_size=hidden_size, bias=bias, nonlinearity=nonlinearity, - bidirectional=bidirectional, batch_first=True - ) - elif self.rnn_type == 'lstm': - self.rnn_cell = nn.LSTM( - input_size=input_size, hidden_size=hidden_size, bias=bias, - bidirectional=bidirectional, batch_first=True - ) - elif self.rnn_type == 'gru': - self.rnn_cell = nn.GRU( - input_size=input_size, hidden_size=hidden_size, bias=bias, - bidirectional=bidirectional, batch_first=True - ) - - def reset_weights(self): - init.xavier_normal_(self.rnn_cell.weight_hh) - init.xavier_normal_(self.rnn_cell.weight_ih) - init.zeros_(self.rnn_cell.bias_ih) - init.zeros_(self.rnn_cell.bias_hh) - - @property - def output_dim(self) -> int: - """ Final dimension of model output """ - if self.bidirectional: - return 2.0 * self.hidden_size - else: - return self.hidden_size - - @property - def state_dim(self) -> int: - """ Dimension of model state """ - if self.rnn_type == 'lstm': - return 2 * self.hidden_size - else: - return self.hidden_size - - def zero_state(self, batch_size): - """ State for the model """ - return torch.zeros(batch_size, self.state_dim) - - def forward(self, input_data, state=None): - if state is None: - if 
self.bidirectional: - state = self.zero_state(input_data.size(0)).unsqueeze(0).repeat(2, 1, 1).to(input_data.device) - else: - state = self.zero_state(input_data.size(0)).unsqueeze(0).to(input_data.device) - - if self.rnn_type == 'lstm': - hidden_state, cell_state = torch.split(state, self.hidden_size, 2) - hidden_state = hidden_state.contiguous() - cell_state = cell_state.contiguous() - output, (hidden_state, cell_state) = self.rnn_cell(input_data, (hidden_state, cell_state)) - new_state = torch.cat([hidden_state, cell_state], dim=2) - return output, new_state - else: - return self.rnn_cell(input_data, state) +# Temporarily commented out as it's an invalid code at the moment, may be deleted later +# import torch +# import torch.nn as nn +# import torch.nn.init as init +# +# +# from vel.api import LinearBackboneModel +# +# +# class RnnLayer(LinearBackboneModel): +# """ Generalization of RNN layer (Simple RNN, LSTM or GRU) """ +# +# def __init__(self, input_size, hidden_size, rnn_type, bias=True, bidirectional=False, nonlinearity='tanh'): +# super().__init__() +# +# assert rnn_type in {'rnn', 'lstm', 'gru'}, "RNN type {} is not supported".format(rnn_type) +# +# self.input_size = input_size +# self.hidden_size = hidden_size +# self.rnn_type = rnn_type +# self.bidirectional = bidirectional +# +# if self.rnn_type == 'rnn': +# self.rnn_cell = nn.RNN( +# input_size=input_size, hidden_size=hidden_size, bias=bias, nonlinearity=nonlinearity, +# bidirectional=bidirectional, batch_first=True +# ) +# elif self.rnn_type == 'lstm': +# self.rnn_cell = nn.LSTM( +# input_size=input_size, hidden_size=hidden_size, bias=bias, +# bidirectional=bidirectional, batch_first=True +# ) +# elif self.rnn_type == 'gru': +# self.rnn_cell = nn.GRU( +# input_size=input_size, hidden_size=hidden_size, bias=bias, +# bidirectional=bidirectional, batch_first=True +# ) +# +# def reset_weights(self): +# init.xavier_normal_(self.rnn_cell.weight_hh) +# init.xavier_normal_(self.rnn_cell.weight_ih) +# 
init.zeros_(self.rnn_cell.bias_ih) +# init.zeros_(self.rnn_cell.bias_hh) +# +# @property +# def output_dim(self) -> int: +# """ Final dimension of model output """ +# if self.bidirectional: +# return 2.0 * self.hidden_size +# else: +# return self.hidden_size +# +# @property +# def state_dim(self) -> int: +# """ Dimension of model state """ +# if self.rnn_type == 'lstm': +# return 2 * self.hidden_size +# else: +# return self.hidden_size +# +# def zero_state(self, batch_size): +# """ State for the model """ +# return torch.zeros(batch_size, self.state_dim) +# +# def forward(self, input_data, state=None): +# if state is None: +# if self.bidirectional: +# state = self.zero_state(input_data.size(0)).unsqueeze(0).repeat(2, 1, 1).to(input_data.device) +# else: +# state = self.zero_state(input_data.size(0)).unsqueeze(0).to(input_data.device) +# +# if self.rnn_type == 'lstm': +# hidden_state, cell_state = torch.split(state, self.hidden_size, 2) +# hidden_state = hidden_state.contiguous() +# cell_state = cell_state.contiguous() +# output, (hidden_state, cell_state) = self.rnn_cell(input_data, (hidden_state, cell_state)) +# new_state = torch.cat([hidden_state, cell_state], dim=2) +# return output, new_state +# else: +# return self.rnn_cell(input_data, state) diff --git a/vel/net/layer/arch/parallel.py b/vel/net/layer/arch/parallel.py index c25cc0ec..cebe1e5e 100644 --- a/vel/net/layer/arch/parallel.py +++ b/vel/net/layer/arch/parallel.py @@ -1,14 +1,15 @@ import torch.nn as nn +import typing from vel.api import SizeHints -from vel.net.layer_base import LayerFactory, Layer +from vel.net.layer_base import LayerFactory, Layer, LayerInfo, LayerFactoryContext class ParallelLayer(Layer): """ Network that consists of parallel "towers" """ - def __init__(self, name: str, layers: [Layer]): - super().__init__(name) + def __init__(self, info: LayerInfo, layers: [Layer]): + super().__init__(info) self.layers = nn.ModuleList(layers) self._size_hints = 
SizeHints(tuple(layer.size_hints().unwrap() for layer in self.layers)) @@ -22,11 +23,16 @@ def forward(self, direct, state: dict = None, context: dict = None): results = [layer(x, state, context) for layer, x in zip(self.layers, direct)] return tuple(results) + def grouped_parameters(self) -> typing.Iterable[(str, object)]: + """ Return iterable of pairs (group, parameters) """ + raise NotImplementedError + class ParallelLayerFactory(LayerFactory): """ Factory for Parallel layer """ def __init__(self, layers: [LayerFactory]): + super().__init__() self.layers = layers @property @@ -34,22 +40,38 @@ def name_base(self) -> str: """ Base of layer name """ return "parallel" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ hints = direct_input.assert_tuple(len(self.layers)) layers = [] + info = self.make_info(context) + for idx, (size_hint, layer_factory) in enumerate(zip(hints, self.layers)): counter = idx + 1 - local_name = "{}_{:04d}".format(layer_factory.name_base, counter) - global_name = f"{name}/{local_name}" - layer = layer_factory.instantiate(name=global_name, direct_input=SizeHints(size_hint), context=context) + child_context = LayerFactoryContext( + idx=counter, + parent_group=info.group, + parent_name=info.name, + data=context.data + ) + + layer = layer_factory.instantiate( + direct_input=SizeHints(size_hint), + context=child_context, + extra_args=extra_args + ) + layers.append(layer) - return ParallelLayer(name, layers) + return ParallelLayer( + info=info, + layers=layers + ) -def create(layers: [LayerFactory]): +def create(layers: [LayerFactory], label=None, group=None): """ Vel factory function """ - return ParallelLayerFactory(layers=layers) + return ParallelLayerFactory(layers=layers).with_given_name(label).with_given_group(group) diff --git 
a/vel/net/layer/dropout.py b/vel/net/layer/dropout.py index 3d6df1f9..ec68f9aa 100644 --- a/vel/net/layer/dropout.py +++ b/vel/net/layer/dropout.py @@ -1,6 +1,6 @@ import torch.nn.functional as F from vel.api import SizeHints -from vel.net.layer_base import Layer, LayerFactory +from vel.net.layer_base import Layer, LayerFactory, LayerFactoryContext, LayerInfo class DropoutLayer(Layer): @@ -11,18 +11,18 @@ class DropoutLayer(Layer): See :class:`~torch.nn.Dropout` for details. """ - def __init__(self, name: str, input_size: SizeHints, p: float): - super().__init__(name) + def __init__(self, info: LayerInfo, input_shape: SizeHints, p: float): + super().__init__(info) self.p = p - self.input_size = input_size + self.input_shape = input_shape def forward(self, direct, state: dict = None, context: dict = None): return F.dropout(direct, p=self.p, training=self.training) def size_hints(self) -> SizeHints: """ Size hints for this network """ - return self.input_size + return self.input_shape def extra_repr(self) -> str: """Set the extra representation of the module""" @@ -33,6 +33,7 @@ class DropoutLayerFactory(LayerFactory): """ Factory class for the Dropout layer """ def __init__(self, p: float): + super().__init__() self.p = p @property @@ -40,15 +41,15 @@ def name_base(self) -> str: """ Base of layer name """ return "dropout" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: """ Create a given layer object """ return DropoutLayer( - name=name, - input_size=direct_input, + info=self.make_info(context), + input_shape=direct_input, p=self.p ) -def create(p: float): +def create(p: float, label=None, group=None): """ Vel factory function """ - return DropoutLayerFactory(p) + return DropoutLayerFactory(p).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/input/image_to_tensor.py 
b/vel/net/layer/input/image_to_tensor.py index 1f5adb9c..ed073f7f 100644 --- a/vel/net/layer/input/image_to_tensor.py +++ b/vel/net/layer/input/image_to_tensor.py @@ -1,6 +1,6 @@ from vel.api import SizeHints, SizeHint from vel.module.input.image_to_tensor import image_to_tensor -from vel.net.layer_base import LayerFactory, Layer +from vel.net.layer_base import LayerFactory, Layer, LayerFactoryContext, LayerInfo class ImageToTensorLayer(Layer): @@ -9,8 +9,8 @@ class ImageToTensorLayer(Layer): Flip channels to a [C, W, H] order and potentially convert 8-bit color values to floats """ - def __init__(self, name: str, shape: tuple = None): - super().__init__(name) + def __init__(self, info: LayerInfo, shape: tuple = None): + super().__init__(info) if shape is not None: assert len(shape) == 3, "Images must have three dimensions" @@ -27,6 +27,7 @@ def size_hints(self) -> SizeHints: class ImageToTensorLayerFactory(LayerFactory): def __init__(self, shape: tuple = None): + super().__init__() self.shape = shape @property @@ -34,16 +35,19 @@ def name_base(self) -> str: """ Base of layer name """ return "image_to_tensor" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: """ Create a given layer object """ if self.shape is None: shape = direct_input.assert_single().shape() else: shape = self.shape - return ImageToTensorLayer(name=name, shape=shape) + return ImageToTensorLayer( + info=self.make_info(context), + shape=shape + ) -def create(shape: tuple = None): +def create(shape: tuple = None, label=None, group=None): """ Vel factory function """ - return ImageToTensorLayerFactory(shape=shape) + return ImageToTensorLayerFactory(shape=shape).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/input/normalize.py b/vel/net/layer/input/normalize.py deleted file mode 100644 index 91766a38..00000000 --- 
a/vel/net/layer/input/normalize.py +++ /dev/null @@ -1,49 +0,0 @@ -import collections.abc as abc - -from vel.api import SizeHints, SizeHint -from vel.module.input.normalize_observations import NormalizeObservations -from vel.net.layer_base import LayerFactory, Layer - - -class NormalizeLayer(Layer): - """ Layer that normalizes the inputs """ - - def __init__(self, name: str, shape): - super().__init__(name) - if not isinstance(shape, abc.Sequence): - self.shape = (shape,) - else: - self.shape = shape - - self.normalize = NormalizeObservations(input_shape=shape) - - def forward(self, direct, state: dict = None, context: dict = None): - return self.normalize(direct) - - def size_hints(self) -> SizeHints: - return SizeHints(SizeHint(*([None] + list(self.shape)))) - - -class NormalizeLayerFactory(LayerFactory): - def __init__(self, shape=None): - self.shape = shape - - @property - def name_base(self) -> str: - """ Base of layer name """ - return "image_to_tensor" - - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: - """ Create a given layer object """ - # Potential improvement here is to use either direct input or size parameter - if self.shape is None: - shape = direct_input.assert_single().shape() - else: - shape = self.shape - - return NormalizeLayer(name=name, shape=shape) - - -def create(shape=None): - """ Vel factory function """ - return NormalizeLayerFactory(shape=shape) diff --git a/vel/net/layer/input/normalize_expanding.py b/vel/net/layer/input/normalize_expanding.py new file mode 100644 index 00000000..35dc2abe --- /dev/null +++ b/vel/net/layer/input/normalize_expanding.py @@ -0,0 +1,50 @@ +from vel.api import SizeHints, SizeHint +from vel.module.input.normalize_expanding import NormalizeExpanding +from vel.net.layer_base import LayerFactory, Layer, LayerFactoryContext, LayerInfo + + +class NormalizeLayer(Layer): + """ Layer that normalizes the inputs """ + + def __init__(self, info: LayerInfo, 
input_shape: SizeHints): + super().__init__(info) + + self.input_shape = input_shape + + self.normalize = NormalizeExpanding( + input_shape=self.input_shape.assert_single()[1:] # Remove batch axis + ) + + def forward(self, direct, state: dict = None, context: dict = None): + return self.normalize(direct) + + def size_hints(self) -> SizeHints: + return self.input_shape + + +class NormalizeLayerFactory(LayerFactory): + def __init__(self, shape=None): + super().__init__() + self.shape = shape + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "image_to_tensor" + + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ + if self.shape is None: + input_shape = direct_input + else: + input_shape = SizeHints(SizeHint(*([None] + list(self.shape)))) + + return NormalizeLayer( + info=self.make_info(context), + input_shape=input_shape + ) + + +def create(shape=None): + """ Vel factory function """ + return NormalizeLayerFactory(shape=shape) diff --git a/vel/net/layer/mlp.py b/vel/net/layer/mlp.py index 551bf7f5..0e7d27b3 100644 --- a/vel/net/layer/mlp.py +++ b/vel/net/layer/mlp.py @@ -13,17 +13,17 @@ import vel.util.network as net_util from vel.api import SizeHints -from vel.net.layer_base import LayerFactory, Layer +from vel.net.layer_base import LayerFactory, Layer, LayerInfo, LayerFactoryContext class MLP(Layer): """ Simple Multi-Layer-Perceptron network """ - def __init__(self, name: str, input_size: SizeHints, hidden_layers: typing.List[int], activation: str = 'tanh', - normalization: typing.Optional[str] = None): - super().__init__(name) + def __init__(self, info: LayerInfo, input_shape: SizeHints, hidden_layers: typing.List[int], + activation: str = 'tanh', normalization: typing.Optional[str] = None): + super().__init__(info) - self.input_size = input_size - self.input_length = input_size.assert_single().last() + self.input_shape = input_shape + 
self.input_length = input_shape.assert_single().last() self.hidden_layers = hidden_layers self.activation = activation self.normalization = normalization @@ -31,18 +31,18 @@ def __init__(self, name: str, input_size: SizeHints, hidden_layers: typing.List[ layer_objects = [] layer_sizes = zip([self.input_length] + hidden_layers, hidden_layers) - for input_size, output_size in layer_sizes: - layer_objects.append(nn.Linear(input_size, output_size)) + for i_size, o_size in layer_sizes: + layer_objects.append(nn.Linear(i_size, o_size)) if self.normalization: - layer_objects.append(net_util.normalization(normalization)(output_size)) + layer_objects.append(net_util.normalization(normalization)(o_size)) layer_objects.append(net_util.activation(activation)()) self.model = nn.Sequential(*layer_objects) self.hidden_units = hidden_layers[-1] if hidden_layers else self.input_length - self.output_size = input_size.assert_single().drop_last().append(self.hidden_units) + self.output_shape = SizeHints(input_shape.assert_single().drop_last().append(self.hidden_units)) def reset_weights(self): """ Call proper initializers for the weights """ @@ -57,12 +57,13 @@ def forward(self, direct, state: dict = None, context: dict = None): def size_hints(self) -> SizeHints: """ Size hints for this network """ - return SizeHints(self.output_size) + return self.output_shape class MLPFactory(LayerFactory): def __init__(self, hidden_layers: typing.List[int], activation: str = 'tanh', normalization: typing.Optional[str] = None): + super().__init__() self.hidden_layers = hidden_layers self.activation = activation self.normalization = normalization @@ -72,17 +73,19 @@ def name_base(self) -> str: """ Base of layer name """ return "mlp" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: """ Create a given layer object """ return MLP( - name=name, - 
input_size=direct_input, + info=self.make_info(context), + input_shape=direct_input, hidden_layers=self.hidden_layers, activation=self.activation, normalization=self.normalization ) -def create(hidden_layers, activation='tanh', normalization=None): +def create(hidden_layers: [int], activation='tanh', normalization=None, label=None, group=None): """ Vel factory function """ - return MLPFactory(hidden_layers=hidden_layers, activation=activation, normalization=normalization) + return MLPFactory( + hidden_layers=hidden_layers, activation=activation, normalization=normalization + ).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/nlp/alphabet_embedding.py b/vel/net/layer/nlp/alphabet_embedding.py index 2fec003a..2e61b86d 100644 --- a/vel/net/layer/nlp/alphabet_embedding.py +++ b/vel/net/layer/nlp/alphabet_embedding.py @@ -1,34 +1,36 @@ import torch.nn as nn from vel.api import SizeHints -from vel.net.layer_base import Layer, LayerFactory +from vel.net.layer_base import Layer, LayerFactory, LayerFactoryContext, LayerInfo class AlphabetEmbeddingLayer(Layer): """ Encode incoming tensor encoded using certain alphabet into one-hot encoding """ - def __init__(self, name: str, alphabet_size: int, dim: int, input_shape: SizeHints): - super().__init__(name) + def __init__(self, info: LayerInfo, alphabet_size: int, dim: int, input_shape: SizeHints): + super().__init__(info) self.alphabet_size = alphabet_size self.dim = dim - self.output_size = SizeHints(input_shape.assert_single().append(self.dim)) + self.output_shape = SizeHints(input_shape.assert_single().append(self.dim)) self.layer = nn.Embedding(self.alphabet_size + 1, self.dim) def forward(self, direct, state: dict = None, context: dict = None): + """ Forward propagation of a single layer """ return self.layer(direct) def size_hints(self) -> SizeHints: """ Size hints for this network """ - return self.output_size + return self.output_shape class AlphabetEmbeddingLayerFactory(LayerFactory): """ 
Factory class for the AlphabetOneHotEncode layer """ def __init__(self, dim: int): + super().__init__() self.dim = dim @property @@ -36,17 +38,18 @@ def name_base(self) -> str: """ Base of layer name """ return "alphabet_embedding" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ alphabet_size = extra_args['alphabet_size'] return AlphabetEmbeddingLayer( - name=name, + info=self.make_info(context), alphabet_size=alphabet_size, dim=self.dim, input_shape=direct_input ) -def create(dim: int): +def create(dim: int, label=None, group=None): """ Vel factory function """ - return AlphabetEmbeddingLayerFactory(dim) + return AlphabetEmbeddingLayerFactory(dim).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/nlp/alphabet_one_hot_encode.py b/vel/net/layer/nlp/alphabet_one_hot_encode.py index b26745c5..1198369b 100644 --- a/vel/net/layer/nlp/alphabet_one_hot_encode.py +++ b/vel/net/layer/nlp/alphabet_one_hot_encode.py @@ -1,5 +1,5 @@ from vel.api import SizeHints -from vel.net.layer_base import Layer, LayerFactory +from vel.net.layer_base import Layer, LayerFactory, LayerInfo, LayerFactoryContext from vel.util.tensor_util import one_hot_encoding @@ -8,40 +8,38 @@ class AlphabetOneHotEncodeLayer(Layer): """ Encode incoming tensor encoded using certain alphabet into one-hot encoding """ - def __init__(self, name: str, alphabet_size: int, input_shape: SizeHints): - super().__init__(name) + def __init__(self, info: LayerInfo, alphabet_size: int, input_shape: SizeHints): + super().__init__(info) self.alphabet_size = alphabet_size - self.output_size = SizeHints(input_shape.assert_single().append(self.alphabet_size + 1)) + self.output_shape = SizeHints(input_shape.assert_single().append(self.alphabet_size + 1)) def forward(self, direct, state: dict = None, context: 
dict = None): return one_hot_encoding(direct, num_labels=self.alphabet_size + 1) def size_hints(self) -> SizeHints: """ Size hints for this network """ - return self.output_size + return self.output_shape class AlphabetOneHotEncodeLayerFactory(LayerFactory): """ Factory class for the AlphabetoneHotEncode layer """ - def __init__(self): - pass - @property def name_base(self) -> str: """ Base of layer name """ return "alphabet_one_hot_encode" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ alphabet_size = extra_args['alphabet_size'] return AlphabetOneHotEncodeLayer( - name=name, + info=self.make_info(context), alphabet_size=alphabet_size, input_shape=direct_input ) -def create(): +def create(label=None, group=None): """ Vel factory function """ - return AlphabetOneHotEncodeLayerFactory() + return AlphabetOneHotEncodeLayerFactory().with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/nlp/pretrained_embedding.py b/vel/net/layer/nlp/pretrained_embedding.py new file mode 100644 index 00000000..45350838 --- /dev/null +++ b/vel/net/layer/nlp/pretrained_embedding.py @@ -0,0 +1,61 @@ +import numpy as np + +import torch.nn as nn + +from vel.api import SizeHints, LanguageSource +from vel.net.layer_base import Layer, LayerFactory, LayerFactoryContext, LayerInfo + + +class PretrainedEmbeddingLayer(Layer): + """ Load a pretrained word embedding """ + def __init__(self, info: LayerInfo, vectors: np.ndarray, input_shape: SizeHints, freeze: bool = False): + super().__init__(info) + + self.output_shape = SizeHints(input_shape.assert_single().append(vectors.shape[1])) + + self.layer = nn.Embedding(vectors.shape[0], vectors.shape[1]) + self.layer.weight.data.copy_(vectors) + + self.freeze = freeze + + if self.freeze: + self.layer.weight.requires_grad_(False) + + def 
forward(self, direct, state: dict = None, context: dict = None): + return self.layer(direct) + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + return self.output_shape + + +class PretrainedEmbeddingLayerFactory(LayerFactory): + """ Load a pretrained word embedding """ + def __init__(self, source: LanguageSource, vectors: str, freeze: bool): + super().__init__() + self.vectors = vectors + self.source = source + self.freeze = freeze + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "pretrained_embedding" + + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + vocab = self.source.fields[self.source.mapping['x']].vocab + vocab.load_vectors(self.vectors) + + return PretrainedEmbeddingLayer( + info=self.make_info(context), + vectors=vocab.vectors, + freeze=self.freeze, + input_shape=direct_input, + ) + + +def create(source: LanguageSource, vectors: str, freeze: bool = False, label=None, group=None): + """ Vel factory function """ + return PretrainedEmbeddingLayerFactory( + source, vectors, freeze=freeze + ).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/nlp/select_final_features.py b/vel/net/layer/nlp/select_final_features.py new file mode 100644 index 00000000..4b55b303 --- /dev/null +++ b/vel/net/layer/nlp/select_final_features.py @@ -0,0 +1,65 @@ +import torch + +from vel.api import SizeHints, SizeHint +from vel.net.layer_base import Layer, LayerFactory, LayerInfo, LayerFactoryContext + + +class SelectFinalFeaturesLayer(Layer): + """ + For many sequence processing tasks we only care about the output from the final element + """ + def __init__(self, info: LayerInfo, bidirectional: bool, input_shape: SizeHints): + super().__init__(info) + + self.bidirectional = bidirectional + + b, s, x = input_shape.assert_single(3) + self.output_shape = SizeHints(SizeHint(b, x)) + + def forward(self, direct, state: dict = None, context: dict = 
None): + if self.bidirectional: + final_shape = direct.shape[-1] + assert final_shape % 2 == 0 + half_final_shape = final_shape // 2 + + # dimensions are: batch, seq, features + # first one is from forward pass + # second one is backward pass + part1 = direct[:, -1, :half_final_shape] + part2 = direct[:, 0, half_final_shape:] + + return torch.cat([part1, part2], dim=1) + else: + return direct[:, -1, :] + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + return self.output_shape + + +class SelectFinalFeaturesLayerFactory(LayerFactory): + """ Factory for the SelectFinalFeatures layer """ + + def __init__(self, bidirectional: bool = False): + super().__init__() + self.bidirectional = bidirectional + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "select_final_features" + + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ + return SelectFinalFeaturesLayer( + info=self.make_info(context), + bidirectional=self.bidirectional, + input_shape=direct_input + ) + + +def create(bidirectional=False, label=None, group=None): + """ Vel factory function """ + return SelectFinalFeaturesLayerFactory( + bidirectional=bidirectional + ).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/rnn.py b/vel/net/layer/rnn.py index 7cbf9e13..bceb9f3d 100644 --- a/vel/net/layer/rnn.py +++ b/vel/net/layer/rnn.py @@ -3,17 +3,17 @@ import torch.nn.init as init from vel.api import SizeHints -from vel.net.layer_base import Layer, LayerFactory +from vel.net.layer_base import Layer, LayerFactory, LayerFactoryContext, LayerInfo class RnnLayer(Layer): """ Single Recurrent Layer """ - def __init__(self, name: str, input_size: SizeHints, hidden_size: int, rnn_type: str, + def __init__(self, info: LayerInfo, input_shape: SizeHints, hidden_size: int, rnn_type: str, bias: bool = True, bidirectional: bool = False, nonlinearity: str = 
'tanh'): - super().__init__(name) + super().__init__(info) - self.input_size = input_size - self.input_length = input_size.assert_single().last() + self.input_shape = input_shape + self.input_length = input_shape.assert_single().last() self.hidden_size = hidden_size self.rnn_type = rnn_type @@ -37,7 +37,10 @@ def __init__(self, name: str, input_size: SizeHints, hidden_size: int, rnn_type: bidirectional=bidirectional, batch_first=True ) - self.output_size = input_size.assert_single().drop_last().append(self.hidden_size) + if self.bidirectional: + self.output_shape = SizeHints(input_shape.assert_single().drop_last().append(2 * self.hidden_size)) + else: + self.output_shape = SizeHints(input_shape.assert_single().drop_last().append(self.hidden_size)) def reset_weights(self): """ Call proper initializers for the weights """ @@ -61,27 +64,30 @@ def state_dim(self) -> int: def zero_state(self, batch_size): """ Potential state for the model """ - return {self.name: torch.zeros(batch_size, self.state_dim)} + if self.bidirectional: + return {self.global_name: torch.zeros(2, batch_size, self.state_dim)} + else: + return {self.global_name: torch.zeros(1, batch_size, self.state_dim)} def forward(self, input_data, state: dict, context: dict = None): """ Forward propagation of a single layer """ if self.rnn_type == 'lstm': - state_tensor = state[self.name].unsqueeze(0) + state_tensor = state[self.name] hidden_state, cell_state = torch.split(state_tensor, self.hidden_size, dim=2) output, (hidden_state, cell_state) = self.rnn_cell( input_data, (hidden_state.contiguous(), cell_state.contiguous()) ) new_state = torch.cat([hidden_state, cell_state], dim=2) - return output, {self.name: new_state[0]} + return output, {self.name: new_state} else: - state_tensor = state[self.name].unsqueeze(0) + state_tensor = state[self.name] output, new_state = self.rnn_cell(input_data, state_tensor) - return output, {self.name: new_state[0]} + return output, {self.name: new_state} def size_hints(self) 
-> SizeHints: """ Size hints for this network """ - return SizeHints(self.output_size) + return self.output_shape class RnnLayerFactory(LayerFactory): @@ -89,6 +95,7 @@ class RnnLayerFactory(LayerFactory): def __init__(self, hidden_size: int, rnn_type: str, bias: bool = True, bidirectional: bool = False, nonlinearity: str = 'tanh'): + super().__init__() self.hidden_size = hidden_size self.rnn_type = rnn_type @@ -101,11 +108,11 @@ def name_base(self) -> str: """ Base of layer name """ return "rnn" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: """ Create instance of 'RnnLayer' """ return RnnLayer( - name=name, - input_size=direct_input, + info=self.make_info(context), + input_shape=direct_input, hidden_size=self.hidden_size, rnn_type=self.rnn_type, bias=self.bias, @@ -115,7 +122,7 @@ def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_a def create(hidden_size: int, rnn_type: str, bias: bool = True, bidirectional: bool = False, - nonlinearity: str = 'tanh'): + nonlinearity: str = 'tanh', label=None, group=None): """ Vel factory function """ return RnnLayerFactory( hidden_size=hidden_size, @@ -123,4 +130,4 @@ def create(hidden_size: int, rnn_type: str, bias: bool = True, bidirectional: bo bias=bias, bidirectional=bidirectional, nonlinearity=nonlinearity - ) + ).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/util/concat.py b/vel/net/layer/util/concat.py index 7d2d7b57..511b8c66 100644 --- a/vel/net/layer/util/concat.py +++ b/vel/net/layer/util/concat.py @@ -1,14 +1,14 @@ import torch from vel.api import SizeHints, SizeHint -from vel.net.layer_base import LayerFactory, Layer +from vel.net.layer_base import LayerFactory, Layer, LayerFactoryContext, LayerInfo class Concat(Layer): """ Repeat single tensor multiple times """ - def __init__(self, name: str, 
size_hints: SizeHints, axis: int = -1): - super().__init__(name) + def __init__(self, info: LayerInfo, size_hints: SizeHints, axis: int = -1): + super().__init__(info) self.axis = axis self._size_hints = size_hints @@ -21,7 +21,9 @@ def size_hints(self) -> SizeHints: class ConcatFactory(LayerFactory): + """ Factory for Concat Layer """ def __init__(self, axis: int = -1): + super().__init__() self.axis = axis @property @@ -29,7 +31,8 @@ def name_base(self) -> str: """ Base of layer name """ return "concat" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ inputs = direct_input.assert_tuple() result = [] @@ -48,12 +51,12 @@ def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_a result.append(inputs[0][i]) return Concat( - name=name, + info=self.make_info(context), axis=self.axis, size_hints=SizeHints(SizeHint(*result)) ) -def create(axis: int = -1): +def create(axis: int = -1, label=None, group=None): """ Vel factory function """ - return ConcatFactory(axis=axis) + return ConcatFactory(axis=axis).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/util/repeat.py b/vel/net/layer/util/repeat.py index 9fda8050..8041ebde 100644 --- a/vel/net/layer/util/repeat.py +++ b/vel/net/layer/util/repeat.py @@ -1,12 +1,13 @@ from vel.api import SizeHints, SizeHint -from vel.net.layer_base import LayerFactory, Layer +from vel.net.layer_base import LayerFactory, Layer, LayerInfo, LayerFactoryContext class RepeatTensor(Layer): """ Repeat single tensor multiple times """ - def __init__(self, name: str, times: int, size_hint: SizeHint): - super().__init__(name) + def __init__(self, info: LayerInfo, times: int, size_hint: SizeHint): + super().__init__(info) + self.times = times self.size_hint = size_hint @@ -19,6 +20,7 @@ def size_hints(self) -> 
SizeHints: class RepeatTensorFactory(LayerFactory): def __init__(self, times: int): + super().__init__() self.times = times @property @@ -26,14 +28,14 @@ def name_base(self) -> str: """ Base of layer name """ return "repeat_tensor" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: return RepeatTensor( - name=name, + info=self.make_info(context), times=self.times, size_hint=direct_input.assert_single() ) -def create(times: int): +def create(times: int, label=None, group=None): """ Vel factory function """ - return RepeatTensorFactory(times=times) + return RepeatTensorFactory(times=times).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer_base.py b/vel/net/layer_base.py index cdc90487..591041c6 100644 --- a/vel/net/layer_base.py +++ b/vel/net/layer_base.py @@ -1,24 +1,113 @@ +import attr +import typing + from vel.api import BackboneModule, SizeHints +@attr.s(auto_attribs=True) +class LayerInfo: + """ Information about the layer """ + name: str + global_name: str + group: str + + class Layer(BackboneModule): - def __init__(self, name: str): + """ Layer class that fits into modular network framework """ + def __init__(self, info: LayerInfo): super().__init__() - self.name = name - def forward(self, direct, state: dict = None, context: dict = None): + self.info = info + + @property + def name(self) -> str: + """ Name of this layer """ + return self.info.name + + @property + def global_name(self) -> str: + """ Name of this layer - globally unique version """ + return self.info.global_name + + @property + def group(self) -> str: + """ Group of this layer """ + return self.info.group + + def forward(self, direct, state: dict, context: dict): """ Forward propagation of a single layer """ raise NotImplementedError + def grouped_parameters(self): + """ Return iterable of pairs (group, parameters) 
""" + return [(self.group, self.parameters())] + + +@attr.s(auto_attribs=True) +class LayerFactoryContext: + """ Context information about the layer being currently created """ + + idx: int + """ Index of this layer within parent """ + + parent_group: str + """ Group of the parent layer """ + + parent_name: typing.Optional[str] = None + """ Name of the parent - None if it's a top level layer """ + + data: dict = {} + """ Generic information potentially passed by layer in a hierarchy """ + class LayerFactory: """ Factory for layers """ + def __init__(self): + self.given_name = None + self.given_group = None + + def with_given_name(self, given_name) -> 'LayerFactory': + """ Set given name """ + self.given_name = given_name + return self + + def with_given_group(self, given_group) -> 'LayerFactory': + """ Set given group """ + self.given_group = given_group + return self + + def suggested_name(self, idx: int): + """ Reasonable layer name suggestion """ + return "{}_{:04d}".format(self.name_base, idx) + + def make_info(self, context: LayerFactoryContext) -> LayerInfo: + """ Make info for child layer """ + if self.given_name is not None: + name = self.given_name + else: + name = self.suggested_name(context.idx) + + if self.given_group is not None: + group = self.given_group + else: + group = context.parent_group + + if context.parent_name is None: + global_name = name + else: + global_name = f"{context.parent_name}/{name}" + + return LayerInfo( + name=name, + group=group, + global_name=global_name + ) @property def name_base(self) -> str: """ Base of layer name """ raise NotImplementedError - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: """ Create a given layer object """ raise NotImplementedError diff --git a/vel/net/modular.py b/vel/net/modular.py index c17fd81a..ca9efecc 100644 --- a/vel/net/modular.py +++ 
b/vel/net/modular.py @@ -1,10 +1,11 @@ +import itertools as it import collections import torch.nn as nn from vel.api import BackboneModule, ModuleFactory, SizeHints from vel.util.tensor_util import to_device -from .layer_base import LayerFactory +from .layer_base import LayerFactory, LayerFactoryContext class ModularSequential(nn.Module): @@ -30,19 +31,25 @@ def forward(self, direct, state: dict = None, context: dict = None): return direct -def instantiate_layers(layers: [LayerFactory], size_hint: SizeHints, extra_args: dict) -> nn.Module: +def instantiate_layers(layers: [LayerFactory], group: str, size_hint: SizeHints, extra_args: dict) -> nn.Module: """ Instantiate list of layer factories into PyTorch Module """ module_dict = collections.OrderedDict() - context = {} + context_data = {} for idx, layer_factory in enumerate(layers): counter = idx + 1 - name = "{}_{:04d}".format(layer_factory.name_base, counter) - layer = layer_factory.instantiate(name=name, direct_input=size_hint, context=context, extra_args=extra_args) + context = LayerFactoryContext( + idx=counter, + parent_group=group, + parent_name=None, + data=context_data + ) + + layer = layer_factory.instantiate(direct_input=size_hint, context=context, extra_args=extra_args) size_hint = layer.size_hints() - module_dict[name] = layer + module_dict[layer.name] = layer return ModularSequential(module_dict) @@ -79,7 +86,11 @@ def reset_state(self, state, dones): def forward(self, input_data, state=None): context = {} - return self.layers(input_data, context=context) + return self.layers(input_data, state=None, context=context) + + def grouped_parameters(self): + """ Return iterable of pairs (group, parameters) """ + return it.chain.from_iterable(l.grouped_parameters() for l in self.layers) class StatefulModularNetwork(BackboneModule): @@ -125,6 +136,7 @@ def forward(self, input_data, state=None): output_state = {} if state is None: + # input_data.device here may break. 
Should be fixed at some point state = to_device(self.zero_state(input_data.size(0)), input_data.device) for layer in self.layers: @@ -136,18 +148,27 @@ def forward(self, input_data, state=None): return data, output_state + def grouped_parameters(self): + """ Return iterable of pairs (group, parameters) """ + return it.chain.from_iterable(l.grouped_parameters() for l in self.layers) + class ModularNetworkFactory(ModuleFactory): """ Factory class for the modular network """ - def __init__(self, layers: [LayerFactory]): + def __init__(self, layers: [LayerFactory], group=None): self.layers = layers + if group is None: + self.group = "default" + else: + self.group = group + def instantiate(self, size_hint=None, **extra_args) -> BackboneModule: """ Create either stateful or not modular network instance """ if size_hint is None: size_hint = SizeHints() - layers = instantiate_layers(self.layers, size_hint=size_hint, extra_args=extra_args) + layers = instantiate_layers(self.layers, self.group, size_hint=size_hint, extra_args=extra_args) is_stateful = any(l.is_stateful for l in layers) if is_stateful: @@ -156,6 +177,6 @@ def instantiate(self, size_hint=None, **extra_args) -> BackboneModule: return ModularNetwork(layers) -def create(layers: [LayerFactory]): +def create(layers: [LayerFactory], group=None): """ Vel factory function """ - return ModularNetworkFactory(layers) + return ModularNetworkFactory(layers, group) diff --git a/vel/net/sequence.py b/vel/net/sequence.py new file mode 100644 index 00000000..f759051c --- /dev/null +++ b/vel/net/sequence.py @@ -0,0 +1,76 @@ +import collections +import typing + +from vel.api import BackboneModule, SizeHints +from vel.exception import VelException +from vel.util.tensor_util import to_device + + +class GenericModularSequential(BackboneModule): + """ Modification of nn.Sequential for the purpose of modular networks """ + + def __init__(self, layers: typing.Union[collections.OrderedDict, collections.Sequence]): + super().__init__() 
+ self._layers = [] + + if isinstance(layers, collections.OrderedDict): + for key, module in layers.items(): + self.add_module(key, module) + self._layers.append(module) + elif isinstance(layers, collections.Sequence): + for idx, module in enumerate(layers): + key = str(idx) + self.add_module(key, module) + self._layers.append(module) + else: + raise VelException("Incorrectly specified layers, must be a sequence or an ordered dict") + + self._is_stateful = any(l.is_stateful() for l in self._layers) + + def size_hints(self) -> SizeHints: + return self._layers[-1].size_hints() + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return self._is_stateful + + def zero_state(self, batch_size): + """ Potential state for the model """ + zero_state = {} + + for l in self.layers: + layer_zero_state = l.zero_state(batch_size) + if layer_zero_state is not None: + zero_state.update(layer_zero_state) + + return zero_state + + def __len__(self): + return len(self._layers) + + def __getitem__(self, item): + return self._layers[item] + + def forward(self, direct, state: dict = None, context: dict = None): + if not self.is_stateful: + for layer in self._layers: + direct = layer(direct, state=state, context=context) + return direct + else: + output_state = {} + + if state is None: + # direct.device here may break. 
Should be fixed at some point + state = to_device(self.zero_state(direct.size(0)), direct.device) + + data = direct + + for layer in self.layers: + if layer.is_stateful: + data, new_state = layer(data, state=state, context=context) + output_state.update(new_state) + else: + data = layer(data, state=state, context=context) + + return data, output_state diff --git a/vel/optimizer/adadelta.py b/vel/optimizer/adadelta.py index e5e01f0e..108cd72e 100644 --- a/vel/optimizer/adadelta.py +++ b/vel/optimizer/adadelta.py @@ -2,8 +2,6 @@ from torch.optim.adadelta import Adadelta -import vel.util.module_util as mu - from vel.api import OptimizerFactory, VelOptimizerProxy, VelOptimizer @@ -12,6 +10,7 @@ class AdadeltaFactory(OptimizerFactory): def __init__(self, lr: float = 1.0, rho: float = 0.9, eps: float = 1e-6, weight_decay: float = 0.0, max_grad_norm: typing.Optional[float] = None): + super().__init__() self.lr = lr self.rho = rho self.eps = eps @@ -19,27 +18,16 @@ def __init__(self, lr: float = 1.0, rho: float = 0.9, eps: float = 1e-6, weight_ self.max_grad_norm = max_grad_norm def instantiate(self, parameters) -> VelOptimizer: + optimizer_params, group_names = self.preprocess(parameters) + return VelOptimizerProxy(Adadelta( - parameters, + optimizer_params, lr=self.lr, rho=self.rho, eps=self.eps, weight_decay=self.weight_decay - ), self.max_grad_norm) - - def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: - settings_dict = { - 'lr': self.lr, - 'rho': self.rho, - 'eps': self.eps, - 'weight_decay': self.weight_decay - } - - out_parameters = out_parameters.copy() - out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) - - return VelOptimizerProxy(Adadelta(out_parameters, **out_settings_dict), self.max_grad_norm) + ), group_names, self.max_grad_norm) def create(lr: float = 1.0, rho: float = 0.9, eps: float = 1e-6, weight_decay: float = 0.0, - max_grad_norm: typing.Optional[float] = None): + max_grad_norm: typing.Optional[float] 
= None, parameter_groups=None): """ Vel factory function """ return AdadeltaFactory( lr=lr, @@ -47,4 +35,4 @@ def create(lr: float = 1.0, rho: float = 0.9, eps: float = 1e-6, weight_decay: f eps=eps, weight_decay=weight_decay, max_grad_norm=max_grad_norm - ) + ).with_parameter_groups(parameter_groups) diff --git a/vel/optimizer/adam.py b/vel/optimizer/adam.py index 46ad3f06..8e9fb575 100644 --- a/vel/optimizer/adam.py +++ b/vel/optimizer/adam.py @@ -2,8 +2,6 @@ from torch.optim.adam import Adam -import vel.util.module_util as mu - from vel.api import OptimizerFactory, VelOptimizer, VelOptimizerProxy @@ -12,6 +10,7 @@ class AdamFactory(OptimizerFactory): def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False, max_grad_norm: typing.Optional[float] = None): + super().__init__() self.lr = lr self.betas = betas self.eps = eps @@ -20,25 +19,16 @@ def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgra self.max_grad_norm = max_grad_norm def instantiate(self, parameters) -> VelOptimizer: + optimizer_params, group_names = self.preprocess(parameters) + return VelOptimizerProxy(Adam( - parameters, + optimizer_params, lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay, amsgrad=self.amsgrad - ), self.max_grad_norm) - - def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: - settings_dict = { - 'lr': self.lr, - 'eps': self.eps, - 'weight_decay': self.weight_decay, - 'amsgrad': self.amsgrad - } - - out_parameters = out_parameters.copy() - out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) - - return VelOptimizerProxy(Adam(out_parameters, betas=self.betas, **out_settings_dict), self.max_grad_norm) + ), group_names, self.max_grad_norm) -def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, max_grad_norm=None): +def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, max_grad_norm=None, parameter_groups=None): """ Vel factory function 
""" - return AdamFactory(lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, max_grad_norm=max_grad_norm) + return AdamFactory( + lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, max_grad_norm=max_grad_norm, + ).with_parameter_groups(parameter_groups) diff --git a/vel/optimizer/radam.py b/vel/optimizer/radam.py index 7abc4959..b6c6e825 100644 --- a/vel/optimizer/radam.py +++ b/vel/optimizer/radam.py @@ -1,15 +1,12 @@ """ RAdam implementation from: https://github.com/LiyuanLucasLiu/RAdam/blob/master/cifar_imagenet/utils/radam.py """ -import collections import math import torch import typing from torch.optim.optimizer import Optimizer -import vel.util.module_util as mu - from vel.api import OptimizerFactory, VelOptimizer, VelOptimizerProxy @@ -93,6 +90,7 @@ class RAdamFactory(OptimizerFactory): def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, max_grad_norm: typing.Optional[float] = None): + super().__init__() self.lr = lr self.betas = betas self.eps = eps @@ -100,24 +98,16 @@ def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, self.max_grad_norm = max_grad_norm def instantiate(self, parameters) -> VelOptimizer: + optimizer_params, group_names = self.preprocess(parameters) + return VelOptimizerProxy(RAdam( - parameters, + optimizer_params, lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay - ), self.max_grad_norm) - - def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: - settings_dict = { - 'lr': self.lr, - 'eps': self.eps, - 'weight_decay': self.weight_decay - } - - out_parameters = out_parameters.copy() - out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) - - return VelOptimizerProxy(RAdam(out_parameters, betas=self.betas, **out_settings_dict), self.max_grad_norm) + ), group_names, self.max_grad_norm) -def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, max_grad_norm=None): +def create(lr, betas=(0.9, 0.999), 
weight_decay=0, epsilon=1e-8, max_grad_norm=None, parameter_groups=None): """ Vel factory function """ - return RAdamFactory(lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, max_grad_norm=max_grad_norm) + return RAdamFactory( + lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, max_grad_norm=max_grad_norm + ).with_parameter_groups(parameter_groups) diff --git a/vel/optimizer/ranger.py b/vel/optimizer/ranger.py index 9f688ead..1c3143b6 100644 --- a/vel/optimizer/ranger.py +++ b/vel/optimizer/ranger.py @@ -7,8 +7,6 @@ from torch.optim.optimizer import Optimizer -import vel.util.module_util as mu - from vel.api import OptimizerFactory, VelOptimizer, VelOptimizerProxy @@ -154,53 +152,12 @@ def step(self, closure=None): return loss -# class RangerFactory(OptimizerFactory): -# """ RAdam optimizer factory """ -# -# def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, layer_groups=False): -# self.lr = lr -# self.betas = betas -# self.eps = eps -# self.weight_decay = weight_decay -# self.layer_groups = layer_groups -# -# def instantiate(self, model: Model) -> Ranger: -# if self.layer_groups: -# parameters = mu.to_parameter_groups(model.get_layer_groups()) -# -# if isinstance(self.lr, collections.Sequence): -# for idx, lr in enumerate(self.lr): -# parameters[idx]['lr'] = lr -# -# default_lr = self.lr[0] -# else: -# default_lr = float(self.lr) -# -# if isinstance(self.weight_decay, collections.Sequence): -# for idx, weight_decay in enumerate(self.weight_decay): -# parameters[idx]['weight_decay'] = weight_decay -# -# default_weight_decay = self.weight_decay[0] -# else: -# default_weight_decay = self.weight_decay -# -# return Ranger( -# parameters, -# lr=default_lr, betas=self.betas, eps=self.eps, weight_decay=default_weight_decay, -# ) -# else: -# parameters = filter(lambda p: p.requires_grad, model.parameters()) -# -# return Ranger( -# parameters, -# lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay, -# ) - 
class RangerFactory(OptimizerFactory): """ Adam optimizer factory """ def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, max_grad_norm: typing.Optional[float] = None): + super().__init__() self.lr = lr self.betas = betas self.eps = eps @@ -208,24 +165,16 @@ def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, self.max_grad_norm = max_grad_norm def instantiate(self, parameters) -> VelOptimizer: + optimizer_params, group_names = self.preprocess(parameters) + return VelOptimizerProxy(Ranger( - parameters, + optimizer_params, lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay - ), self.max_grad_norm) - - def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: - settings_dict = { - 'lr': self.lr, - 'eps': self.eps, - 'weight_decay': self.weight_decay - } - - out_parameters = out_parameters.copy() - out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) - - return VelOptimizerProxy(Ranger(out_parameters, betas=self.betas, **out_settings_dict), self.max_grad_norm) + ), group_names, self.max_grad_norm) -def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, max_grad_norm=None): +def create(lr, betas=(0.9, 0.999), weight_decay=0, epsilon=1e-8, max_grad_norm=None, parameter_groups=None): """ Vel factory function """ - return RangerFactory(lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, max_grad_norm=max_grad_norm) + return RangerFactory( + lr=lr, betas=betas, weight_decay=weight_decay, eps=epsilon, max_grad_norm=max_grad_norm + ).with_parameter_groups(parameter_groups) diff --git a/vel/optimizer/rmsprop.py b/vel/optimizer/rmsprop.py index eacf02ac..c967431a 100644 --- a/vel/optimizer/rmsprop.py +++ b/vel/optimizer/rmsprop.py @@ -2,8 +2,6 @@ from torch.optim.rmsprop import RMSprop -import vel.util.module_util as mu - from vel.api import OptimizerFactory, VelOptimizerProxy, VelOptimizer @@ -12,6 +10,7 @@ class RMSpropFactory(OptimizerFactory): def 
__init__(self, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False, max_grad_norm: typing.Optional[float] = None): + super().__init__() self.lr = lr self.alpha = alpha self.eps = eps @@ -21,31 +20,18 @@ def __init__(self, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, ce self.max_grad_norm = max_grad_norm def instantiate(self, parameters) -> VelOptimizer: + optimizer_params, group_names = self.preprocess(parameters) + return VelOptimizerProxy(RMSprop( - parameters, + optimizer_params, lr=self.lr, alpha=self.alpha, eps=self.eps, weight_decay=self.weight_decay, momentum=self.momentum, centered=self.centered - ), self.max_grad_norm) - - def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: - settings_dict = { - 'lr': self.lr, - 'alpha': self.alpha, - 'eps': self.eps, - 'weight_decay': self.weight_decay, - 'momentum': self.momentum, - 'centered': self.centered - } - - out_parameters = out_parameters.copy() - out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) - - return VelOptimizerProxy(RMSprop(out_parameters, **out_settings_dict), self.max_grad_norm) + ), group_names, self.max_grad_norm) -def create(lr, alpha, momentum=0, weight_decay=0, epsilon=1e-8, max_grad_norm=None): +def create(lr, alpha, momentum=0, weight_decay=0, epsilon=1e-8, max_grad_norm=None, parameter_groups=None): """ Vel factory function """ return RMSpropFactory( lr=lr, alpha=alpha, momentum=momentum, weight_decay=weight_decay, eps=float(epsilon), max_grad_norm=max_grad_norm - ) + ).with_parameter_groups(parameter_groups) diff --git a/vel/optimizer/rmsprop_tf.py b/vel/optimizer/rmsprop_tf.py index 934d5090..7fea5b97 100644 --- a/vel/optimizer/rmsprop_tf.py +++ b/vel/optimizer/rmsprop_tf.py @@ -3,8 +3,6 @@ from torch.optim.optimizer import Optimizer -import vel.util.module_util as mu - from vel.api import OptimizerFactory, VelOptimizer, VelOptimizerProxy @@ -120,6 +118,7 @@ class RMSpropTFFactory(OptimizerFactory): def 
__init__(self, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False, max_grad_norm: typing.Optional[float] = None): + super().__init__() self.lr = lr self.alpha = alpha self.eps = eps @@ -129,31 +128,18 @@ def __init__(self, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, ce self.max_grad_norm = max_grad_norm def instantiate(self, parameters) -> VelOptimizer: + optimizer_params, group_names = self.preprocess(parameters) + return VelOptimizerProxy(RMSpropTF( - parameters, + optimizer_params, lr=self.lr, alpha=self.alpha, eps=self.eps, weight_decay=self.weight_decay, momentum=self.momentum, centered=self.centered - ), self.max_grad_norm) - - def instantiate_parameter_groups(self, out_parameters) -> VelOptimizer: - settings_dict = { - 'lr': self.lr, - 'alpha': self.alpha, - 'eps': self.eps, - 'weight_decay': self.weight_decay, - 'momentum': self.momentum, - 'centered': self.centered - } - - out_parameters = out_parameters.copy() - out_settings_dict = mu.optimizer_parameter_helper(out_parameters, settings_dict) - - return VelOptimizerProxy(RMSpropTF(out_parameters, **out_settings_dict), self.max_grad_norm) + ), group_names, self.max_grad_norm) -def create(lr, alpha, momentum=0, weight_decay=0, epsilon=1e-8, max_grad_norm=None): +def create(lr, alpha, momentum=0, weight_decay=0, epsilon=1e-8, max_grad_norm=None, parameter_groups=None): """ Vel factory function """ return RMSpropTFFactory( lr=lr, alpha=alpha, momentum=momentum, weight_decay=weight_decay, eps=float(epsilon), max_grad_norm=max_grad_norm - ) + ).with_parameter_groups(parameter_groups) diff --git a/vel/optimizer/sgd.py b/vel/optimizer/sgd.py index 383053d5..4479b41f 100644 --- a/vel/optimizer/sgd.py +++ b/vel/optimizer/sgd.py @@ -2,8 +2,6 @@ from torch.optim.sgd import SGD -import vel.util.module_util as mu - from vel.api import OptimizerFactory, VelOptimizer, VelOptimizerProxy @@ -12,6 +10,7 @@ class SgdFactory(OptimizerFactory): def __init__(self, lr, momentum=0, 
dampening=0, weight_decay=0, nesterov=False, max_grad_norm: typing.Optional[float] = None): + super().__init__() self.lr = lr self.momentum = momentum self.dampening = dampening @@ -20,33 +19,19 @@ def __init__(self, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, self.max_grad_norm = max_grad_norm def instantiate(self, parameters) -> VelOptimizer: - return VelOptimizerProxy( - SGD( - parameters, - lr=self.lr, momentum=self.momentum, dampening=self.dampening, weight_decay=self.weight_decay, - nesterov=self.nesterov - ), self.max_grad_norm - ) - - def instantiate_parameter_groups(self, parameters) -> VelOptimizer: - settings_dict = { - 'lr': self.lr, - 'momentum': self.momentum, - 'dampening': self.dampening, - 'weight_decay': self.weight_decay, - 'nesterov': self.nesterov - } - - parameters = parameters.copy() - out_settings_dict = mu.optimizer_parameter_helper(parameters, settings_dict) + optimizer_params, group_names = self.preprocess(parameters) - return VelOptimizerProxy(SGD(parameters, **out_settings_dict), self.max_grad_norm) + return VelOptimizerProxy(SGD( + optimizer_params, + lr=self.lr, momentum=self.momentum, dampening=self.dampening, weight_decay=self.weight_decay, + nesterov=self.nesterov + ), group_names, self.max_grad_norm) def create(lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, - max_grad_norm: typing.Optional[float] = None): + max_grad_norm: typing.Optional[float] = None, parameter_groups=None): """ Vel factory function """ return SgdFactory( lr=lr, momentum=momentum, dampening=dampening, weight_decay=weight_decay, nesterov=nesterov, max_grad_norm=max_grad_norm - ) + ).with_parameter_groups(parameter_groups) diff --git a/vel/rl/layer/double_nature_cnn.py b/vel/rl/layer/double_nature_cnn.py index 3f78db01..f053c3b2 100644 --- a/vel/rl/layer/double_nature_cnn.py +++ b/vel/rl/layer/double_nature_cnn.py @@ -13,7 +13,7 @@ import vel.util.network as net_util from vel.api import SizeHints, SizeHint -from vel.net.layer_base 
import Layer, LayerFactory +from vel.net.layer_base import Layer, LayerFactory, LayerFactoryContext, LayerInfo class DoubleNatureCnn(Layer): @@ -21,8 +21,8 @@ class DoubleNatureCnn(Layer): Neural network as defined in the paper 'Human-level control through deep reinforcement learning' but with two separate heads. """ - def __init__(self, name: str, input_width, input_height, input_channels, output_dim=512): - super().__init__(name) + def __init__(self, info: LayerInfo, input_width, input_height, input_channels, output_dim=512): + super().__init__(info) self.output_dim = output_dim @@ -103,6 +103,7 @@ class DoubleNatureCnnFactory(LayerFactory): """ Nature Cnn Network Factory """ def __init__(self, output_dim: int = 512): + super().__init__() self.output_dim = output_dim @property @@ -110,11 +111,11 @@ def name_base(self) -> str: """ Base of layer name """ return "double_nature_cnn" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: (b, c, w, h) = direct_input.assert_single(4) return DoubleNatureCnn( - name=name, + info=self.make_info(context), input_width=w, input_height=h, input_channels=c, @@ -122,6 +123,6 @@ def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_a ) -def create(output_dim: int = 512): +def create(output_dim: int = 512, label=None, group=None): """ Vel factory function """ - return DoubleNatureCnnFactory(output_dim=output_dim) + return DoubleNatureCnnFactory(output_dim=output_dim).with_given_name(label).with_given_group(group) diff --git a/vel/rl/layer/double_noisy_nature_cnn.py b/vel/rl/layer/double_noisy_nature_cnn.py index f0740be8..536eccce 100644 --- a/vel/rl/layer/double_noisy_nature_cnn.py +++ b/vel/rl/layer/double_noisy_nature_cnn.py @@ -14,7 +14,7 @@ from vel.api import SizeHints, SizeHint -from vel.net.layer_base import Layer, LayerFactory +from 
vel.net.layer_base import Layer, LayerFactory, LayerFactoryContext from vel.rl.module.noisy_linear import NoisyLinear @@ -23,9 +23,9 @@ class DoubleNoisyNatureCnn(Layer): Neural network as defined in the paper 'Human-level control through deep reinforcement learning' but with two separate heads and "noisy" linear layer. """ - def __init__(self, name: str, input_width, input_height, input_channels, output_dim=512, initial_std_dev=0.4, + def __init__(self, info: LayerInfo, input_width, input_height, input_channels, output_dim=512, initial_std_dev=0.4, factorized_noise=True): - super().__init__(name) + super().__init__(info) self.output_dim = output_dim @@ -119,6 +119,7 @@ class DoubleNoisyNatureCnnFactory(LayerFactory): """ Nature Cnn Network Factory """ def __init__(self, initial_std_dev: float = 0.4, factorized_noise: bool = True, output_dim: int = 512): + super().__init__() self.initial_std_dev = initial_std_dev self.factorized_noise = factorized_noise self.output_dim = output_dim @@ -128,11 +129,11 @@ def name_base(self) -> str: """ Base of layer name """ return "double_noisy_nature_cnn" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: (b, c, w, h) = direct_input.assert_single(4) return DoubleNoisyNatureCnn( - name=name, + info=self.make_info(context), input_width=w, input_height=h, input_channels=c, @@ -142,10 +143,11 @@ def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_a ) -def create(initial_std_dev: float = 0.4, factorized_noise: bool = True, output_dim: int = 512): +def create(initial_std_dev: float = 0.4, factorized_noise: bool = True, output_dim: int = 512, + label=None, group=None): """ Vel factory function """ return DoubleNoisyNatureCnnFactory( output_dim=output_dim, initial_std_dev=initial_std_dev, factorized_noise=factorized_noise - ) + 
).with_given_name(label).with_given_group(group) diff --git a/vel/model/rnn/__init__.py b/vel/rl/layer/input/__init__.py similarity index 100% rename from vel/model/rnn/__init__.py rename to vel/rl/layer/input/__init__.py diff --git a/vel/rl/layer/nature_cnn.py b/vel/rl/layer/nature_cnn.py index b9845a16..a30b60ce 100644 --- a/vel/rl/layer/nature_cnn.py +++ b/vel/rl/layer/nature_cnn.py @@ -13,14 +13,14 @@ import vel.util.network as net_util from vel.api import SizeHint, SizeHints -from vel.net.layer_base import Layer, LayerFactory +from vel.net.layer_base import Layer, LayerFactory, LayerFactoryContext, LayerInfo class NatureCnn(Layer): """ Neural network as defined in the paper 'Human-level control through deep reinforcement learning' """ - def __init__(self, name: str, input_width, input_height, input_channels, output_dim=512): - super().__init__(name) + def __init__(self, info: LayerInfo, input_width, input_height, input_channels, output_dim=512): + super().__init__(info) self.output_dim = output_dim @@ -87,6 +87,7 @@ class NatureCnnFactory(LayerFactory): """ Nature Cnn Network Factory """ def __init__(self, output_dim: int = 512): + super().__init__() self.output_dim = output_dim @property @@ -94,11 +95,11 @@ def name_base(self) -> str: """ Base of layer name """ return "nature_cnn" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: (b, c, w, h) = direct_input.assert_single(4) return NatureCnn( - name=name, + info=self.make_info(context), input_width=w, input_height=h, input_channels=c, @@ -106,6 +107,6 @@ def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_a ) -def create(output_dim=512): +def create(output_dim=512, label=None, group=None): """ Vel factory function """ - return NatureCnnFactory(output_dim=output_dim) + return 
NatureCnnFactory(output_dim=output_dim).with_given_name(label).with_given_group(group) diff --git a/vel/rl/layer/nature_cnn_small.py b/vel/rl/layer/nature_cnn_small.py index e8bc8928..ec8d9497 100644 --- a/vel/rl/layer/nature_cnn_small.py +++ b/vel/rl/layer/nature_cnn_small.py @@ -13,7 +13,7 @@ import vel.util.network as net_util from vel.api import SizeHint, SizeHints -from vel.net.modular import Layer, LayerFactory +from vel.net.layer_base import LayerFactoryContext, Layer, LayerFactory, LayerInfo class NatureCnnSmall(Layer): @@ -21,8 +21,8 @@ class NatureCnnSmall(Layer): Neural network as defined in the paper 'Human-level control through deep reinforcement learning' Smaller version. """ - def __init__(self, name: str, input_width, input_height, input_channels, output_dim=128): - super().__init__(name) + def __init__(self, info: LayerInfo, input_width, input_height, input_channels, output_dim=128): + super().__init__(info) self.output_dim = output_dim @@ -81,6 +81,7 @@ class NatureCnnSmallFactory(LayerFactory): """ Nature Cnn Network Factory """ def __init__(self, output_dim: int = 128): + super().__init__() self.output_dim = output_dim @property @@ -88,11 +89,11 @@ def name_base(self) -> str: """ Base of layer name """ return "nature_cnn_small" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: (b, c, w, h) = direct_input.assert_single(4) return NatureCnnSmall( - name=name, + info=self.make_info(context), input_width=w, input_height=h, input_channels=c, @@ -100,6 +101,6 @@ def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_a ) -def create(output_dim: int = 128): +def create(output_dim: int = 128, label=None, group=None): """ Vel factory function """ - return NatureCnnSmallFactory(output_dim=output_dim) + return 
NatureCnnSmallFactory(output_dim=output_dim).with_given_name(label).with_given_group(group) diff --git a/vel/rl/layer/rnn_cell.py b/vel/rl/layer/rnn_cell.py index a509072d..2eadf942 100644 --- a/vel/rl/layer/rnn_cell.py +++ b/vel/rl/layer/rnn_cell.py @@ -4,15 +4,15 @@ from vel.api import SizeHint, SizeHints -from vel.net.layer_base import Layer, LayerFactory +from vel.net.layer_base import Layer, LayerFactory, LayerFactoryContext, LayerInfo class RnnCell(Layer): """ Generalization of RNN cell (Simple RNN, LSTM or GRU) """ - def __init__(self, name: str, input_size: int, hidden_size: int, rnn_type: str, bias: bool = True, + def __init__(self, info: LayerInfo, input_size: int, hidden_size: int, rnn_type: str, bias: bool = True, nonlinearity: str = 'tanh'): - super().__init__(name) + super().__init__(info) assert rnn_type in {'rnn', 'lstm', 'gru'}, "Rnn type {} is not supported".format(rnn_type) @@ -73,6 +73,7 @@ class RnnCellFactory(LayerFactory): """ Factory for the RnnCell layer """ def __init__(self, hidden_size: int, rnn_type: str, bias: bool = True, nonlinearity: str = 'tanh'): + super().__init__() self.hidden_size = hidden_size self.rnn_type = rnn_type self.bias = bias @@ -82,11 +83,12 @@ def __init__(self, hidden_size: int, rnn_type: str, bias: bool = True, nonlinear def name_base(self) -> str: return "rnn_cell" - def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_args: dict) -> Layer: + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ input_size = direct_input.assert_single().last() return RnnCell( - name=name, + info=self.make_info(context), input_size=input_size, hidden_size=self.hidden_size, rnn_type=self.rnn_type, @@ -95,11 +97,11 @@ def instantiate(self, name: str, direct_input: SizeHints, context: dict, extra_a ) -def create(hidden_size: int, rnn_type: str, bias: bool = True, nonlinearity: str = 'tanh'): +def create(hidden_size: int, 
rnn_type: str, bias: bool = True, nonlinearity: str = 'tanh', label=None, group=None): """ Vel factory function """ return RnnCellFactory( hidden_size=hidden_size, rnn_type=rnn_type, bias=bias, nonlinearity=nonlinearity - ) + ).with_given_name(label).with_given_group(group) diff --git a/vel/rl/module/actor_critic_policy.py b/vel/rl/module/actor_critic_policy.py index cb252447..bc4a5eae 100644 --- a/vel/rl/module/actor_critic_policy.py +++ b/vel/rl/module/actor_critic_policy.py @@ -36,6 +36,16 @@ def layer_groups(self): [self.input_net, self.value_backbone, self.critic_head], ] + def grouped_parameters(self): + """ Return iterable of pairs (group, parameters) """ + return it.chain( + self.input_net.grouped_parameters(), + self.policy_backbone.grouped_parameters(), + self.value_backbone.grouped_parameters(), + [("actor", self.action_head.parameters())], + [("critic", self.critic_head.parameters())], + ) + def reset_weights(self): """ Initialize properly model weights """ self.input_net.reset_weights() diff --git a/vel/rl/policy/ddpg.py b/vel/rl/policy/ddpg.py index 3f40d317..3d286d16 100644 --- a/vel/rl/policy/ddpg.py +++ b/vel/rl/policy/ddpg.py @@ -47,8 +47,7 @@ def reset_episodic_state(self, dones: torch.Tensor): def create_optimizer(self, optimizer_factory: OptimizerFactory) -> VelOptimizer: """ Create optimizer for the purpose of optimizing this model """ - parameter_groups = mu.to_parameter_groups(self.net.layer_groups()) - return optimizer_factory.instantiate_parameter_groups(parameter_groups) + return optimizer_factory.instantiate(self.net.grouped_parameters()) def forward(self, observation, state=None): """ Calculate model outputs """ diff --git a/vel/rl/vecenv/dummy.py b/vel/rl/vecenv/dummy.py index 29b405e0..4df8cd2b 100644 --- a/vel/rl/vecenv/dummy.py +++ b/vel/rl/vecenv/dummy.py @@ -29,7 +29,7 @@ def instantiate(self, parallel_envs, seed=0, preset='default') -> VecEnv: def instantiate_single(self, seed=0, preset='default'): """ Create a new Env instance - 
single """ - env = self.env.instantiate(seed=seed, serial_id=0, preset=preset) + env = self.env.instantiate() if self.frame_history is not None: env = FrameStack(env, self.frame_history) @@ -38,7 +38,7 @@ def instantiate_single(self, seed=0, preset='default'): def _creation_function(self, idx, seed, preset): """ Helper function to create a proper closure around supplied values """ - return lambda: self.env.instantiate(seed=seed, serial_id=idx, preset=preset) + return lambda: self.env.instantiate() def create(env, frame_history=None, normalize_returns=False): diff --git a/vel/train/phase/cycle.py b/vel/train/phase/cycle.py index 63fae205..cb04c409 100644 --- a/vel/train/phase/cycle.py +++ b/vel/train/phase/cycle.py @@ -74,6 +74,13 @@ def on_batch_begin(self, batch_info: BatchInfo, dataset: typing.Optional[str] = interp.interpolate_single(max_lr, min_lr, interpolation_number, how=self.interpolate) for max_lr, min_lr in zip(self.max_lr, self.min_lr) ] + elif isinstance(self.max_lr, dict): + lr = { + name: interp.interpolate_single( + self.max_lr[name], self.min_lr[name], interpolation_number, how=self.interpolate + ) + for name in self.max_lr + } else: lr = interp.interpolate_single(self.max_lr, self.min_lr, interpolation_number, how=self.interpolate) diff --git a/vel/train/phase/freeze.py b/vel/train/phase/freeze.py index c230c762..635b7ebd 100644 --- a/vel/train/phase/freeze.py +++ b/vel/train/phase/freeze.py @@ -3,12 +3,14 @@ class FreezePhase(train.EmptyTrainPhase): """ Freeze the model """ + def __init__(self, groups=None): + self.groups = groups def set_up_phase(self, training_info, model, loader): """ Freeze the model """ - model.freeze() + model.freeze(groups=self.groups) -def create(): +def create(groups=None): """ Vel factory function """ - return FreezePhase() + return FreezePhase(groups) diff --git a/vel/util/module_util.py b/vel/util/module_util.py index e2dbef9b..4d1ca456 100644 --- a/vel/util/module_util.py +++ b/vel/util/module_util.py @@ -83,6 +83,11 
@@ def to_parameter_groups(layer_groups): return [{'params': chain_params(x)} for x in layer_groups] +def module_list_to_param_list(module_list): + """ Conver a list of pytorch modules into a list of parameters """ + return it.chain.from_iterable(m.parameters() for m in module_list) + + def optimizer_parameter_helper(parameters, parameter_dict): """ Helper function for creating layer group optimizer instances """ out_dict = parameter_dict.copy() From f4e61077bd2d95b840c079961ecdf3bbed98dc58 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Mon, 14 Oct 2019 11:12:09 -0700 Subject: [PATCH 129/162] Improving formatting. --- vel/model/nlp/language_model.py | 1 - vel/rl/layer/double_noisy_nature_cnn.py | 3 +-- vel/rl/policy/ddpg.py | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/vel/model/nlp/language_model.py b/vel/model/nlp/language_model.py index def34db5..e45347a0 100644 --- a/vel/model/nlp/language_model.py +++ b/vel/model/nlp/language_model.py @@ -47,7 +47,6 @@ def forward(self, input_data: torch.Tensor, state=None) -> torch.Tensor: output = self.net(input_data) return F.log_softmax(self.output_layer(output), dim=-1) - def loss_value(self, x_data, y_true, y_pred) -> torch.tensor: """ Calculate a value of loss function """ y_pred = y_pred.view(-1, y_pred.size(2)) diff --git a/vel/rl/layer/double_noisy_nature_cnn.py b/vel/rl/layer/double_noisy_nature_cnn.py index 536eccce..da867a76 100644 --- a/vel/rl/layer/double_noisy_nature_cnn.py +++ b/vel/rl/layer/double_noisy_nature_cnn.py @@ -13,8 +13,7 @@ import vel.util.network as net_util from vel.api import SizeHints, SizeHint - -from vel.net.layer_base import Layer, LayerFactory, LayerFactoryContext +from vel.net.layer_base import Layer, LayerFactory, LayerFactoryContext, LayerInfo from vel.rl.module.noisy_linear import NoisyLinear diff --git a/vel/rl/policy/ddpg.py b/vel/rl/policy/ddpg.py index 3d286d16..3e47955f 100644 --- a/vel/rl/policy/ddpg.py +++ b/vel/rl/policy/ddpg.py @@ -6,8 +6,6 @@ 
import torch.nn as nn import torch.nn.functional as F -import vel.util.module_util as mu - from vel.api import BackboneModule, BatchInfo, ModuleFactory, OptimizerFactory, VelOptimizer, SizeHints from vel.metric.base import AveragingNamedMetric from vel.rl.api import RlPolicy, Rollout From a00d125b857c08bc2e8167ff1ec4cb790a8af4b2 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Mon, 14 Oct 2019 11:13:23 -0700 Subject: [PATCH 130/162] Formatting. --- vel/rl/policy/ddpg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vel/rl/policy/ddpg.py b/vel/rl/policy/ddpg.py index 3e47955f..84d64a60 100644 --- a/vel/rl/policy/ddpg.py +++ b/vel/rl/policy/ddpg.py @@ -181,8 +181,7 @@ def instantiate(self, **extra_args): def create(actor_net: ModuleFactory, critic_net: ModuleFactory, discount_factor: float, tau: float, noise_std_dev: float, - input_net: typing.Optional[ModuleFactory] = None - ): + input_net: typing.Optional[ModuleFactory] = None): """ Vel factory function """ return DDPGFactory( actor_net=actor_net, From 284260a115287a92b7b3270aba4bdf1248d75b9c Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Mon, 14 Oct 2019 12:05:46 -0700 Subject: [PATCH 131/162] EWMA input normalization. 
--- examples-configs/rl/mujoco/mujoco_a2c.yaml | 2 +- examples-configs/rl/mujoco/mujoco_ddpg.yaml | 2 +- examples-configs/rl/mujoco/mujoco_ppo.yaml | 2 +- examples-configs/rl/mujoco/mujoco_trpo.yaml | 2 +- vel/module/input/normalize_ewma.py | 47 +++++++++++++++ vel/net/layer/input/normalize_ewma.py | 65 +++++++++++++++++++++ 6 files changed, 116 insertions(+), 4 deletions(-) create mode 100644 vel/module/input/normalize_ewma.py create mode 100644 vel/net/layer/input/normalize_ewma.py diff --git a/examples-configs/rl/mujoco/mujoco_a2c.yaml b/examples-configs/rl/mujoco/mujoco_a2c.yaml index 9f89431a..f85a277a 100644 --- a/examples-configs/rl/mujoco/mujoco_a2c.yaml +++ b/examples-configs/rl/mujoco/mujoco_a2c.yaml @@ -22,7 +22,7 @@ model: net: name: vel.net.modular layers: - - name: vel.net.layer.input.normalize_expanding + - name: vel.net.layer.input.normalize_ewma - name: vel.net.layer.mlp hidden_layers: [64, 64] activation: 'tanh' diff --git a/examples-configs/rl/mujoco/mujoco_ddpg.yaml b/examples-configs/rl/mujoco/mujoco_ddpg.yaml index f75ef7e3..a5c837e0 100644 --- a/examples-configs/rl/mujoco/mujoco_ddpg.yaml +++ b/examples-configs/rl/mujoco/mujoco_ddpg.yaml @@ -21,7 +21,7 @@ model: input_net: name: vel.net.modular layers: - - name: vel.net.layer.input.normalize_expanding + - name: vel.net.layer.input.normalize_ewma actor_net: name: vel.net.modular diff --git a/examples-configs/rl/mujoco/mujoco_ppo.yaml b/examples-configs/rl/mujoco/mujoco_ppo.yaml index 975eabf6..5fdbd7bd 100644 --- a/examples-configs/rl/mujoco/mujoco_ppo.yaml +++ b/examples-configs/rl/mujoco/mujoco_ppo.yaml @@ -25,7 +25,7 @@ model: net: name: vel.net.modular layers: - - name: vel.net.layer.input.normalize_expanding + - name: vel.net.layer.input.normalize_ewma - name: vel.net.layer.util.repeat times: 2 # Need to repeat output twice, to consume by the 'parallel' layers - name: vel.net.layer.arch.parallel diff --git a/examples-configs/rl/mujoco/mujoco_trpo.yaml 
b/examples-configs/rl/mujoco/mujoco_trpo.yaml index f88fc5ba..5c2a83ee 100644 --- a/examples-configs/rl/mujoco/mujoco_trpo.yaml +++ b/examples-configs/rl/mujoco/mujoco_trpo.yaml @@ -28,7 +28,7 @@ model: input_net: name: vel.net.modular layers: - - name: vel.net.layer.input.normalize_expanding + - name: vel.net.layer.input.normalize_ewma policy_net: name: vel.net.modular diff --git a/vel/module/input/normalize_ewma.py b/vel/module/input/normalize_ewma.py new file mode 100644 index 00000000..3219e358 --- /dev/null +++ b/vel/module/input/normalize_ewma.py @@ -0,0 +1,47 @@ +import torch + +from vel.api import VModule + + +class NormalizeEwma(VModule): + """ Normalize a vector of observations - across the batch dim """ + + def __init__(self, input_shape, beta=0.99, per_element_update=False, epsilon=1e-1): + super().__init__() + + self.input_shape = input_shape + self.epsilon = epsilon + self.beta = beta + self.per_element_update = per_element_update + + self.register_buffer('running_mean', torch.zeros(input_shape, dtype=torch.float)) + self.register_buffer('running_var', torch.ones(input_shape, dtype=torch.float)) + self.register_buffer('debiasing_term', torch.tensor(self.epsilon, dtype=torch.float)) + + def reset_weights(self): + self.running_mean.zero_() + self.running_var.fill_(1.0) + self.count.fill_(self.epsilon) + + def forward(self, input_vector): + # Make sure input is float32 + input_vector = input_vector.to(torch.float) + + if self.training: + batch_size = input_vector.size(0) + batch_mean = input_vector.mean(dim=0) + batch_var = input_vector.var(dim=0, unbiased=False) + + if self.per_element_update: + weight = self.beta ** batch_size + else: + weight = self.beta + + self.running_mean.mul_(weight).add_(batch_mean * (1.0 - weight)) + self.running_var.mul_(weight).add_(batch_var * (1.0 - weight)) + self.debiasing_term.mul_(weight).add_(1.0 * (1.0 - weight)) + + debiased_mean = self.running_mean / self.debiasing_term + debiased_var = self.running_var / 
self.debiasing_term + + return (input_vector - debiased_mean.unsqueeze(0)) / torch.sqrt(debiased_var.unsqueeze(0)) diff --git a/vel/net/layer/input/normalize_ewma.py b/vel/net/layer/input/normalize_ewma.py new file mode 100644 index 00000000..50b6dec5 --- /dev/null +++ b/vel/net/layer/input/normalize_ewma.py @@ -0,0 +1,65 @@ +from vel.api import SizeHints, SizeHint +from vel.module.input.normalize_ewma import NormalizeEwma +from vel.net.layer_base import LayerFactory, Layer, LayerFactoryContext, LayerInfo + + +class NormalizeEwmaLayer(Layer): + """ Layer that normalizes the inputs """ + + def __init__(self, info: LayerInfo, input_shape: SizeHints, beta: float = 0.99, epsilon: float = 1e-1, + per_element_update=False): + super().__init__(info) + + self.input_shape = input_shape + self.beta = beta + self.epsilon = epsilon + self.per_element_update = per_element_update + + self.normalize = NormalizeEwma( + beta=self.beta, + epsilon=self.epsilon, + per_element_update=self.per_element_update, + input_shape=self.input_shape.assert_single()[1:] # Remove batch axis + ) + + def forward(self, direct, state: dict = None, context: dict = None): + return self.normalize(direct) + + def size_hints(self) -> SizeHints: + return self.input_shape + + +class NormalizeEwmaLayerFactory(LayerFactory): + def __init__(self, beta: float = 0.99, epsilon: float = 1e-2, shape=None, per_element_update=False): + super().__init__() + self.shape = shape + self.beta = beta + self.epsilon = epsilon + self.per_element_update = per_element_update + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "image_to_tensor" + + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ + if self.shape is None: + input_shape = direct_input + else: + input_shape = SizeHints(SizeHint(*([None] + list(self.shape)))) + + return NormalizeEwmaLayer( + info=self.make_info(context), + beta=self.beta, + 
epsilon=self.epsilon, + per_element_update=self.per_element_update, + input_shape=input_shape + ) + + +def create(beta=0.99, epsilon=1e-1, shape=None, per_element_update=False, label=None, group=None): + """ Vel factory function """ + return NormalizeEwmaLayerFactory( + beta=beta, epsilon=epsilon, shape=shape, per_element_update=per_element_update + ).with_given_name(label).with_given_group(group) From a2bb1314fa76a398d88674d7187ce73d13ecfa52 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Fri, 18 Oct 2019 18:18:09 -0700 Subject: [PATCH 132/162] Implemented WANDB streaming. --- vel/metric/base/base_metric.py | 17 +++++++++++++++-- vel/storage/streaming/visdom.py | 4 ++-- vel/storage/streaming/wandb.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 vel/storage/streaming/wandb.py diff --git a/vel/metric/base/base_metric.py b/vel/metric/base/base_metric.py index 6a64d1b2..8f073247 100644 --- a/vel/metric/base/base_metric.py +++ b/vel/metric/base/base_metric.py @@ -1,9 +1,22 @@ -import collections +import attr +import typing from vel.api import TrainingInfo -MetricKey = collections.namedtuple('MetricKey', ['dataset', 'name', 'scope']) +@attr.s(auto_attribs=True, frozen=True) +class MetricKey: + """ Key for each metric """ + name: str + scope: str + dataset: typing.Optional[str] = None + + def format(self): + """ Format a metric key into a string """ + if self.dataset is None: + return f"{self.scope}/{self.name}" + else: + return f"{self.dataset}:{self.scope}/{self.name}" class BaseMetric: diff --git a/vel/storage/streaming/visdom.py b/vel/storage/streaming/visdom.py index c861afe0..b0c31277 100644 --- a/vel/storage/streaming/visdom.py +++ b/vel/storage/streaming/visdom.py @@ -33,8 +33,8 @@ def on_batch_end(self, batch_info, dataset=None): """ Stream LR to visdom """ if self.settings.stream_lr: iteration_idx = ( - float(batch_info.epoch_number) + - float(batch_info.batch_number) / 
batch_info.batches_per_epoch + float(batch_info.epoch_number) + + float(batch_info.batch_number) / batch_info.batches_per_epoch ) lr = batch_info.optimizer.param_groups[-1]['lr'] diff --git a/vel/storage/streaming/wandb.py b/vel/storage/streaming/wandb.py new file mode 100644 index 00000000..24bf0297 --- /dev/null +++ b/vel/storage/streaming/wandb.py @@ -0,0 +1,32 @@ +import wandb + + +from vel.api import ModelConfig, Callback, TrainingInfo + + +class WandbStreaming(Callback): + """ Stream live results from training to WandB """ + + def __init__(self, model_config: ModelConfig): + self.model_config = model_config + + def on_train_begin(self, training_info: TrainingInfo) -> None: + wandb.init( + job_type='train', + project='vel', + dir=self.model_config.output_dir('wandb'), + group=self.model_config.name, + name=self.model_config.run_name, + resume=training_info.start_epoch_idx > 0, + tags=[self.model_config.tag] if self.model_config.tag else [] + ) + + def on_epoch_end(self, epoch_info): + """ Send data to wandb """ + result = {k.format(): v for k, v in epoch_info.result.items()} + wandb.log(row=result, step=epoch_info.global_epoch_idx) + + +def create(model_config): + """ Vel factory function """ + return WandbStreaming(model_config) From b2b60d42aec35b5f5647af3e968addf95d9cdedc Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Fri, 18 Oct 2019 18:18:53 -0700 Subject: [PATCH 133/162] Added wandb settings to git ignore. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index b0a800fd..e0b8259c 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,6 @@ environment.yaml # Test cache /.pytest_cache + +# WANDB settings +/wandb From e3e211fc393856869359b3d6e84e901dd89f722f Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Fri, 18 Oct 2019 18:58:57 -0700 Subject: [PATCH 134/162] Added faster VAE NLL command. 
--- .../autoencoder/mnist/mnist_cnn_ae.yaml | 2 +- .../cifar10/cifar10_cnn_01.yaml | 2 +- .../cifar10/cifar10_resnetv1_110.yaml | 2 +- .../cifar10/cifar10_resnetv1_32.yaml | 2 +- .../cifar10/cifar10_resnetv2_110.yaml | 2 +- .../cifar10_resnetv2_164_bottleneck.yaml | 2 +- .../cifar10/cifar10_resnetv2_32.yaml | 2 +- .../cifar10/cifar10_resnext_29_c1.yaml | 2 +- .../cifar10/cifar10_resnext_29_c8.yaml | 2 +- .../cats_vs_dogs_resnet34.yaml | 2 +- .../classification/mnist/mnist_cnn_01.yaml | 2 +- examples-configs/gan/mnist/mnist_gan.yaml | 2 +- .../latent/mnist/mnist_cnn_iwae.yaml | 2 +- .../latent/mnist/mnist_cnn_vae.yaml | 2 +- .../latent/mnist/mnist_cnn_vq_vae.yaml | 2 +- .../latent/mnist/mnist_fc_iwae.yaml | 2 +- .../latent/mnist/mnist_fc_vae.yaml | 9 +++- .../latent/omniglot/omniglot_cnn_vae.yaml | 2 +- .../latent/omniglot/omniglot_fc_vae.yaml | 2 +- .../classification/imdb_sentiment_gru.yaml | 2 +- .../nlp/generation/gen_shakespeare.yaml | 2 +- vel/command/augvis_command.py | 2 +- vel/command/latent/__init__.py | 0 vel/command/latent/vae_nll.py | 53 +++++++++++++++++++ vel/data/__init__.py | 1 - vel/data/loader/__init__.py | 3 ++ vel/data/{ => loader}/bucket_loader.py | 21 +++++--- vel/data/{ => loader}/dataset_loader.py | 11 ++-- .../{ => loader}/text_character_loader.py | 17 +++--- vel/model/latent/vae_base.py | 29 +++++----- vel/train/phase/generic.py | 2 +- vel/train/train_phase.py | 2 +- vel/train/trainer.py | 2 +- 33 files changed, 133 insertions(+), 59 deletions(-) create mode 100644 vel/command/latent/__init__.py create mode 100644 vel/command/latent/vae_nll.py create mode 100644 vel/data/loader/__init__.py rename vel/data/{ => loader}/bucket_loader.py (81%) rename vel/data/{ => loader}/dataset_loader.py (88%) rename vel/data/{ => loader}/text_character_loader.py (88%) diff --git a/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml b/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml index 2591cc04..897bafaf 100644 --- 
a/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml +++ b/examples-configs/autoencoder/mnist/mnist_cnn_ae.yaml @@ -15,7 +15,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_cnn_01.yaml b/examples-configs/classification/cifar10/cifar10_cnn_01.yaml index e6292546..d192c20b 100644 --- a/examples-configs/classification/cifar10/cifar10_cnn_01.yaml +++ b/examples-configs/classification/cifar10/cifar10_cnn_01.yaml @@ -14,7 +14,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml b/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml index 3ce7feb8..c6f45b84 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml @@ -15,7 +15,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml b/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml index 935b8277..2cd2fedb 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml @@ -15,7 +15,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml b/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml index f0bd7291..fb0213d6 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml @@ -14,7 +14,7 @@ source: loader: - name: vel.data.dataset_loader + name: 
vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml b/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml index a7ff1491..a24785bf 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml @@ -16,7 +16,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml b/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml index 60ebf5ad..2d67a653 100644 --- a/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml @@ -14,7 +14,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml b/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml index c007c5fe..601a9a27 100644 --- a/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml @@ -19,7 +19,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 diff --git a/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml b/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml index 6e90611d..632a5f45 100644 --- a/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml +++ b/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml @@ -19,7 +19,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 diff --git 
a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml index da2f8b8f..83784841 100644 --- a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml +++ b/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml @@ -16,7 +16,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader num_workers: 8 batch_size: 64 diff --git a/examples-configs/classification/mnist/mnist_cnn_01.yaml b/examples-configs/classification/mnist/mnist_cnn_01.yaml index d11b5742..f23a96c6 100644 --- a/examples-configs/classification/mnist/mnist_cnn_01.yaml +++ b/examples-configs/classification/mnist/mnist_cnn_01.yaml @@ -14,7 +14,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 # num_workers: 4 diff --git a/examples-configs/gan/mnist/mnist_gan.yaml b/examples-configs/gan/mnist/mnist_gan.yaml index 4de4ecce..6a3ea519 100644 --- a/examples-configs/gan/mnist/mnist_gan.yaml +++ b/examples-configs/gan/mnist/mnist_gan.yaml @@ -14,7 +14,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 # num_workers: 4 # pin_memory: true diff --git a/examples-configs/latent/mnist/mnist_cnn_iwae.yaml b/examples-configs/latent/mnist/mnist_cnn_iwae.yaml index 90cb5da7..56e86038 100644 --- a/examples-configs/latent/mnist/mnist_cnn_iwae.yaml +++ b/examples-configs/latent/mnist/mnist_cnn_iwae.yaml @@ -17,7 +17,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 pin_memory: true diff --git a/examples-configs/latent/mnist/mnist_cnn_vae.yaml b/examples-configs/latent/mnist/mnist_cnn_vae.yaml index 118ad430..11debfb0 100644 --- a/examples-configs/latent/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/latent/mnist/mnist_cnn_vae.yaml @@ -16,7 +16,7 @@ source: loader: - 
name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 256 num_workers: 4 pin_memory: true diff --git a/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml b/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml index bab34608..e99c7703 100644 --- a/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml +++ b/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml @@ -18,7 +18,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 # num_workers: 4 # pin_memory: true diff --git a/examples-configs/latent/mnist/mnist_fc_iwae.yaml b/examples-configs/latent/mnist/mnist_fc_iwae.yaml index 215906dd..46111a50 100644 --- a/examples-configs/latent/mnist/mnist_fc_iwae.yaml +++ b/examples-configs/latent/mnist/mnist_fc_iwae.yaml @@ -17,7 +17,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 pin_memory: true diff --git a/examples-configs/latent/mnist/mnist_fc_vae.yaml b/examples-configs/latent/mnist/mnist_fc_vae.yaml index 1fa51447..d9a64b4b 100644 --- a/examples-configs/latent/mnist/mnist_fc_vae.yaml +++ b/examples-configs/latent/mnist/mnist_fc_vae.yaml @@ -15,7 +15,7 @@ source: name: vel.data.source.vision.mnist loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 pin_memory: true @@ -42,4 +42,9 @@ scheduler: commands: train: name: vel.command.train_command - epochs: 3280 \ No newline at end of file + epochs: 3280 + + nll: + name: vel.command.latent.vae_nll + max_batch: 10_000 + samples: !param samples = 10 \ No newline at end of file diff --git a/examples-configs/latent/omniglot/omniglot_cnn_vae.yaml b/examples-configs/latent/omniglot/omniglot_cnn_vae.yaml index 2df6f80b..e952d0e0 100644 --- a/examples-configs/latent/omniglot/omniglot_cnn_vae.yaml +++ b/examples-configs/latent/omniglot/omniglot_cnn_vae.yaml @@ -17,7 +17,7 @@ source: loader: - name: vel.data.dataset_loader + name: 
vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 pin_memory: true diff --git a/examples-configs/latent/omniglot/omniglot_fc_vae.yaml b/examples-configs/latent/omniglot/omniglot_fc_vae.yaml index 263a72eb..700ca014 100644 --- a/examples-configs/latent/omniglot/omniglot_fc_vae.yaml +++ b/examples-configs/latent/omniglot/omniglot_fc_vae.yaml @@ -17,7 +17,7 @@ source: loader: - name: vel.data.dataset_loader + name: vel.data.loader.dataset_loader batch_size: 128 num_workers: 4 pin_memory: true diff --git a/examples-configs/nlp/classification/imdb_sentiment_gru.yaml b/examples-configs/nlp/classification/imdb_sentiment_gru.yaml index cb3c9e2a..8fa86a95 100644 --- a/examples-configs/nlp/classification/imdb_sentiment_gru.yaml +++ b/examples-configs/nlp/classification/imdb_sentiment_gru.yaml @@ -7,7 +7,7 @@ source: loader: - name: vel.data.bucket_loader + name: vel.data.loader.bucket_loader batch_size: 32 diff --git a/examples-configs/nlp/generation/gen_shakespeare.yaml b/examples-configs/nlp/generation/gen_shakespeare.yaml index ac7bd121..f27a7161 100644 --- a/examples-configs/nlp/generation/gen_shakespeare.yaml +++ b/examples-configs/nlp/generation/gen_shakespeare.yaml @@ -9,7 +9,7 @@ source: loader: - name: vel.data.text_character_loader + name: vel.data.loader.text_character_loader sequence_length: 128 batch_size: 64 diff --git a/vel/command/augvis_command.py b/vel/command/augvis_command.py index 880830b9..734f5e44 100644 --- a/vel/command/augvis_command.py +++ b/vel/command/augvis_command.py @@ -1,7 +1,7 @@ import matplotlib.pyplot as plt import numpy as np -from vel.data import DatasetLoader +from vel.data.loader import DatasetLoader class AugmentationVisualizationCommand: diff --git a/vel/command/latent/__init__.py b/vel/command/latent/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/command/latent/vae_nll.py b/vel/command/latent/vae_nll.py new file mode 100644 index 00000000..9aafa3c0 --- /dev/null +++ 
b/vel/command/latent/vae_nll.py @@ -0,0 +1,53 @@ +import numpy as np +import torch +import tqdm + +from vel.api import TrainingInfo + + +class VaeNllCommand: + """ Calculate NLL for the VAE using importance sampling """ + def __init__(self, model_config, model_factory, loader, storage, max_batch: int, samples: int): + self.model_config = model_config + self.model_factory = model_factory + self.loader = loader + self.storage = storage + + self.max_batch = max_batch + self.samples = samples + + @torch.no_grad() + def run(self): + device = self.model_config.torch_device() + model = self.model_factory.instantiate().to(device) + + start_epoch = self.storage.last_epoch_idx() + + training_info = TrainingInfo(start_epoch_idx=start_epoch) + + model_state, hidden_state = self.storage.load(training_info) + model.load_state_dict(model_state) + + model.eval() + + validation_dataset = self.loader.source.validation + + results = [] + + # Always take at least one + batch_size = max(self.max_batch // self.samples, 1) + + for i in tqdm.trange(validation_dataset.num_batches(batch_size)): + batch = validation_dataset.get_batch(i, batch_size)['x'].to(self.model_config.device) + nll = model.nll(batch, num_posterior_samples=self.samples) + + results.append(nll.cpu().numpy()) + + full_results = np.concatenate(results) + + print("NLL: {:.2f}".format(np.mean(full_results))) + + +def create(model_config, model, loader, storage, max_batch: int = 1024, samples: int = 100): + """ Vel factory function """ + return VaeNllCommand(model_config, model, loader, storage, max_batch=max_batch, samples=samples) diff --git a/vel/data/__init__.py b/vel/data/__init__.py index 122edbd6..3a6245c7 100644 --- a/vel/data/__init__.py +++ b/vel/data/__init__.py @@ -1,2 +1 @@ from .dataflow import DataFlow -from .dataset_loader import DatasetLoader diff --git a/vel/data/loader/__init__.py b/vel/data/loader/__init__.py new file mode 100644 index 00000000..57122da0 --- /dev/null +++ b/vel/data/loader/__init__.py @@ 
-0,0 +1,3 @@ +from .dataset_loader import DatasetLoader +from .bucket_loader import BucketLoader +from .text_character_loader import TextCharacterLoader diff --git a/vel/data/bucket_loader.py b/vel/data/loader/bucket_loader.py similarity index 81% rename from vel/data/bucket_loader.py rename to vel/data/loader/bucket_loader.py index 740d83c1..f587d4a6 100644 --- a/vel/data/bucket_loader.py +++ b/vel/data/loader/bucket_loader.py @@ -8,12 +8,12 @@ class BucketLoader: """ Loads sequence data from a source and batches together examples of similar length """ def __init__(self, model_config: ModelConfig, source: LanguageSource, batch_size: int): - self.source = source + self._source = source self.batch_size = batch_size - if self.source.test is None: + if self._source.test is None: self.train_loader, self.val_loader = data.BucketIterator.splits( - (self.source.train, self.source.validation), + (self._source.train, self._source.validation), batch_size=batch_size, device=model_config.torch_device(), shuffle=True @@ -21,17 +21,17 @@ def __init__(self, model_config: ModelConfig, source: LanguageSource, batch_size self.test_loader = None else: self.train_loader, self.val_loader, self.test_loader = data.BucketIterator.splits( - (self.source.train, self.source.validation, self.source.test), + (self._source.train, self._source.validation, self._source.test), batch_size=batch_size, device=model_config.torch_device(), shuffle=True ) - self.train_loader = IteratorDictWrapper(self.train_loader, self.source.mapping) - self.val_loader = IteratorDictWrapper(self.val_loader, self.source.mapping) + self.train_loader = IteratorDictWrapper(self.train_loader, self._source.mapping) + self.val_loader = IteratorDictWrapper(self.val_loader, self._source.mapping) if self.test_loader: - self.test_loader = IteratorDictWrapper(self.test_loader, self.source.mapping) + self.test_loader = IteratorDictWrapper(self.test_loader, self._source.mapping) self._loaders = { 'train': self.train_loader, @@ -48,6 
+48,11 @@ def __init__(self, model_config: ModelConfig, source: LanguageSource, batch_size def __getitem__(self, item): return self._loaders[item] + @property + def source(self): + """ Return the source for this loader """ + return self._source + @property def loader(self): """ Get a dict of loaders """ @@ -61,7 +66,7 @@ def size(self): @property def alphabet_size(self): """ Size of the text alphabet """ - return self.source.metadata.get('alphabet_size', 0) + return self._source.metadata.get('alphabet_size', 0) def create(model_config: ModelConfig, source: LanguageSource, batch_size: int): diff --git a/vel/data/dataset_loader.py b/vel/data/loader/dataset_loader.py similarity index 88% rename from vel/data/dataset_loader.py rename to vel/data/loader/dataset_loader.py index 59be7841..aa37fd62 100644 --- a/vel/data/dataset_loader.py +++ b/vel/data/loader/dataset_loader.py @@ -3,7 +3,7 @@ from vel.api import Source -from .dataflow import DataFlow +from vel.data.dataflow import DataFlow class DatasetLoader: @@ -11,14 +11,14 @@ class DatasetLoader: def __init__(self, source: Source, batch_size: int, num_workers: int, transformations: typing.Optional[list] = None, pin_memory=False): - self.source = source + self._source = source self.batch_size = batch_size self.num_workers = num_workers self.transformations = transformations self.pin_memory = pin_memory if transformations is not None: - self.transformed_source = DataFlow.transform(self.source, transformations) + self.transformed_source = DataFlow.transform(self._source, transformations) else: self.transformed_source = source @@ -54,6 +54,11 @@ def __init__(self, source: Source, batch_size: int, num_workers: int, def __getitem__(self, item): return self._loaders[item] + @property + def source(self): + """ Return the source for this loader """ + return self.transformed_source + @property def loader(self): """ Get a dict of loaders """ diff --git a/vel/data/text_character_loader.py b/vel/data/loader/text_character_loader.py 
similarity index 88% rename from vel/data/text_character_loader.py rename to vel/data/loader/text_character_loader.py index 5eafc95b..d37e095f 100644 --- a/vel/data/text_character_loader.py +++ b/vel/data/loader/text_character_loader.py @@ -81,18 +81,18 @@ class TextCharacterLoader: """ Loader for the text character data source """ def __init__(self, source, sequence_length: int, batch_size: int): - self.source = source + self._source = source self.sequence_length = sequence_length self.batch_size = batch_size - self.alphabet = self.source.metadata['alphabet'] + self.alphabet = self._source.metadata['alphabet'] - self.train_loader = TextLoader(self.source.train, self.sequence_length, self.batch_size, len(self.alphabet)) - self.val_loader = TextLoader(self.source.validation, self.sequence_length, self.batch_size, len(self.alphabet)) + self.train_loader = TextLoader(self._source.train, self.sequence_length, self.batch_size, len(self.alphabet)) + self.val_loader = TextLoader(self._source.validation, self.sequence_length, self.batch_size, len(self.alphabet)) - if self.source.test is None: + if self._source.test is None: self.test_loader = None else: - self.test_loader = TextLoader(self.source.test, self.sequence_length, self.batch_size, len(self.alphabet)) + self.test_loader = TextLoader(self._source.test, self.sequence_length, self.batch_size, len(self.alphabet)) self._loaders = { 'train': self.train_loader, @@ -109,6 +109,11 @@ def __init__(self, source, sequence_length: int, batch_size: int): def __getitem__(self, item): return self._loaders[item] + @property + def source(self): + """ Return source for this loader """ + return self._source + @property def alphabet_size(self): """ Size of the text alphabet """ diff --git a/vel/model/latent/vae_base.py b/vel/model/latent/vae_base.py index 75562b42..4c3509ab 100644 --- a/vel/model/latent/vae_base.py +++ b/vel/model/latent/vae_base.py @@ -129,8 +129,6 @@ def nll(self, sample: torch.Tensor, num_posterior_samples: int = 
1): """ assert num_posterior_samples >= 1, "Need at least one posterior sample" - buffer = [] - encoded = self.encoder_network(sample) z_dist = self.encoder_distribution(encoded) prior = self.prior_distribution() @@ -138,24 +136,25 @@ def nll(self, sample: torch.Tensor, num_posterior_samples: int = 1): if self.analytical_kl_div: kl_divergence = dist.kl_divergence(z_dist, prior) - for i in range(num_posterior_samples): - z = z_dist.rsample() - decoded = self.decoder_network(z) - x_dist = self.decoder_distribution(decoded) + bs = encoded.size(0) + z = z_dist.rsample((num_posterior_samples,)) - if not self.analytical_kl_div: - lpz = prior.log_prob(z) - lqzx = z_dist.log_prob(z) - kl_divergence = -lpz + lqzx + # Reshape, decode, reshape + z_reshaped = z.view([bs * num_posterior_samples] + list(z.shape[2:])) + decoded = self.decoder_network(z_reshaped) + decoded = decoded.view([num_posterior_samples, bs] + list(decoded.shape[1:])) - likelihood = x_dist.log_prob(sample) - elbo = likelihood - kl_divergence + x_dist = self.decoder_distribution(decoded) - buffer.append(elbo) + if not self.analytical_kl_div: + lpz = prior.log_prob(z) + lqzx = z_dist.log_prob(z) + kl_divergence = -lpz + lqzx - averaged = self.log_mean_exp(torch.stack(buffer, dim=-1), dim=-1) + likelihood = x_dist.log_prob(sample) + elbo = likelihood - kl_divergence - return -averaged + return -self.log_mean_exp(elbo, dim=0) #################################################################################################################### # Utility methods diff --git a/vel/train/phase/generic.py b/vel/train/phase/generic.py index 00c86660..4619a390 100644 --- a/vel/train/phase/generic.py +++ b/vel/train/phase/generic.py @@ -1,5 +1,5 @@ from vel.api import TrainingInfo, EpochInfo, OptimizedModel -from vel.data import DatasetLoader +from vel.data.loader import DatasetLoader from vel.train import TrainPhase diff --git a/vel/train/train_phase.py b/vel/train/train_phase.py index 13733d12..8f7ff84f 100644 --- 
a/vel/train/train_phase.py +++ b/vel/train/train_phase.py @@ -1,5 +1,5 @@ from vel.api import TrainingInfo, EpochInfo, Model, VelOptimizer -from vel.data import DatasetLoader +from vel.data.loader import DatasetLoader from .trainer import Trainer diff --git a/vel/train/trainer.py b/vel/train/trainer.py index 7d151b18..2a18266f 100644 --- a/vel/train/trainer.py +++ b/vel/train/trainer.py @@ -4,7 +4,7 @@ import tqdm from vel.api import OptimizedModel, TrainingInfo, EpochInfo, BatchInfo -from vel.data import DatasetLoader +from vel.data.loader import DatasetLoader from vel.util.tensor_util import to_device From 8923844cdf92e115e324c3f832b649495025d5f2 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Fri, 18 Oct 2019 19:01:55 -0700 Subject: [PATCH 135/162] Fixed issue in train command. --- vel/command/train_command.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vel/command/train_command.py b/vel/command/train_command.py index 9e94450c..bbe45e8b 100644 --- a/vel/command/train_command.py +++ b/vel/command/train_command.py @@ -1,9 +1,9 @@ import typing import vel.api as api -import vel.data as data import vel.train as train +from vel.data.loader import DatasetLoader from vel.metric.samples_per_sec import SamplesPerSec from vel.callback.time_tracker import TimeTracker from vel.callback.sample_tracker import SampleTracker @@ -14,7 +14,7 @@ class SimpleTrainCommand: def __init__(self, epochs: int, model_config: api.ModelConfig, model_factory: api.ModuleFactory, optimizer_factory: api.OptimizerFactory, scheduler_factory: typing.Optional[api.SchedulerFactory], - loader: data.DatasetLoader, storage: api.Storage, + loader: DatasetLoader, storage: api.Storage, callbacks: typing.Optional[typing.List[api.Callback]]): self.epochs = epochs self.model_config = model_config From 3eb8d2ffd4916070ac7fae37d3075f3fc33f579d Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 19 Oct 2019 12:29:42 -0700 Subject: [PATCH 136/162] Remove omniglot VAE 
examples. --- .../latent/omniglot/omniglot_cnn_vae.yaml | 49 ----------------- .../latent/omniglot/omniglot_fc_vae.yaml | 54 ------------------- 2 files changed, 103 deletions(-) delete mode 100644 examples-configs/latent/omniglot/omniglot_cnn_vae.yaml delete mode 100644 examples-configs/latent/omniglot/omniglot_fc_vae.yaml diff --git a/examples-configs/latent/omniglot/omniglot_cnn_vae.yaml b/examples-configs/latent/omniglot/omniglot_cnn_vae.yaml deleted file mode 100644 index e952d0e0..00000000 --- a/examples-configs/latent/omniglot/omniglot_cnn_vae.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: 'omniglot_cnn_vae' - - -model: - name: vel.model.latent.cnn_vae - img_rows: 28 - img_cols: 28 - img_channels: 1 - channels: [64, 128, 256] - representation_length: 50 - max_grad_norm: 1.0 - analytical_kl_div: true - - -source: - name: vel.data.source.vision.omniglot - - -loader: - name: vel.data.loader.dataset_loader - batch_size: 128 - num_workers: 4 - pin_memory: true - - transformations: - - name: vel.data.transformation.pil_resize - shape: [28, 28] - - name: vel.data.transformation.to_array - - name: vel.data.transformation.binarize_image - - name: vel.data.transformation.image_to_tensor - - name: vel.data.transformation.unsupervised - - -optimizer: - name: vel.optimizer.radam - lr: 1.0e-3 - eps: 1.0e-4 - - -commands: - augvis: - name: vel.command.augvis_command - samples: 5 - cases: 3 - - train: - name: vel.command.train_command - epochs: 3280 - diff --git a/examples-configs/latent/omniglot/omniglot_fc_vae.yaml b/examples-configs/latent/omniglot/omniglot_fc_vae.yaml deleted file mode 100644 index 700ca014..00000000 --- a/examples-configs/latent/omniglot/omniglot_fc_vae.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: 'omniglot_fc_vae' - - -model: - name: vel.model.latent.fc_vae - img_rows: 28 - img_cols: 28 - img_channels: 1 - layers: [200, 200] - representation_length: 50 - max_grad_norm: 1.0 - analytical_kl_div: true - - -source: - name: vel.data.source.vision.omniglot - - 
-loader: - name: vel.data.loader.dataset_loader - batch_size: 128 - num_workers: 4 - pin_memory: true - - transformations: - - name: vel.data.transformation.pil_resize - shape: [28, 28] - - name: vel.data.transformation.to_array - - name: vel.data.transformation.binarize_image - - name: vel.data.transformation.image_to_tensor - - name: vel.data.transformation.unsupervised - - -optimizer: - name: vel.optimizer.radam - lr: 1.0e-3 - eps: 1.0e-4 - - -scheduler: - name: vel.scheduler.multi_step - gamma: 0.71968 # 10 * (-1/7) - milestones: [ 1, 4, 13, 40, 121, 364, 1093, 3280] - - -commands: - augvis: - name: vel.command.augvis_command - samples: 5 - cases: 3 - - train: - name: vel.command.train_command - epochs: 3280 From da8a3b7d6440483e17e5a844950d557b06e18e15 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 20 Oct 2019 12:40:01 -0700 Subject: [PATCH 137/162] Initial benchmarks. --- README.md | 6 +++- docs/Benchmarks.md | 26 ++++++++++++++++ .../latent/mnist/mnist_cnn_iwae.yaml | 1 + .../latent/mnist/mnist_cnn_vae.yaml | 5 ++-- .../latent/mnist/mnist_cnn_vq_vae.yaml | 1 + .../latent/mnist/mnist_fc_iwae.yaml | 4 ++- .../latent/mnist/mnist_fc_vae.yaml | 4 ++- vel/model/latent/fc_vae.py | 30 ++++++++++--------- 8 files changed, 58 insertions(+), 19 deletions(-) create mode 100644 docs/Benchmarks.md diff --git a/README.md b/README.md index 22075ae9..29681425 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,9 @@ If that's not the case few bits of custom glue code should do the job. This repository is still in an early stage of that journey but it will grow as I'll be putting work into it. +For up-to-date benchmarks, look here: +[Benchmarks](docs/Benchmarks.md) + # Blogposts @@ -59,7 +62,7 @@ If you want to run YAML config examples, you'll also need a **project configurat `.velproject.yaml`. An example is included in this repository. Default project configuration writes logs to the tensorboard directory `output/tensorboard` -under the main directory. 
Outputs to visdom and mongodb are also implemented. +under the main directory. Output modules to visdom, mongodb and wandb are also implemented. If you don't want any logging, there is included another example file `.velproject.dummy.yaml` that writes training progress to the standard output only. @@ -77,6 +80,7 @@ To use it, just rename it to `.velproject.yaml`. understand what exactly the model is doing for newcomers already comfortable with PyTorch. - All state-of-the-art models should be implemented in the framework with accuracy matching published results. + For up-to-date benchmarks, look here: [Benchmarks](docs/Benchmarks.md) - All common deep learning workflows should be fast to implement, while uncommon ones should be possible, at least as far as PyTorch allows. diff --git a/docs/Benchmarks.md b/docs/Benchmarks.md new file mode 100644 index 00000000..274224fa --- /dev/null +++ b/docs/Benchmarks.md @@ -0,0 +1,26 @@ +# Benchmarks + +In this file I'll gather up to date benchmarking results for examples included in this repository. + +Levels of hierarchy will be first task, then dataset (benchmark) and then table listing model results of +relevant metrics. + +Each metric I'll try to average over six runs and provide mean and standard deviation of results. + + +## Generative models + + +### Binarized MNIST + + +For VAE models, I'll include upper bound for Negative Log Likelihood (NLL) for given number of importance samples (IS). 
+ + +| Model | NLL (IS=1) | NLL (IS=100) | NLL (IS=5000) | +| ----- | ---------- | ------------ | ------------- | +| FC VAE | 90.98 ± 0.14 | 87.07 ± 0.18 | 86.93 ± 0.18 | +| CNN VAE | +| FC IWAE | +| CNN IWAE | + diff --git a/examples-configs/latent/mnist/mnist_cnn_iwae.yaml b/examples-configs/latent/mnist/mnist_cnn_iwae.yaml index 56e86038..fc3d39fd 100644 --- a/examples-configs/latent/mnist/mnist_cnn_iwae.yaml +++ b/examples-configs/latent/mnist/mnist_cnn_iwae.yaml @@ -33,6 +33,7 @@ optimizer: name: vel.optimizer.radam lr: 1.0e-3 eps: 1.0e-4 + max_grad_norm: 1.0 scheduler: diff --git a/examples-configs/latent/mnist/mnist_cnn_vae.yaml b/examples-configs/latent/mnist/mnist_cnn_vae.yaml index 11debfb0..56340d6b 100644 --- a/examples-configs/latent/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/latent/mnist/mnist_cnn_vae.yaml @@ -18,8 +18,8 @@ source: loader: name: vel.data.loader.dataset_loader batch_size: 256 - num_workers: 4 - pin_memory: true +# num_workers: 4 +# pin_memory: true transformations: - name: vel.data.transformation.to_array @@ -45,6 +45,7 @@ optimizer: name: vel.optimizer.radam lr: 1.0e-3 eps: 1.0e-4 + max_grad_norm: 1.0 scheduler: diff --git a/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml b/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml index e99c7703..8e1dbed1 100644 --- a/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml +++ b/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml @@ -33,6 +33,7 @@ optimizer: name: vel.optimizer.radam lr: 1.0e-3 eps: 1.0e-4 + max_grad_norm: 1.0 scheduler: diff --git a/examples-configs/latent/mnist/mnist_fc_iwae.yaml b/examples-configs/latent/mnist/mnist_fc_iwae.yaml index 46111a50..48fa116f 100644 --- a/examples-configs/latent/mnist/mnist_fc_iwae.yaml +++ b/examples-configs/latent/mnist/mnist_fc_iwae.yaml @@ -32,7 +32,9 @@ loader: optimizer: name: vel.optimizer.radam lr: 1.0e-3 - eps: 1.0e-4 + eps: 1.0e- + max_grad_norm: 1.0 + scheduler: diff --git a/examples-configs/latent/mnist/mnist_fc_vae.yaml 
b/examples-configs/latent/mnist/mnist_fc_vae.yaml index d9a64b4b..e61c2028 100644 --- a/examples-configs/latent/mnist/mnist_fc_vae.yaml +++ b/examples-configs/latent/mnist/mnist_fc_vae.yaml @@ -14,6 +14,7 @@ model: source: name: vel.data.source.vision.mnist + loader: name: vel.data.loader.dataset_loader batch_size: 128 @@ -29,8 +30,9 @@ loader: optimizer: name: vel.optimizer.radam - lr: 1.0e-3 + lr: 1.0e-2 eps: 1.0e-4 + max_grad_norm: 1.0 scheduler: diff --git a/vel/model/latent/fc_vae.py b/vel/model/latent/fc_vae.py index 701c5717..8c5fbad4 100644 --- a/vel/model/latent/fc_vae.py +++ b/vel/model/latent/fc_vae.py @@ -1,7 +1,10 @@ +import itertools as it + import torch import torch.distributions as dist import torch.nn as nn import torch.nn.functional as F +import torch.nn.init as init from vel.api import ModuleFactory from vel.module.layers import Flatten, Reshape @@ -75,20 +78,19 @@ def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: """ Sample from a decoder distribution - we ignore that since it's so weak in this case """ return decoded -# import torch.nn.init as init -# @staticmethod -# def _weight_initializer(tensor): -# init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('tanh')) -# init.constant_(tensor.bias, 0.01) -# -# def reset_weights(self): -# for m in it.chain(self.encoder, self.decoder): -# if isinstance(m, nn.Conv2d): -# self._weight_initializer(m) -# elif isinstance(m, nn.ConvTranspose2d): -# self._weight_initializer(m) -# elif isinstance(m, nn.Linear): -# self._weight_initializer(m) + @staticmethod + def _weight_initializer(tensor): + init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('tanh')) + init.constant_(tensor.bias, 0.01) + + def reset_weights(self): + for m in it.chain(self.encoder, self.decoder): + if isinstance(m, nn.Conv2d): + self._weight_initializer(m) + elif isinstance(m, nn.ConvTranspose2d): + self._weight_initializer(m) + elif isinstance(m, nn.Linear): + self._weight_initializer(m) def 
create(img_rows, img_cols, img_channels, layers=None, representation_length=32, From a6acbbad7367cfe815fe690e9d49b985ea581442 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 20 Oct 2019 12:50:49 -0700 Subject: [PATCH 138/162] Moved configs around. --- .../{gan => generative-adversarial}/mnist/mnist_gan.yaml | 0 .../{latent => generative-likelihood}/mnist/mnist_cnn_iwae.yaml | 0 .../{latent => generative-likelihood}/mnist/mnist_cnn_vae.yaml | 0 .../{latent => generative-likelihood}/mnist/mnist_cnn_vq_vae.yaml | 0 .../{latent => generative-likelihood}/mnist/mnist_fc_iwae.yaml | 0 .../{latent => generative-likelihood}/mnist/mnist_fc_vae.yaml | 0 .../cats_vs_dogs}/cats_vs_dogs_resnet34.yaml | 0 .../cifar10/cifar10_cnn_01.yaml | 0 .../cifar10/cifar10_resnetv1_110.yaml | 0 .../cifar10/cifar10_resnetv1_32.yaml | 0 .../cifar10/cifar10_resnetv2_110.yaml | 0 .../cifar10/cifar10_resnetv2_164_bottleneck.yaml | 0 .../cifar10/cifar10_resnetv2_32.yaml | 0 .../cifar10/cifar10_resnext_29_c1.yaml | 0 .../cifar10/cifar10_resnext_29_c8.yaml | 0 .../mnist/mnist_cnn_01.yaml | 0 .../shakespeare}/gen_shakespeare.yaml | 0 .../imdb_sentiment_gru.yaml | 0 18 files changed, 0 insertions(+), 0 deletions(-) rename examples-configs/{gan => generative-adversarial}/mnist/mnist_gan.yaml (100%) rename examples-configs/{latent => generative-likelihood}/mnist/mnist_cnn_iwae.yaml (100%) rename examples-configs/{latent => generative-likelihood}/mnist/mnist_cnn_vae.yaml (100%) rename examples-configs/{latent => generative-likelihood}/mnist/mnist_cnn_vq_vae.yaml (100%) rename examples-configs/{latent => generative-likelihood}/mnist/mnist_fc_iwae.yaml (100%) rename examples-configs/{latent => generative-likelihood}/mnist/mnist_fc_vae.yaml (100%) rename examples-configs/{classification/imagenet_transfer => image-classification/cats_vs_dogs}/cats_vs_dogs_resnet34.yaml (100%) rename examples-configs/{classification => image-classification}/cifar10/cifar10_cnn_01.yaml (100%) rename 
examples-configs/{classification => image-classification}/cifar10/cifar10_resnetv1_110.yaml (100%) rename examples-configs/{classification => image-classification}/cifar10/cifar10_resnetv1_32.yaml (100%) rename examples-configs/{classification => image-classification}/cifar10/cifar10_resnetv2_110.yaml (100%) rename examples-configs/{classification => image-classification}/cifar10/cifar10_resnetv2_164_bottleneck.yaml (100%) rename examples-configs/{classification => image-classification}/cifar10/cifar10_resnetv2_32.yaml (100%) rename examples-configs/{classification => image-classification}/cifar10/cifar10_resnext_29_c1.yaml (100%) rename examples-configs/{classification => image-classification}/cifar10/cifar10_resnext_29_c8.yaml (100%) rename examples-configs/{classification => image-classification}/mnist/mnist_cnn_01.yaml (100%) rename examples-configs/{nlp/generation => language-modeling/shakespeare}/gen_shakespeare.yaml (100%) rename examples-configs/{nlp/classification => text-classification}/imdb_sentiment_gru.yaml (100%) diff --git a/examples-configs/gan/mnist/mnist_gan.yaml b/examples-configs/generative-adversarial/mnist/mnist_gan.yaml similarity index 100% rename from examples-configs/gan/mnist/mnist_gan.yaml rename to examples-configs/generative-adversarial/mnist/mnist_gan.yaml diff --git a/examples-configs/latent/mnist/mnist_cnn_iwae.yaml b/examples-configs/generative-likelihood/mnist/mnist_cnn_iwae.yaml similarity index 100% rename from examples-configs/latent/mnist/mnist_cnn_iwae.yaml rename to examples-configs/generative-likelihood/mnist/mnist_cnn_iwae.yaml diff --git a/examples-configs/latent/mnist/mnist_cnn_vae.yaml b/examples-configs/generative-likelihood/mnist/mnist_cnn_vae.yaml similarity index 100% rename from examples-configs/latent/mnist/mnist_cnn_vae.yaml rename to examples-configs/generative-likelihood/mnist/mnist_cnn_vae.yaml diff --git a/examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml 
b/examples-configs/generative-likelihood/mnist/mnist_cnn_vq_vae.yaml similarity index 100% rename from examples-configs/latent/mnist/mnist_cnn_vq_vae.yaml rename to examples-configs/generative-likelihood/mnist/mnist_cnn_vq_vae.yaml diff --git a/examples-configs/latent/mnist/mnist_fc_iwae.yaml b/examples-configs/generative-likelihood/mnist/mnist_fc_iwae.yaml similarity index 100% rename from examples-configs/latent/mnist/mnist_fc_iwae.yaml rename to examples-configs/generative-likelihood/mnist/mnist_fc_iwae.yaml diff --git a/examples-configs/latent/mnist/mnist_fc_vae.yaml b/examples-configs/generative-likelihood/mnist/mnist_fc_vae.yaml similarity index 100% rename from examples-configs/latent/mnist/mnist_fc_vae.yaml rename to examples-configs/generative-likelihood/mnist/mnist_fc_vae.yaml diff --git a/examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml b/examples-configs/image-classification/cats_vs_dogs/cats_vs_dogs_resnet34.yaml similarity index 100% rename from examples-configs/classification/imagenet_transfer/cats_vs_dogs_resnet34.yaml rename to examples-configs/image-classification/cats_vs_dogs/cats_vs_dogs_resnet34.yaml diff --git a/examples-configs/classification/cifar10/cifar10_cnn_01.yaml b/examples-configs/image-classification/cifar10/cifar10_cnn_01.yaml similarity index 100% rename from examples-configs/classification/cifar10/cifar10_cnn_01.yaml rename to examples-configs/image-classification/cifar10/cifar10_cnn_01.yaml diff --git a/examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml b/examples-configs/image-classification/cifar10/cifar10_resnetv1_110.yaml similarity index 100% rename from examples-configs/classification/cifar10/cifar10_resnetv1_110.yaml rename to examples-configs/image-classification/cifar10/cifar10_resnetv1_110.yaml diff --git a/examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml b/examples-configs/image-classification/cifar10/cifar10_resnetv1_32.yaml similarity index 100% rename from 
examples-configs/classification/cifar10/cifar10_resnetv1_32.yaml rename to examples-configs/image-classification/cifar10/cifar10_resnetv1_32.yaml diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml b/examples-configs/image-classification/cifar10/cifar10_resnetv2_110.yaml similarity index 100% rename from examples-configs/classification/cifar10/cifar10_resnetv2_110.yaml rename to examples-configs/image-classification/cifar10/cifar10_resnetv2_110.yaml diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml b/examples-configs/image-classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml similarity index 100% rename from examples-configs/classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml rename to examples-configs/image-classification/cifar10/cifar10_resnetv2_164_bottleneck.yaml diff --git a/examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml b/examples-configs/image-classification/cifar10/cifar10_resnetv2_32.yaml similarity index 100% rename from examples-configs/classification/cifar10/cifar10_resnetv2_32.yaml rename to examples-configs/image-classification/cifar10/cifar10_resnetv2_32.yaml diff --git a/examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml b/examples-configs/image-classification/cifar10/cifar10_resnext_29_c1.yaml similarity index 100% rename from examples-configs/classification/cifar10/cifar10_resnext_29_c1.yaml rename to examples-configs/image-classification/cifar10/cifar10_resnext_29_c1.yaml diff --git a/examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml b/examples-configs/image-classification/cifar10/cifar10_resnext_29_c8.yaml similarity index 100% rename from examples-configs/classification/cifar10/cifar10_resnext_29_c8.yaml rename to examples-configs/image-classification/cifar10/cifar10_resnext_29_c8.yaml diff --git a/examples-configs/classification/mnist/mnist_cnn_01.yaml 
b/examples-configs/image-classification/mnist/mnist_cnn_01.yaml similarity index 100% rename from examples-configs/classification/mnist/mnist_cnn_01.yaml rename to examples-configs/image-classification/mnist/mnist_cnn_01.yaml diff --git a/examples-configs/nlp/generation/gen_shakespeare.yaml b/examples-configs/language-modeling/shakespeare/gen_shakespeare.yaml similarity index 100% rename from examples-configs/nlp/generation/gen_shakespeare.yaml rename to examples-configs/language-modeling/shakespeare/gen_shakespeare.yaml diff --git a/examples-configs/nlp/classification/imdb_sentiment_gru.yaml b/examples-configs/text-classification/imdb_sentiment_gru.yaml similarity index 100% rename from examples-configs/nlp/classification/imdb_sentiment_gru.yaml rename to examples-configs/text-classification/imdb_sentiment_gru.yaml From d826300ed08415f9f04e46f98b388eda9958005b Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 20 Oct 2019 12:56:24 -0700 Subject: [PATCH 139/162] Improving WANDB bindings. 
--- vel/storage/streaming/wandb.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vel/storage/streaming/wandb.py b/vel/storage/streaming/wandb.py index 24bf0297..3db02c75 100644 --- a/vel/storage/streaming/wandb.py +++ b/vel/storage/streaming/wandb.py @@ -7,13 +7,14 @@ class WandbStreaming(Callback): """ Stream live results from training to WandB """ - def __init__(self, model_config: ModelConfig): + def __init__(self, model_config: ModelConfig, project: str): self.model_config = model_config + self.project = project def on_train_begin(self, training_info: TrainingInfo) -> None: wandb.init( job_type='train', - project='vel', + project=self.project, dir=self.model_config.output_dir('wandb'), group=self.model_config.name, name=self.model_config.run_name, @@ -27,6 +28,6 @@ def on_epoch_end(self, epoch_info): wandb.log(row=result, step=epoch_info.global_epoch_idx) -def create(model_config): +def create(model_config, project: str = 'vel'): """ Vel factory function """ - return WandbStreaming(model_config) + return WandbStreaming(model_config, project=project) From 7734186be55a071c19eb6d5a976cd45ac267f116 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 20 Oct 2019 13:19:51 -0700 Subject: [PATCH 140/162] Fixing a bug in metric key initialization. 
--- vel/api/info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vel/api/info.py b/vel/api/info.py index e76cec24..4177d592 100644 --- a/vel/api/info.py +++ b/vel/api/info.py @@ -122,7 +122,7 @@ def _reset_metrics(self): def value(self, dataset=None): """ Return current dictionary value of the metrics """ from vel.metric import MetricKey - return {MetricKey(dataset, m.name, m.scope): m.value() for m in self.metrics} + return {MetricKey(m.name, m.scope, dataset): m.value() for m in self.metrics} def intermediate_value(self, metric): """ Return an intermediate (inter-epoch) value of a metric """ From 883d95d0917232967b5bc0cbc78612b8d34dc6e4 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 24 Oct 2019 17:53:00 -0700 Subject: [PATCH 141/162] Misc changes to logging and stuff. --- .velproject.yaml | 3 +++ .../mnist/mnist_fc_vae.yaml | 6 ++--- vel/api/callback.py | 4 +-- vel/api/info.py | 4 +-- vel/api/model_config.py | 9 +++++-- vel/callback/time_tracker.py | 4 +-- vel/command/phase_train_command.py | 6 ++--- vel/command/train_command.py | 2 +- vel/model/latent/fc_vae.py | 26 +++++++++---------- vel/model/latent/vae_base.py | 7 ++--- vel/rl/command/rl_train_command.py | 2 +- vel/storage/classic.py | 4 +++ vel/storage/streaming/tensorboard.py | 4 +-- vel/storage/streaming/wandb.py | 26 ++++++++++++++----- 14 files changed, 64 insertions(+), 43 deletions(-) diff --git a/.velproject.yaml b/.velproject.yaml index 5e25ba22..87ba09d0 100644 --- a/.velproject.yaml +++ b/.velproject.yaml @@ -1,3 +1,5 @@ +project_name: 'vel' + storage: name: vel.storage.classic @@ -13,6 +15,7 @@ storage: - name: vel.storage.streaming.tensorboard # - name: vel.storage.streaming.visdom - name: vel.storage.streaming.stdout +# - name: vel.storage.streaming.wandb checkpoint_strategy: diff --git a/examples-configs/generative-likelihood/mnist/mnist_fc_vae.yaml b/examples-configs/generative-likelihood/mnist/mnist_fc_vae.yaml index e61c2028..b9910753 100644 --- 
a/examples-configs/generative-likelihood/mnist/mnist_fc_vae.yaml +++ b/examples-configs/generative-likelihood/mnist/mnist_fc_vae.yaml @@ -30,9 +30,9 @@ loader: optimizer: name: vel.optimizer.radam - lr: 1.0e-2 - eps: 1.0e-4 - max_grad_norm: 1.0 + lr: 1.0e-3 + eps: 1.0e-3 +# max_grad_norm: 10.0 scheduler: diff --git a/vel/api/callback.py b/vel/api/callback.py index 6a4e7819..372ac699 100644 --- a/vel/api/callback.py +++ b/vel/api/callback.py @@ -15,10 +15,10 @@ def on_initialization(self, training_info: TrainingInfo) -> None: """ pass - def on_train_begin(self, training_info: TrainingInfo) -> None: + def on_train_begin(self, training_info: TrainingInfo, model) -> None: """ Beginning of a training process - is run every time a training process is started, even if it's restarted from - a checkpoint. + a checkpoint. Can access the model that is used for this training. """ pass diff --git a/vel/api/info.py b/vel/api/info.py index 4177d592..7dd8e996 100644 --- a/vel/api/info.py +++ b/vel/api/info.py @@ -54,13 +54,13 @@ def initialize(self): for callback in self.callbacks: callback.on_initialization(self) - def on_train_begin(self): + def on_train_begin(self, model): """ Beginning of a training process - is run every time a training process is started, even if it's restarted from a checkpoint. 
""" for callback in self.callbacks: - callback.on_train_begin(self) + callback.on_train_begin(self, model) def on_train_end(self): """ diff --git a/vel/api/model_config.py b/vel/api/model_config.py index 62bcbdbc..7e091bed 100644 --- a/vel/api/model_config.py +++ b/vel/api/model_config.py @@ -213,13 +213,19 @@ def output_dir(self, *args) -> str: return os.path.join(self.project_dir, self.output_directory_name, *args) def meta_dir(self, *args) -> str: - """ Return directory for openai output files for this model """ + """ Return directory for metadata output files for this model """ return self.output_dir('meta', self.run_name, *args) def data_dir(self, *args) -> str: """ Directory where to store data """ return os.path.normpath(os.path.join(self.project_dir, 'data', *args)) + def model_output_dir(self, *args): + """ Return an output directory of given kind for given kind only """ + fname = os.path.join(self.project_dir, self.output_directory_name, args[0], self.run_name, *(args[1:])) + os.makedirs(os.path.dirname(fname), exist_ok=True) + return fname + def checkpoint_dir(self, *args) -> str: """ Return checkpoint directory for this model """ return self.output_dir('checkpoints', self.run_name, *args) @@ -227,7 +233,6 @@ def checkpoint_dir(self, *args) -> str: def openai_dir(self, *args) -> str: """ Return directory for openai output files for this model """ return self.output_dir('openai', self.run_name, *args) - #################################################################################################################### # NAME UTILITIES @property diff --git a/vel/callback/time_tracker.py b/vel/callback/time_tracker.py index df280213..6e298c50 100644 --- a/vel/callback/time_tracker.py +++ b/vel/callback/time_tracker.py @@ -1,6 +1,6 @@ import time -from vel.api import BatchInfo, TrainingInfo, Callback +from vel.api import BatchInfo, TrainingInfo, Callback, Model class TimeTracker(Callback): @@ -12,7 +12,7 @@ def __init__(self): def on_initialization(self, 
training_info: TrainingInfo): training_info['time'] = 0.0 - def on_train_begin(self, training_info: TrainingInfo): + def on_train_begin(self, training_info: TrainingInfo, model: Model): self.start_time = time.time() def on_batch_end(self, batch_info: BatchInfo, dataset=None): diff --git a/vel/command/phase_train_command.py b/vel/command/phase_train_command.py index 2670fcac..7992364a 100644 --- a/vel/command/phase_train_command.py +++ b/vel/command/phase_train_command.py @@ -3,9 +3,9 @@ import typing import vel.api as api -import vel.data as data import vel.train as train +from vel.data.loader import DatasetLoader from vel.metric.samples_per_sec import SamplesPerSec from vel.callback.time_tracker import TimeTracker from vel.callback.sample_tracker import SampleTracker @@ -14,7 +14,7 @@ class PhaseTrainCommand: """ Training command - learn according to a set of phases """ - def __init__(self, model_config: api.ModelConfig, model_factory: api.ModuleFactory, loader: data.DatasetLoader, + def __init__(self, model_config: api.ModelConfig, model_factory: api.ModuleFactory, loader: DatasetLoader, storage: api.Storage, phases: typing.List[train.TrainPhase], callbacks=None, restart=True): self.model_config = model_config @@ -72,7 +72,7 @@ def run(self): if training_info.start_epoch_idx > 0: current_phase.restore(training_info, local_idx, trainer.model, hidden_state) - training_info.on_train_begin() + training_info.on_train_begin(trainer.model) for global_epoch_idx in range(training_info.start_epoch_idx + 1, self.full_number_of_epochs + 1): iteration_phase_idx = self._select_phase_right_bound(global_epoch_idx-1) diff --git a/vel/command/train_command.py b/vel/command/train_command.py index bbe45e8b..3ea825e6 100644 --- a/vel/command/train_command.py +++ b/vel/command/train_command.py @@ -37,7 +37,7 @@ def run(self): # Check if training was already started and potentially continue where we left off training_info = self.start_training(trainer, optimizer) - 
training_info.on_train_begin() + training_info.on_train_begin(trainer.model) for global_epoch_idx in range(training_info.start_epoch_idx + 1, self.epochs + 1): epoch_info = api.EpochInfo( diff --git a/vel/model/latent/fc_vae.py b/vel/model/latent/fc_vae.py index 8c5fbad4..739443d8 100644 --- a/vel/model/latent/fc_vae.py +++ b/vel/model/latent/fc_vae.py @@ -78,19 +78,19 @@ def decoder_sample(self, decoded: torch.Tensor) -> torch.Tensor: """ Sample from a decoder distribution - we ignore that since it's so weak in this case """ return decoded - @staticmethod - def _weight_initializer(tensor): - init.xavier_uniform_(tensor.weight, gain=init.calculate_gain('tanh')) - init.constant_(tensor.bias, 0.01) - - def reset_weights(self): - for m in it.chain(self.encoder, self.decoder): - if isinstance(m, nn.Conv2d): - self._weight_initializer(m) - elif isinstance(m, nn.ConvTranspose2d): - self._weight_initializer(m) - elif isinstance(m, nn.Linear): - self._weight_initializer(m) + # @staticmethod + # def _weight_initializer(tensor): + # init.xavier_normal_(tensor.weight, gain=init.calculate_gain('tanh')) + # init.zeros_(tensor.bias) + # + # def reset_weights(self): + # for m in it.chain(self.encoder.modules(), self.decoder.modules()): + # if isinstance(m, nn.Conv2d): + # self._weight_initializer(m) + # elif isinstance(m, nn.ConvTranspose2d): + # self._weight_initializer(m) + # elif isinstance(m, nn.Linear): + # self._weight_initializer(m) def create(img_rows, img_cols, img_channels, layers=None, representation_length=32, diff --git a/vel/model/latent/vae_base.py b/vel/model/latent/vae_base.py index 4c3509ab..be480e50 100644 --- a/vel/model/latent/vae_base.py +++ b/vel/model/latent/vae_base.py @@ -160,8 +160,5 @@ def nll(self, sample: torch.Tensor, num_posterior_samples: int = 1): # Utility methods def log_mean_exp(self, inputs, dim=1): """ Perform log(mean(exp(data))) in a numerically stable way """ - if inputs.size(dim) == 1: - return inputs - else: - input_max = 
inputs.max(dim, keepdim=True)[0] - return (inputs - input_max).exp().mean(dim).log() + input_max.squeeze(dim=dim) + input_max = inputs.max(dim, keepdim=True)[0] + return (inputs - input_max).exp().mean(dim).log() + input_max.squeeze(dim=dim) diff --git a/vel/rl/command/rl_train_command.py b/vel/rl/command/rl_train_command.py index b63807fb..b4d10758 100644 --- a/vel/rl/command/rl_train_command.py +++ b/vel/rl/command/rl_train_command.py @@ -70,7 +70,7 @@ def run(self): training_info = self.start_training(reinforcer, optimizer) reinforcer.initialize_training(training_info) - training_info.on_train_begin() + training_info.on_train_begin(reinforcer.policy) global_epoch_idx = training_info.start_epoch_idx + 1 diff --git a/vel/storage/classic.py b/vel/storage/classic.py index de130c52..e61dd91b 100644 --- a/vel/storage/classic.py +++ b/vel/storage/classic.py @@ -135,6 +135,10 @@ def checkpoint_hidden_filename(self, epoch_idx) -> str: """ Return checkpoint filename for this model - hidden state """ return self.model_config.checkpoint_dir('checkpoint_hidden_{:08}.data'.format(epoch_idx)) + def last_checkpoint_filename(self) -> str: + """ return checkpoint filename for the last saved checkpoint """ + return self.checkpoint_filename(self.last_epoch_idx()) + #################################################################################################################### # Internal interface def _persisted_last_epoch(self) -> int: diff --git a/vel/storage/streaming/tensorboard.py b/vel/storage/streaming/tensorboard.py index f75ca570..dde270a7 100644 --- a/vel/storage/streaming/tensorboard.py +++ b/vel/storage/streaming/tensorboard.py @@ -1,7 +1,7 @@ import os import shutil -from vel.api import ModelConfig, Callback, TrainingInfo, EpochInfo +from vel.api import ModelConfig, Callback, TrainingInfo, EpochInfo, Model from torch.utils.tensorboard import SummaryWriter @@ -12,7 +12,7 @@ def __init__(self, model_config: ModelConfig): self.model_config = model_config self.logdir = 
self.model_config.output_dir('tensorboard', self.model_config.run_name) - def on_train_begin(self, training_info: TrainingInfo) -> None: + def on_train_begin(self, training_info: TrainingInfo, model: Model) -> None: """ Potentially cleanup previous runs """ if training_info.start_epoch_idx == 0: if os.path.exists(self.logdir): diff --git a/vel/storage/streaming/wandb.py b/vel/storage/streaming/wandb.py index 3db02c75..ed104fd3 100644 --- a/vel/storage/streaming/wandb.py +++ b/vel/storage/streaming/wandb.py @@ -1,33 +1,45 @@ import wandb +import yaml -from vel.api import ModelConfig, Callback, TrainingInfo +from vel.api import ModelConfig, Callback, TrainingInfo, Model class WandbStreaming(Callback): """ Stream live results from training to WandB """ - def __init__(self, model_config: ModelConfig, project: str): + def __init__(self, model_config: ModelConfig, register_model: bool = False, write_hyperparams: bool = True): self.model_config = model_config - self.project = project + self.project = self.model_config.provide('project_name') + self.register_model = register_model + self.write_hyperparams = write_hyperparams - def on_train_begin(self, training_info: TrainingInfo) -> None: + def on_train_begin(self, training_info: TrainingInfo, model: Model) -> None: wandb.init( job_type='train', project=self.project, - dir=self.model_config.output_dir('wandb'), + dir=self.model_config.model_output_dir('wandb'), group=self.model_config.name, name=self.model_config.run_name, resume=training_info.start_epoch_idx > 0, tags=[self.model_config.tag] if self.model_config.tag else [] ) + if self.register_model: + wandb.watch(model) + + if self.write_hyperparams: + path = self.model_config.model_output_dir('wandb', 'vel-config.yaml') + with open(path, 'wt') as fp: + yaml.dump(self.model_config.render_configuration(), fp) + wandb.save(path) + def on_epoch_end(self, epoch_info): """ Send data to wandb """ result = {k.format(): v for k, v in epoch_info.result.items()} 
wandb.log(row=result, step=epoch_info.global_epoch_idx) -def create(model_config, project: str = 'vel'): +def create(model_config, register_model: bool = False): """ Vel factory function """ - return WandbStreaming(model_config, project=project) + return WandbStreaming(model_config, register_model=register_model) From 81e742a9264549510b9ce73afdedc3227a1e0257 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 31 Oct 2019 13:17:28 -0700 Subject: [PATCH 142/162] Example how to run config from file. --- examples-scripts/run_config.py | 17 +++++++++++++++++ vel/api/model_config.py | 11 ++++++++++- vel/launcher.py | 5 +---- 3 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 examples-scripts/run_config.py diff --git a/examples-scripts/run_config.py b/examples-scripts/run_config.py new file mode 100644 index 00000000..18200680 --- /dev/null +++ b/examples-scripts/run_config.py @@ -0,0 +1,17 @@ +import os.path + + +from vel.api import ModelConfig + +project_dir = ModelConfig.find_project_directory(os.getcwd()) + +model_config = ModelConfig.from_file( + filename=os.path.join( + project_dir, 'examples-configs/generative-likelihood/mnist/mnist_cnn_iwae.yaml', + ), + run_number=2, +) + +model_config.set_seed() +model_config.banner('train') +model_config.run_command('train') diff --git a/vel/api/model_config.py b/vel/api/model_config.py index 7e091bed..a33dc9f7 100644 --- a/vel/api/model_config.py +++ b/vel/api/model_config.py @@ -197,8 +197,11 @@ def get_command(self, command_name): """ Return object for given command """ return self.provider.instantiate_from_data(self.command_descriptors[command_name]) - def run_command(self, command_name, varargs): + def run_command(self, command_name, varargs=None): """ Instantiate model class """ + if varargs is None: + varargs = [] + command_descriptor = self.get_command(command_name) return command_descriptor.run(*varargs) @@ -333,3 +336,9 @@ def load_trained_model(self): model.load_state_dict(model_state) 
return model + + def set_seed(self): + """ Set random seeds """ + # Set seed already in the launcher + from vel.util.random import set_seed + set_seed(self.seed) diff --git a/vel/launcher.py b/vel/launcher.py index 8b537415..cd3a08de 100644 --- a/vel/launcher.py +++ b/vel/launcher.py @@ -51,10 +51,7 @@ def main(): # This needs to be called before any of PyTorch module is imported multiprocessing.set_start_method(multiprocessing_setting) - # Set seed already in the launcher - from vel.util.random import set_seed - set_seed(model_config.seed) - + model_config.set_seed() model_config.banner(args.command) if args.profile: From 73edf972cc08b634fb8a66120913dc3018299f8e Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 2 Nov 2019 20:25:31 -0700 Subject: [PATCH 143/162] Reordering default velproject. --- .velproject.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.velproject.yaml b/.velproject.yaml index 87ba09d0..b9ba7d20 100644 --- a/.velproject.yaml +++ b/.velproject.yaml @@ -13,8 +13,8 @@ storage: streaming: - name: vel.storage.streaming.tensorboard -# - name: vel.storage.streaming.visdom - name: vel.storage.streaming.stdout +# - name: vel.storage.streaming.visdom # - name: vel.storage.streaming.wandb From c21d608b1128fb67a34fc5213d0830aacc461568 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 2 Nov 2019 20:28:11 -0700 Subject: [PATCH 144/162] Updated bibliography --- docs/Bibliography.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/Bibliography.md b/docs/Bibliography.md index 31cfef82..3453e484 100644 --- a/docs/Bibliography.md +++ b/docs/Bibliography.md @@ -14,6 +14,11 @@ on this library: - (Sep 2015) **Importance Weighted Autoencoders** Yuri Burda, Roger Grosse, Ruslan Salakhutdinov https://arxiv.org/abs/1509.00519 + + +- (Nov 2017) **Neural Discrete Representation Learning** + Aaron van den Oord, Oriol Vinyals, Koray Kavukcuoglu + https://arxiv.org/abs/1711.00937 ### Learning 
rate and optimization @@ -91,10 +96,19 @@ on this library: https://arxiv.org/abs/1707.06887 - (Oct 2017) **Rainbow: Combining Improvements in Deep Reinforcement Learning** - Matteo Hessel, Joseph Modayil, Hado van Hasselt, Tom Schaul, Georg Ostrovski, Will Dabney, Dan Horgan, Bilal Piot, Mohammad Azar, David Silver + Matteo Hessel, Joseph Modayil, Hado van Hasselt, Tom Schaul, Georg Ostrovski, Will Dabney, Dan Horgan, + Bilal Piot, Mohammad Azar, David Silver https://arxiv.org/abs/1710.02298 +### Generative adversarial networks + +- (Jun 2014) **Generative Adversarial Networks** + Ian J. Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, + Yoshua Bengio + https://arxiv.org/abs/1406.2661 + + ### Policy gradient methods From 2a8949a345006131f9c02fdec61f2bdfc01ab10c Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 2 Nov 2019 23:50:31 -0700 Subject: [PATCH 145/162] Aggregate metrics from optimizers in multi-optimzier setup. --- vel/api/optimizer.py | 17 +++++++++++++++-- vel/metric/base/base_metric.py | 5 +++++ vel/model/gan/simple_gan.py | 30 +++++++++++++++++++++++------- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/vel/api/optimizer.py b/vel/api/optimizer.py index e5d8a50c..4b926f53 100644 --- a/vel/api/optimizer.py +++ b/vel/api/optimizer.py @@ -178,7 +178,16 @@ def step(self, closure=None) -> dict: for name, optimizer in self.optimizers.items(): metrics = optimizer.step() - flatten_dict(metrics, output, name) + output[name] = metrics + + return output + + def aggregate_metrics(self, metrics) -> dict: + """ Aggregate metrics from multiple optimizers """ + output = {} + + for key, value in metrics.items(): + flatten_dict(value, output, key) return output @@ -195,7 +204,11 @@ def add_param_group(self, param_group: dict) -> None: def metrics(self) -> list: """ Set of metrics for this model """ # TODO(jerry): aggregate metrics - return [] + return [ + metric.prefix(name) + for name, 
optimizer in self.optimizers.items() + for metric in optimizer.metrics() + ] class OptimizerFactory: diff --git a/vel/metric/base/base_metric.py b/vel/metric/base/base_metric.py index 8f073247..93bcb7b2 100644 --- a/vel/metric/base/base_metric.py +++ b/vel/metric/base/base_metric.py @@ -42,6 +42,11 @@ def write_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict) """ Potentially store some metric state to the checkpoint """ pass + def prefix(self, prefix: str): + """ Prepend a prefix to the name of the metric """ + self.name = f"{prefix}.{self.name}" + return self + def load_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict) -> None: """ Potentially load some metric state from the checkpoint """ pass diff --git a/vel/model/gan/simple_gan.py b/vel/model/gan/simple_gan.py index a8f56044..eb22e09c 100644 --- a/vel/model/gan/simple_gan.py +++ b/vel/model/gan/simple_gan.py @@ -32,7 +32,7 @@ def block(in_feat, out_feat, normalize=True): *block(256, 512), *block(512, 1024), nn.Linear(1024, int(np.prod(img_shape))), - nn.Tanh() + nn.Sigmoid() ) def forward(self, z): @@ -114,7 +114,7 @@ def optimize(self, data: dict, optimizer: VelMultiOptimizer) -> dict: g_loss = self.adversarial_loss(self.discriminator(gen_imgs), valid) g_loss.backward() - optimizer_G.step() + g_metrics = optimizer_G.step() # --------------------- # Train Discriminator @@ -123,16 +123,28 @@ def optimize(self, data: dict, optimizer: VelMultiOptimizer) -> dict: optimizer_D.zero_grad() # Measure discriminator's ability to classify real from generated samples - real_loss = self.adversarial_loss(self.discriminator(input_data), valid) - fake_loss = self.adversarial_loss(self.discriminator(gen_imgs.detach()), fake) + input_data_prob = self.discriminator(input_data) + generated_images_prob = self.discriminator(gen_imgs.detach()) + + real_loss = self.adversarial_loss(input_data_prob, valid) + fake_loss = self.adversarial_loss(generated_images_prob, fake) + d_loss = (real_loss 
+ fake_loss) / 2 d_loss.backward() - optimizer_D.step() + d_metrics = optimizer_D.step() + + optimizer_metrics = optimizer.aggregate_metrics({ + 'generator': g_metrics, + 'discriminator': d_metrics + }) return { + **optimizer_metrics, 'gen_loss': g_loss.item(), - 'disc_loss': d_loss.item() + 'disc_loss': d_loss.item(), + 'discriminator_real_accuracy': (input_data_prob > 0.5).float().mean().item(), + 'discriminator_fake_accuracy': (generated_images_prob < 0.5).float().mean().item(), } def validate(self, data: dict) -> dict: @@ -142,7 +154,9 @@ def validate(self, data: dict) -> dict: """ return { 'gen_loss': 0.0, - 'disc_loss': 0.0 + 'disc_loss': 0.0, + 'discriminator_real_accuracy': 0.0, + 'discriminator_fake_accuracy': 0.0, } def metrics(self): @@ -150,6 +164,8 @@ def metrics(self): return [ AveragingNamedMetric('gen_loss', scope="train"), AveragingNamedMetric('disc_loss', scope="train"), + AveragingNamedMetric('discriminator_real_accuracy', scope="train"), + AveragingNamedMetric('discriminator_fake_accuracy', scope="train"), ] From d09942fdbc97bbd0180687d3ee6664bd4d7b3ced Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sun, 3 Nov 2019 18:18:59 -0800 Subject: [PATCH 146/162] Add image metrics to tensorboard and somehow stabilize the MNIST GAN. 
--- .../mnist/mnist_gan.yaml | 6 ++- vel/api/info.py | 2 +- vel/metric/__init__.py | 1 + vel/metric/base/base_metric.py | 5 +++ vel/metric/base/image_metric.py | 40 +++++++++++++++++++ vel/model/gan/simple_gan.py | 22 ++++++++-- vel/storage/streaming/stdout.py | 2 + vel/storage/streaming/tensorboard.py | 26 ++++++++---- vel/storage/streaming/visdom.py | 3 +- vel/storage/streaming/wandb.py | 2 +- 10 files changed, 92 insertions(+), 17 deletions(-) create mode 100644 vel/metric/base/image_metric.py diff --git a/examples-configs/generative-adversarial/mnist/mnist_gan.yaml b/examples-configs/generative-adversarial/mnist/mnist_gan.yaml index 6a3ea519..5a52deae 100644 --- a/examples-configs/generative-adversarial/mnist/mnist_gan.yaml +++ b/examples-configs/generative-adversarial/mnist/mnist_gan.yaml @@ -41,7 +41,9 @@ loader: optimizer: name: vel.optimizer.radam lr: 2.0e-4 - eps: 1.0e-4 + eps: 1.0e-3 + betas: [0.5, 0.999] + max_grad_norm: 5.0 #scheduler: @@ -58,4 +60,4 @@ commands: train: name: vel.command.train_command - epochs: 200 + epochs: 400 diff --git a/vel/api/info.py b/vel/api/info.py index 7dd8e996..48d25883 100644 --- a/vel/api/info.py +++ b/vel/api/info.py @@ -122,7 +122,7 @@ def _reset_metrics(self): def value(self, dataset=None): """ Return current dictionary value of the metrics """ from vel.metric import MetricKey - return {MetricKey(m.name, m.scope, dataset): m.value() for m in self.metrics} + return {MetricKey(m.name, m.scope, dataset, m.metric_type()): m.value() for m in self.metrics} def intermediate_value(self, metric): """ Return an intermediate (inter-epoch) value of a metric """ diff --git a/vel/metric/__init__.py b/vel/metric/__init__.py index 29d9f310..244c8acd 100644 --- a/vel/metric/__init__.py +++ b/vel/metric/__init__.py @@ -3,3 +3,4 @@ AveragingMetric, AveragingNamedMetric, AveragingSupervisedMetric, DefaultAveragingNamedMetric # noqa ) from .base.value_metric import ValueMetric # noqa +from .base.image_metric import RandomImageMetric # noqa 
diff --git a/vel/metric/base/base_metric.py b/vel/metric/base/base_metric.py index 93bcb7b2..5ff1c10a 100644 --- a/vel/metric/base/base_metric.py +++ b/vel/metric/base/base_metric.py @@ -10,6 +10,7 @@ class MetricKey: name: str scope: str dataset: typing.Optional[str] = None + metric_type: str = 'scalar' def format(self): """ Format a metric key into a string """ @@ -50,3 +51,7 @@ def prefix(self, prefix: str): def load_state_dict(self, training_info: TrainingInfo, hidden_state_dict: dict) -> None: """ Potentially load some metric state from the checkpoint """ pass + + def metric_type(self) -> str: + """ Type of the metric """ + return 'scalar' diff --git a/vel/metric/base/image_metric.py b/vel/metric/base/image_metric.py new file mode 100644 index 00000000..c4ca40da --- /dev/null +++ b/vel/metric/base/image_metric.py @@ -0,0 +1,40 @@ +import numpy as np + +from .base_metric import BaseMetric + + +class ImageMetric(BaseMetric): + """ Metric that logs an image """ + + def metric_type(self) -> str: + return 'image' + + +class RandomImageMetric(ImageMetric): + """ Just pick a random image from the supplied list """ + + def __init__(self, name, scope="general"): + super().__init__(name, scope=scope) + + self.image = None + + def calculate(self, batch_info): + batch = batch_info[self.name] + + if batch is not None: + if len(batch.shape) > 3: + image = batch[np.random.choice(batch.shape[0])] + else: + image = batch + + if image.shape[2] == 1: + image = np.broadcast_to(image, shape=(image.shape[0], image.shape[1], 3)) + + self.image = image + + def reset(self): + self.image = None + + def value(self): + return self.image + diff --git a/vel/model/gan/simple_gan.py b/vel/model/gan/simple_gan.py index eb22e09c..310c41d1 100644 --- a/vel/model/gan/simple_gan.py +++ b/vel/model/gan/simple_gan.py @@ -9,7 +9,7 @@ from vel.api import OptimizedModel, ModuleFactory, OptimizerFactory from vel.api.optimizer import VelMultiOptimizer -from vel.metric import AveragingNamedMetric +from 
vel.metric import AveragingNamedMetric, RandomImageMetric class Generator(nn.Module): @@ -46,11 +46,18 @@ def __init__(self, img_shape, latent_dim): super(Discriminator, self).__init__() self.model = nn.Sequential( - nn.Linear(int(np.prod(img_shape)), 512), + # nn.Linear(int(np.prod(img_shape)), 512), + # nn.LeakyReLU(0.2, inplace=True), + # nn.Linear(512, 256), + # nn.LeakyReLU(0.2, inplace=True), + # nn.Linear(256, 1), + nn.Linear(int(np.prod(img_shape)), 256), nn.LeakyReLU(0.2, inplace=True), - nn.Linear(512, 256), + nn.Dropout(0.2), + nn.Linear(256, 128), nn.LeakyReLU(0.2, inplace=True), - nn.Linear(256, 1), + nn.Dropout(0.2), + nn.Linear(128, 1), nn.Sigmoid(), ) @@ -139,12 +146,17 @@ def optimize(self, data: dict, optimizer: VelMultiOptimizer) -> dict: 'discriminator': d_metrics }) + # Log images to see how we're doing + np_image = gen_imgs[0].detach().cpu().numpy() + np_image = np.transpose(np_image, (2, 1, 0)) + return { **optimizer_metrics, 'gen_loss': g_loss.item(), 'disc_loss': d_loss.item(), 'discriminator_real_accuracy': (input_data_prob > 0.5).float().mean().item(), 'discriminator_fake_accuracy': (generated_images_prob < 0.5).float().mean().item(), + 'generated_image': np_image } def validate(self, data: dict) -> dict: @@ -157,6 +169,7 @@ def validate(self, data: dict) -> dict: 'disc_loss': 0.0, 'discriminator_real_accuracy': 0.0, 'discriminator_fake_accuracy': 0.0, + 'generated_image': None } def metrics(self): @@ -166,6 +179,7 @@ def metrics(self): AveragingNamedMetric('disc_loss', scope="train"), AveragingNamedMetric('discriminator_real_accuracy', scope="train"), AveragingNamedMetric('discriminator_fake_accuracy', scope="train"), + RandomImageMetric('generated_image', scope='train') ] diff --git a/vel/storage/streaming/stdout.py b/vel/storage/streaming/stdout.py index 7dc18f8e..fc53f23f 100644 --- a/vel/storage/streaming/stdout.py +++ b/vel/storage/streaming/stdout.py @@ -31,11 +31,13 @@ def _print_metrics_line(metrics, dataset=None): metrics_list = 
[ "{}/{} {:.04f}".format(k.scope, k.name, metrics[k]) for k in sorted([k for k in metrics.keys() if k.dataset is None]) + if k.metric_type == 'scalar' ] else: metrics_list = [ "{}/{} {:.04f}".format(k.scope, k.name, metrics[k]) for k in sorted([k for k in metrics.keys() if k.dataset == dataset]) + if k.metric_type == 'scalar' ] print('{0: <10}'.format(dataset.capitalize()), " ".join(metrics_list)) diff --git a/vel/storage/streaming/tensorboard.py b/vel/storage/streaming/tensorboard.py index dde270a7..52462d90 100644 --- a/vel/storage/streaming/tensorboard.py +++ b/vel/storage/streaming/tensorboard.py @@ -8,9 +8,10 @@ class TensorboardStreaming(Callback): """ Stream results to tensorboard """ - def __init__(self, model_config: ModelConfig): + def __init__(self, model_config: ModelConfig, record_images=True): self.model_config = model_config self.logdir = self.model_config.output_dir('tensorboard', self.model_config.run_name) + self.record_images = record_images def on_train_begin(self, training_info: TrainingInfo, model: Model) -> None: """ Potentially cleanup previous runs """ @@ -32,15 +33,24 @@ def on_epoch_end(self, epoch_info: EpochInfo): if key.dataset == head: tag = '{}/{}'.format(key.scope, key.name) - summary_writer.add_scalar( - tag=tag, - scalar_value=value, - global_step=epoch_info.global_epoch_idx, - ) + if key.metric_type == 'scalar': + summary_writer.add_scalar( + tag=tag, + scalar_value=value, + global_step=epoch_info.global_epoch_idx, + ) + elif key.metric_type == 'image' and self.record_images: + if value is not None: + summary_writer.add_image( + tag=tag, + img_tensor=value, + global_step=epoch_info.global_epoch_idx, + dataformats='WHC' + ) summary_writer.close() -def create(model_config): +def create(model_config, record_images=True): """ Vel factory function """ - return TensorboardStreaming(model_config) + return TensorboardStreaming(model_config, record_images=record_images) diff --git a/vel/storage/streaming/visdom.py 
b/vel/storage/streaming/visdom.py index b0c31277..c7b95f29 100644 --- a/vel/storage/streaming/visdom.py +++ b/vel/storage/streaming/visdom.py @@ -21,7 +21,8 @@ def __init__(self, model_config: ModelConfig, visdom_settings: VisdomSettings): def on_epoch_end(self, epoch_info): """ Update data in visdom on push """ - metrics_df = pd.DataFrame([epoch_info.result]).set_index('epoch_idx') + result = {k.format(): v for k, v in epoch_info.result.items() if k.metric_type == 'scalar'} + metrics_df = pd.DataFrame([result], index=[epoch_info.global_epoch_idx]) visdom_append_metrics( self.vis, diff --git a/vel/storage/streaming/wandb.py b/vel/storage/streaming/wandb.py index ed104fd3..a0c11d9c 100644 --- a/vel/storage/streaming/wandb.py +++ b/vel/storage/streaming/wandb.py @@ -36,7 +36,7 @@ def on_train_begin(self, training_info: TrainingInfo, model: Model) -> None: def on_epoch_end(self, epoch_info): """ Send data to wandb """ - result = {k.format(): v for k, v in epoch_info.result.items()} + result = {k.format(): v for k, v in epoch_info.result.items() if k.metric_type == 'scalar'} wandb.log(row=result, step=epoch_info.global_epoch_idx) From bda0c39d09707d538712bf58a9729f9f9e758395 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 9 Nov 2019 11:06:52 -0800 Subject: [PATCH 147/162] Updated VAE configs. 
--- .../generative-likelihood/mnist/mnist_cnn_iwae.yaml | 9 +++++++-- .../generative-likelihood/mnist/mnist_cnn_vae.yaml | 12 ++++++------ .../generative-likelihood/mnist/mnist_fc_iwae.yaml | 11 ++++++++--- .../generative-likelihood/mnist/mnist_fc_vae.yaml | 4 ++-- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/examples-configs/generative-likelihood/mnist/mnist_cnn_iwae.yaml b/examples-configs/generative-likelihood/mnist/mnist_cnn_iwae.yaml index fc3d39fd..735f2b7a 100644 --- a/examples-configs/generative-likelihood/mnist/mnist_cnn_iwae.yaml +++ b/examples-configs/generative-likelihood/mnist/mnist_cnn_iwae.yaml @@ -33,7 +33,7 @@ optimizer: name: vel.optimizer.radam lr: 1.0e-3 eps: 1.0e-4 - max_grad_norm: 1.0 + max_grad_norm: 10.0 scheduler: @@ -45,4 +45,9 @@ scheduler: commands: train: name: vel.command.train_command - epochs: 3280 \ No newline at end of file + epochs: 3280 + + nll: + name: vel.command.latent.vae_nll + max_batch: 1_000 + samples: !param samples = 10 diff --git a/examples-configs/generative-likelihood/mnist/mnist_cnn_vae.yaml b/examples-configs/generative-likelihood/mnist/mnist_cnn_vae.yaml index 56340d6b..dc3b20b0 100644 --- a/examples-configs/generative-likelihood/mnist/mnist_cnn_vae.yaml +++ b/examples-configs/generative-likelihood/mnist/mnist_cnn_vae.yaml @@ -45,7 +45,7 @@ optimizer: name: vel.optimizer.radam lr: 1.0e-3 eps: 1.0e-4 - max_grad_norm: 1.0 + max_grad_norm: 10.0 scheduler: @@ -55,11 +55,11 @@ scheduler: commands: - augvis: - name: vel.command.augvis_command - samples: 10 - cases: 5 - train: name: vel.command.train_command epochs: 3280 + + nll: + name: vel.command.latent.vae_nll + max_batch: 1_000 + samples: !param samples = 10 diff --git a/examples-configs/generative-likelihood/mnist/mnist_fc_iwae.yaml b/examples-configs/generative-likelihood/mnist/mnist_fc_iwae.yaml index 48fa116f..83ea2399 100644 --- a/examples-configs/generative-likelihood/mnist/mnist_fc_iwae.yaml +++ 
b/examples-configs/generative-likelihood/mnist/mnist_fc_iwae.yaml @@ -32,8 +32,8 @@ loader: optimizer: name: vel.optimizer.radam lr: 1.0e-3 - eps: 1.0e- - max_grad_norm: 1.0 + eps: 1.0e-4 + max_grad_norm: 10.0 @@ -46,4 +46,9 @@ scheduler: commands: train: name: vel.command.train_command - epochs: 3280 \ No newline at end of file + epochs: 3280 + + nll: + name: vel.command.latent.vae_nll + max_batch: 10_000 + samples: !param samples = 10 diff --git a/examples-configs/generative-likelihood/mnist/mnist_fc_vae.yaml b/examples-configs/generative-likelihood/mnist/mnist_fc_vae.yaml index b9910753..56a3991d 100644 --- a/examples-configs/generative-likelihood/mnist/mnist_fc_vae.yaml +++ b/examples-configs/generative-likelihood/mnist/mnist_fc_vae.yaml @@ -31,8 +31,8 @@ loader: optimizer: name: vel.optimizer.radam lr: 1.0e-3 - eps: 1.0e-3 -# max_grad_norm: 10.0 + eps: 1.0e-4 + max_grad_norm: 10.0 scheduler: From cb11b38516ba0ad1b908da8eac1447f3b63cb1cf Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 9 Nov 2019 11:31:54 -0800 Subject: [PATCH 148/162] VAE benchmarks. --- docs/Benchmarks.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/Benchmarks.md b/docs/Benchmarks.md index 274224fa..c6555e69 100644 --- a/docs/Benchmarks.md +++ b/docs/Benchmarks.md @@ -17,10 +17,9 @@ Each metric I'll try to average over six runs and provide mean and standard devi For VAE models, I'll include upper bound for Negative Log Likelihood (NLL) for given number of importance samples (IS). 
-| Model | NLL (IS=1) | NLL (IS=100) | NLL (IS=5000) | -| ----- | ---------- | ------------ | ------------- | -| FC VAE | 90.98 ± 0.14 | 87.07 ± 0.18 | 86.93 ± 0.18 | -| CNN VAE | -| FC IWAE | -| CNN IWAE | - +| Model | NLL (IS=1) |NLL (IS=100)|NLL (IS=5000)| +|-------------:|------------:|-----------:|------------:| +| FC VAE| 90.85 ± 0.20|87.00 ± 0.28| 86.83 ± 0.26| +|FC IWAE (k=50)|100.53 ± 0.62|82.41 ± 0.05| 80.73 ± 0.09| +| CNN VAE| 86.47 ± 0.11|81.33 ± 0.05| 81.02 ± 0.05| +|CNN IWAE (k=5)| 88.44 ± 0.25|78.78 ± 0.05| 77.77 ± 0.06| From b1524bbc4605d50ab302e9e1d5648ccd151d0179 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 9 Nov 2019 12:05:26 -0800 Subject: [PATCH 149/162] Update to the WANDB stream. --- vel/storage/streaming/wandb.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/vel/storage/streaming/wandb.py b/vel/storage/streaming/wandb.py index a0c11d9c..78ea775c 100644 --- a/vel/storage/streaming/wandb.py +++ b/vel/storage/streaming/wandb.py @@ -8,11 +8,13 @@ class WandbStreaming(Callback): """ Stream live results from training to WandB """ - def __init__(self, model_config: ModelConfig, register_model: bool = False, write_hyperparams: bool = True): + def __init__(self, model_config: ModelConfig, register_model: bool = False, write_hyperparams: bool = True, + wandb_config=None): self.model_config = model_config self.project = self.model_config.provide('project_name') self.register_model = register_model self.write_hyperparams = write_hyperparams + self.wandb_config = {} if wandb_config is None else wandb_config def on_train_begin(self, training_info: TrainingInfo, model: Model) -> None: wandb.init( @@ -20,6 +22,7 @@ def on_train_begin(self, training_info: TrainingInfo, model: Model) -> None: project=self.project, dir=self.model_config.model_output_dir('wandb'), group=self.model_config.name, + config=self.wandb_config, name=self.model_config.run_name, resume=training_info.start_epoch_idx > 0, 
tags=[self.model_config.tag] if self.model_config.tag else [] @@ -40,6 +43,6 @@ def on_epoch_end(self, epoch_info): wandb.log(row=result, step=epoch_info.global_epoch_idx) -def create(model_config, register_model: bool = False): +def create(model_config, register_model: bool = False, wandb_config=None): """ Vel factory function """ - return WandbStreaming(model_config, register_model=register_model) + return WandbStreaming(model_config, register_model=register_model, wandb_config=wandb_config) From 4636bba3f5e95afbd608107466166d80b3a5be85 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 9 Nov 2019 12:05:36 -0800 Subject: [PATCH 150/162] Fixed evaluation command. --- vel/rl/command/evaluate_env_command.py | 39 ++++++++------------------ 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/vel/rl/command/evaluate_env_command.py b/vel/rl/command/evaluate_env_command.py index 0a27aec6..9596e109 100644 --- a/vel/rl/command/evaluate_env_command.py +++ b/vel/rl/command/evaluate_env_command.py @@ -2,24 +2,22 @@ import pandas as pd import torch import tqdm -import typing from vel.api import ModelConfig, TrainingInfo, Storage, ModuleFactory from vel.rl.api import VecEnvFactory +from vel.rl.util.actor import PolicyActor class EvaluateEnvCommand: """ Record environment playthrough as a game """ def __init__(self, model_config: ModelConfig, env_factory: VecEnvFactory, model_factory: ModuleFactory, - storage: Storage, parallel_envs: int, action_noise: typing.Optional[ModuleFactory], takes: int, - sample_args: dict = None): + storage: Storage, parallel_envs: int, takes: int, sample_args: dict = None): self.model_config = model_config self.model_factory = model_factory self.env_factory = env_factory self.storage = storage self.takes = takes self.parallel_envs = parallel_envs - self.action_noise_factory = action_noise self.sample_args = sample_args if sample_args is not None else {} @@ -30,12 +28,9 @@ def run(self): env = self.env_factory.instantiate( 
parallel_envs=self.parallel_envs, preset='record', seed=self.model_config.seed ) - model = self.model_factory.instantiate(action_space=env.action_space).to(device) - - if self.action_noise_factory is not None: - action_noise = self.action_noise_factory.instantiate(environment=env).to(device) - else: - action_noise = None + model = self.model_factory.instantiate( + action_space=env.action_space, observation_space=env.observation_space + ).to(device) training_info = TrainingInfo( start_epoch_idx=self.storage.last_epoch_idx() @@ -48,26 +43,17 @@ def run(self): model.eval() + actor = PolicyActor(num_envs=self.parallel_envs, policy=model, device=device) + episode_rewards = [] episode_lengths = [] observations = env.reset() observations_tensor = torch.from_numpy(observations).to(device) - if model.is_stateful: - hidden_state = model.zero_state(observations.shape[0]).to(device) - with tqdm.tqdm(total=self.takes) as progress_bar: while len(episode_rewards) < self.takes: - if model.is_stateful: - output = model.step(observations_tensor, hidden_state, **self.sample_args) - hidden_state = output['state'] - actions = output['actions'] - else: - actions = model.step(observations_tensor, **self.sample_args)['actions'] - - if action_noise is not None: - actions = action_noise(actions) + actions = actor.act(observations_tensor, **self.sample_args)['actions'] observations, rewards, dones, infos = env.step(actions.cpu().numpy()) observations_tensor = torch.from_numpy(observations).to(device) @@ -78,22 +64,19 @@ def run(self): episode_lengths.append(info['episode']['l']) progress_bar.update(1) - if model.is_stateful: - # Zero state belongiong to finished episodes - dones_tensor = torch.from_numpy(dones.astype(np.float32)).to(device) - hidden_state = hidden_state * (1.0 - dones_tensor.unsqueeze(-1)) + dones_tensor = torch.from_numpy(dones.astype(np.float32)).to(device) + actor.reset_states(dones_tensor) print(pd.DataFrame({'lengths': episode_lengths, 'rewards': 
episode_rewards}).describe()) -def create(model_config, model, vec_env, storage, takes, parallel_envs, action_noise=None, sample_args=None): +def create(model_config, model, vec_env, storage, takes, parallel_envs, sample_args=None): """ Vel factory function """ return EvaluateEnvCommand( model_config=model_config, model_factory=model, env_factory=vec_env, parallel_envs=parallel_envs, - action_noise=action_noise, storage=storage, takes=takes, sample_args=sample_args From 37e1dea2d4d0683669a9451b0cdee9c8c8c1ce6c Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 9 Nov 2019 15:13:55 -0800 Subject: [PATCH 151/162] Make ACER work again. --- vel/rl/api/policy.py | 4 ++++ vel/rl/env_roller/step_env_roller.py | 3 --- vel/rl/env_roller/trajectory_replay_env_roller.py | 7 ++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/vel/rl/api/policy.py b/vel/rl/api/policy.py index d9215180..b69813f0 100644 --- a/vel/rl/api/policy.py +++ b/vel/rl/api/policy.py @@ -71,3 +71,7 @@ def reset_state(self, state, dones): def action(self, observation, state=None, deterministic=False): """ Return policy action for given observation """ return self.act(observation, state=state, deterministic=deterministic)['actions'] + + def value(self, observation, state=None, deterministic=False): + """ Return policy action for given observation """ + return self.act(observation, state=state, deterministic=deterministic)['values'] diff --git a/vel/rl/env_roller/step_env_roller.py b/vel/rl/env_roller/step_env_roller.py index 44782283..cd8b5cd1 100644 --- a/vel/rl/env_roller/step_env_roller.py +++ b/vel/rl/env_roller/step_env_roller.py @@ -72,9 +72,6 @@ def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: flatten_dict(cpu_final_obs, rollout_tensors, root='final') - # for key, value in final_obs.items(): - # rollout_tensors[f"final_{key}"] = value.cpu() - return Trajectories( num_steps=accumulated_tensors['observations'].size(0), 
num_envs=accumulated_tensors['observations'].size(1), diff --git a/vel/rl/env_roller/trajectory_replay_env_roller.py b/vel/rl/env_roller/trajectory_replay_env_roller.py index 1a788025..be516637 100644 --- a/vel/rl/env_roller/trajectory_replay_env_roller.py +++ b/vel/rl/env_roller/trajectory_replay_env_roller.py @@ -7,7 +7,8 @@ Trajectories, Rollout, ReplayEnvRollerBase, ReplayEnvRollerFactoryBase, ReplayBuffer, ReplayBufferFactory, RlPolicy ) from vel.rl.util.actor import PolicyActor -from vel.util.tensor_util import TensorAccumulator +from vel.util.tensor_util import TensorAccumulator, to_device +from vel.util.datastructure import flatten_dict class TrajectoryReplayEnvRoller(ReplayEnvRollerBase): @@ -85,11 +86,11 @@ def rollout(self, batch_info: BatchInfo, number_of_steps: int) -> Rollout: accumulated_tensors = accumulator.result() final_obs = self.actor.act(self.last_observation.to(self.device), advance_state=False) + cpu_final_obs = to_device(final_obs, torch.device('cpu')) rollout_tensors = {} - for key, value in final_obs.items(): - rollout_tensors[f"final_{key}"] = value.cpu() + flatten_dict(cpu_final_obs, rollout_tensors, root='final') return Trajectories( num_steps=accumulated_tensors['observations'].size(0), From c85205739fef3988de627e6c4f4552abb3295d76 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 9 Nov 2019 16:25:10 -0800 Subject: [PATCH 152/162] Fixing rainbow. 
--- vel/net/modular.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vel/net/modular.py b/vel/net/modular.py index ca9efecc..dc2ebda0 100644 --- a/vel/net/modular.py +++ b/vel/net/modular.py @@ -84,8 +84,7 @@ def reset_state(self, state, dones): """ Reset the state after the episode has been terminated """ raise NotImplementedError - def forward(self, input_data, state=None): - context = {} + def forward(self, input_data, state=None, context: dict = None): return self.layers(input_data, state=None, context=context) def grouped_parameters(self): From 8277f60a06beddd1e86b34487a61208faee7c786 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 9 Nov 2019 16:56:32 -0800 Subject: [PATCH 153/162] Vel Research - Rubik's cube project --- requirements.txt | 4 +- vel/module/input/flatten.py | 14 ------- vel/net/layer/nlp/alphabet_one_hot_encode.py | 16 ++++++-- vel/net/layer/util/flatten.py | 39 ++++++++++++++++++++ 4 files changed, 53 insertions(+), 20 deletions(-) delete mode 100644 vel/module/input/flatten.py create mode 100644 vel/net/layer/util/flatten.py diff --git a/requirements.txt b/requirements.txt index 5776b12b..2ff5c561 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile # To update, run: # -# pip-compile +# pip-compile requirements.in # atari-py==0.2.6 # via gym atomicwrites==1.3.0 # via pytest @@ -55,4 +55,4 @@ websocket-client==0.56.0 # via visdom zipp==0.6.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -# setuptools==41.4.0 # via kiwisolver +# setuptools==41.6.0 # via kiwisolver diff --git a/vel/module/input/flatten.py b/vel/module/input/flatten.py deleted file mode 100644 index 7a1e5246..00000000 --- a/vel/module/input/flatten.py +++ /dev/null @@ -1,14 +0,0 @@ -from vel.module.layers import Flatten - - -from vel.api import VModule - - -class FlattenInput(VModule): - """ Sequence input """ - def __init__(self): - 
super().__init__() - self.model = Flatten() - - def forward(self, input_data): - return self.model(input_data) diff --git a/vel/net/layer/nlp/alphabet_one_hot_encode.py b/vel/net/layer/nlp/alphabet_one_hot_encode.py index 1198369b..a1f27f7f 100644 --- a/vel/net/layer/nlp/alphabet_one_hot_encode.py +++ b/vel/net/layer/nlp/alphabet_one_hot_encode.py @@ -23,7 +23,11 @@ def size_hints(self) -> SizeHints: class AlphabetOneHotEncodeLayerFactory(LayerFactory): - """ Factory class for the AlphabetoneHotEncode layer """ + """ Factory class for the AlphabetOneHotEncode layer """ + + def __init__(self, alphabet_size): + super().__init__() + self.alphabet_size = alphabet_size @property def name_base(self) -> str: @@ -32,7 +36,11 @@ def name_base(self) -> str: def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: """ Create a given layer object """ - alphabet_size = extra_args['alphabet_size'] + if 'alphabet_size' in extra_args: + alphabet_size = extra_args['alphabet_size'] + else: + alphabet_size = self.alphabet_size + return AlphabetOneHotEncodeLayer( info=self.make_info(context), alphabet_size=alphabet_size, @@ -40,6 +48,6 @@ def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, ext ) -def create(label=None, group=None): +def create(alphabet_size=None, label=None, group=None): """ Vel factory function """ - return AlphabetOneHotEncodeLayerFactory().with_given_name(label).with_given_group(group) + return AlphabetOneHotEncodeLayerFactory(alphabet_size=alphabet_size).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/util/flatten.py b/vel/net/layer/util/flatten.py new file mode 100644 index 00000000..3b4a9cfd --- /dev/null +++ b/vel/net/layer/util/flatten.py @@ -0,0 +1,39 @@ +import numpy as np + +from vel.api import SizeHints, SizeHint +from vel.net.layer_base import LayerFactory, Layer, LayerFactoryContext, LayerInfo + + +class Flatten(Layer): + """ Flatten single tensor to a unit 
shape """ + + def __init__(self, info: LayerInfo, size_hint: SizeHint): + super().__init__(info) + + self._size_hints = SizeHints(SizeHint(None, np.prod(size_hint[1:]))) + + def forward(self, direct, state: dict = None, context: dict = None): + return direct.view(direct.size(0), -1) + + def size_hints(self) -> SizeHints: + return self._size_hints + + +class FlattenFactory(LayerFactory): + """ Factory for Concat Layer """ + @property + def name_base(self) -> str: + """ Base of layer name """ + return "flatten" + + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ + return Flatten( + info=self.make_info(context), + size_hint=direct_input.assert_single() + ) + + +def create(label=None, group=None): + """ Vel factory function """ + return FlattenFactory().with_given_name(label).with_given_group(group) From 13f86fe0d1e5340b8e5e54d59e37bc49d5fdac17 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Sat, 9 Nov 2019 17:14:05 -0800 Subject: [PATCH 154/162] Dependencies update. --- Makefile | 5 +---- requirements.in | 2 +- requirements.txt | 28 ++++++++++++++-------------- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 9725ace3..87c23a87 100644 --- a/Makefile +++ b/Makefile @@ -37,10 +37,7 @@ test: pytest . 
requirements.txt: requirements.in - pip-compile requirements.in - -requpgrade: - pip-compile --upgrade + pip-compile --upgrade requirements.in lint: flake8 vel diff --git a/requirements.in b/requirements.in index 416b5ede..979d9ef7 100644 --- a/requirements.in +++ b/requirements.in @@ -12,6 +12,6 @@ pyyaml scikit-learn torchtext torchvision -torch~=1.3 +torch~=1.3.1 tqdm visdom diff --git a/requirements.txt b/requirements.txt index 2ff5c561..2447817b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,15 +6,15 @@ # atari-py==0.2.6 # via gym atomicwrites==1.3.0 # via pytest -attrs==19.2.0 +attrs==19.3.0 box2d-py==2.3.8 # via gym certifi==2019.9.11 # via requests chardet==3.0.4 # via requests cloudpickle==1.2.2 cycler==0.10.0 # via matplotlib dnspython==1.16.0 -future==0.18.0 # via pyglet -gym[atari,box2d,classic_control]==0.15.3 +future==0.18.2 # via pyglet +gym[atari,box2d,classic_control]==0.15.4 idna==2.8 # via requests importlib-metadata==0.23 # via pluggy, pytest joblib==0.14.0 # via scikit-learn @@ -23,31 +23,31 @@ jsonpointer==2.0 # via jsonpatch kiwisolver==1.1.0 # via matplotlib matplotlib==3.1.1 more-itertools==7.2.0 # via pytest, zipp -numpy==1.17.2 +numpy==1.17.3 opencv-python==4.1.1.26 packaging==19.2 # via pytest -pandas==0.25.1 -pillow==6.2.0 # via gym, torchvision, visdom +pandas==0.25.3 +pillow==6.2.1 # via gym, torchvision, visdom pluggy==0.13.0 # via pytest py==1.8.0 # via pytest pyglet==1.3.2 # via gym pymongo==3.9.0 -pyparsing==2.4.2 # via matplotlib, packaging -pytest==5.2.1 -python-dateutil==2.8.0 # via matplotlib, pandas +pyparsing==2.4.4 # via matplotlib, packaging +pytest==5.2.2 +python-dateutil==2.8.1 # via matplotlib, pandas pytz==2019.3 # via pandas pyyaml==5.1.2 pyzmq==18.1.0 # via visdom requests==2.22.0 # via torchtext, visdom scikit-learn==0.21.3 -scipy==1.3.1 # via gym, scikit-learn, visdom -six==1.12.0 # via atari-py, cycler, gym, packaging, python-dateutil, torchtext, torchvision, visdom, websocket-client -torch==1.3.0 
+scipy==1.3.2 # via gym, scikit-learn, visdom +six==1.13.0 # via atari-py, cycler, gym, packaging, python-dateutil, torchtext, torchvision, visdom, websocket-client +torch==1.3.1 torchfile==0.1.0 # via visdom torchtext==0.4.0 -torchvision==0.4.1 +torchvision==0.4.2 tornado==6.0.3 # via visdom -tqdm==4.36.1 +tqdm==4.38.0 urllib3==1.25.6 # via requests visdom==0.1.8.9 wcwidth==0.1.7 # via pytest From 4e94cc511f0831674c74c569339895651a20ae51 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Sun, 10 Nov 2019 22:26:21 -0800 Subject: [PATCH 155/162] Implemented skip-connection layer. --- vel/net/layer/arch/skip_connection.py | 81 ++++++++++++++++++++++++++ vel/net/modular.py | 83 +++++++++++++++++---------- vel/net/sequence.py | 76 ------------------------ 3 files changed, 134 insertions(+), 106 deletions(-) create mode 100644 vel/net/layer/arch/skip_connection.py delete mode 100644 vel/net/sequence.py diff --git a/vel/net/layer/arch/skip_connection.py b/vel/net/layer/arch/skip_connection.py new file mode 100644 index 00000000..f627664b --- /dev/null +++ b/vel/net/layer/arch/skip_connection.py @@ -0,0 +1,81 @@ +import collections + +from vel.api import SizeHints, SizeHint +from vel.net.layer_base import LayerFactory, Layer, LayerInfo, LayerFactoryContext +from vel.net.modular import LayerList + + +class SkipConnectionLayer(Layer): + """ Container around a skip connection """ + + def __init__(self, info: LayerInfo, layers: [Layer], size_hint: SizeHint): + super().__init__(info) + + self.layers = LayerList(layers) + self._size_hints = SizeHints(size_hint) + + @property + def is_stateful(self) -> bool: + return self.layers.is_stateful + + def zero_state(self, batch_size): + return self.layers.zero_state(batch_size) + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + return self._size_hints + + def forward(self, direct, state: dict = None, context: dict = None): + """ Forward propagation of a single layer """ + if self.is_stateful: + result, 
out_state = self.layers(direct, state=state, context=context) + return direct + result, out_state + else: + result = self.layers(direct, state=state, context=context) + return direct + result + + +class SkipConnectionLayerFactory(LayerFactory): + """ Factory for skip connection layers """ + + def __init__(self, layers: [LayerFactory]): + super().__init__() + self.layers = layers + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "skip_connection" + + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ + size_hint = direct_input.assert_single() + + layers = collections.OrderedDict() + + info = self.make_info(context) + + for idx, layer_factory in enumerate(self.layers): + counter = idx + 1 + + child_context = LayerFactoryContext( + idx=counter, + parent_group=info.group, + parent_name=info.name, + data=context.data + ) + + layer = layer_factory.instantiate( + direct_input=SizeHints(size_hint), + context=child_context, + extra_args=extra_args + ) + + layers[layer.name] = layer + + return SkipConnectionLayer(info, layers=layers, size_hint=size_hint) + + +def create(layers: [LayerFactory], label=None, group=None): + """ Vel factory function """ + return SkipConnectionLayerFactory(layers=layers).with_given_name(label).with_given_group(group) diff --git a/vel/net/modular.py b/vel/net/modular.py index dc2ebda0..a4e548c8 100644 --- a/vel/net/modular.py +++ b/vel/net/modular.py @@ -8,7 +8,7 @@ from .layer_base import LayerFactory, LayerFactoryContext -class ModularSequential(nn.Module): +class LayerList(BackboneModule): """ Modification of nn.Sequential for the purpose of modular networks """ def __init__(self, layers: collections.OrderedDict): super().__init__() @@ -19,6 +19,32 @@ def __init__(self, layers: collections.OrderedDict): self.add_module(key, module) self._layers.append(module) + self._is_stateful = any(l.is_stateful for l in self._layers) + 
+ def reset_weights(self): + for l in self._layers: + l.reset_weights() + + def size_hints(self) -> SizeHints: + return self._layers[-1].size_hints() + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return self._is_stateful + + def zero_state(self, batch_size): + """ Potential state for the model """ + zero_state = {} + + for l in self._layers: + if l.is_stateful: + layer_zero_state = l.zero_state(batch_size) + if layer_zero_state is not None: + zero_state.update(layer_zero_state) + + return zero_state + def __len__(self): return len(self._layers) @@ -26,9 +52,22 @@ def __getitem__(self, item): return self._layers[item] def forward(self, direct, state: dict = None, context: dict = None): - for layer in self._layers: - direct = layer(direct, state=state, context=context) - return direct + if not self._is_stateful: + for layer in self._layers: + direct = layer(direct, state=state, context=context) + return direct + else: + data = direct + output_state = {} + + for layer in self._layers: + if layer.is_stateful: + data, new_state = layer(data, state=state, context=context) + output_state.update(new_state) + else: + data = layer(data, state=state, context=context) + + return data, output_state def instantiate_layers(layers: [LayerFactory], group: str, size_hint: SizeHints, extra_args: dict) -> nn.Module: @@ -51,22 +90,21 @@ def instantiate_layers(layers: [LayerFactory], group: str, size_hint: SizeHints, module_dict[layer.name] = layer - return ModularSequential(module_dict) + return LayerList(module_dict) class ModularNetwork(BackboneModule): """ Network that is built from layers """ - def __init__(self, layers: nn.Module): + def __init__(self, layers: LayerList): super().__init__() self.layers = layers - assert not any(l.is_stateful for l in self.layers), "Does not support stateful layers" + assert not self.layers.is_stateful def reset_weights(self): """ Call proper initializers for 
the weights """ - for l in self.layers: - l.reset_weights() + self.layers.reset_weights() @property def is_stateful(self) -> bool: @@ -74,7 +112,7 @@ def is_stateful(self) -> bool: return False def size_hints(self) -> SizeHints: - return self.layers[-1].size_hints() + return self.layers.size_hints() def zero_state(self, batch_size): """ Potential state for the model """ @@ -95,15 +133,14 @@ def grouped_parameters(self): class StatefulModularNetwork(BackboneModule): """ Modular network handling the state between the episodes """ - def __init__(self, layers: nn.Module): + def __init__(self, layers: LayerList): super().__init__() self.layers = layers def reset_weights(self): """ Call proper initializers for the weights """ - for l in self.layers: - l.reset_weights() + self.layers.reset_weights() @property def is_stateful(self) -> bool: @@ -111,18 +148,11 @@ def is_stateful(self) -> bool: return True def size_hints(self) -> SizeHints: - return self.layers[-1].size_hints() + return self.layers.size_hints() def zero_state(self, batch_size): """ Potential state for the model """ - zero_state = {} - - for l in self.layers: - layer_zero_state = l.zero_state(batch_size) - if layer_zero_state is not None: - zero_state.update(layer_zero_state) - - return zero_state + return self.layers.zero_state(batch_size) def reset_state(self, state, dones): """ Reset the state after the episode has been terminated """ @@ -130,20 +160,13 @@ def reset_state(self, state, dones): def forward(self, input_data, state=None): data = input_data - context = {} - output_state = {} if state is None: # input_data.device here may break. 
Should be fixed at some point state = to_device(self.zero_state(input_data.size(0)), input_data.device) - for layer in self.layers: - if layer.is_stateful: - data, new_state = layer(data, state=state, context=context) - output_state.update(new_state) - else: - data = layer(data, state=state, context=context) + data, output_state = self.layers(data, state=state, context=context) return data, output_state diff --git a/vel/net/sequence.py b/vel/net/sequence.py deleted file mode 100644 index f759051c..00000000 --- a/vel/net/sequence.py +++ /dev/null @@ -1,76 +0,0 @@ -import collections -import typing - -from vel.api import BackboneModule, SizeHints -from vel.exception import VelException -from vel.util.tensor_util import to_device - - -class GenericModularSequential(BackboneModule): - """ Modification of nn.Sequential for the purpose of modular networks """ - - def __init__(self, layers: typing.Union[collections.OrderedDict, collections.Sequence]): - super().__init__() - self._layers = [] - - if isinstance(layers, collections.OrderedDict): - for key, module in layers.items(): - self.add_module(key, module) - self._layers.append(module) - elif isinstance(layers, collections.Sequence): - for idx, module in enumerate(layers): - key = str(idx) - self.add_module(key, module) - self._layers.append(module) - else: - raise VelException("Incorrectly specified layers, must be a sequence or an ordered dict") - - self._is_stateful = any(l.is_stateful() for l in self._layers) - - def size_hints(self) -> SizeHints: - return self._layers[-1].size_hints() - - @property - def is_stateful(self) -> bool: - """ If the model has a state that needs to be fed between individual observations """ - return self._is_stateful - - def zero_state(self, batch_size): - """ Potential state for the model """ - zero_state = {} - - for l in self.layers: - layer_zero_state = l.zero_state(batch_size) - if layer_zero_state is not None: - zero_state.update(layer_zero_state) - - return zero_state - - def 
__len__(self): - return len(self._layers) - - def __getitem__(self, item): - return self._layers[item] - - def forward(self, direct, state: dict = None, context: dict = None): - if not self.is_stateful: - for layer in self._layers: - direct = layer(direct, state=state, context=context) - return direct - else: - output_state = {} - - if state is None: - # direct.device here may break. Should be fixed at some point - state = to_device(self.zero_state(direct.size(0)), direct.device) - - data = direct - - for layer in self.layers: - if layer.is_stateful: - data, new_state = layer(data, state=state, context=context) - output_state.update(new_state) - else: - data = layer(data, state=state, context=context) - - return data, output_state From 965903c1829c8ab9d4219b4c839243abe5a0a85a Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Mon, 11 Nov 2019 09:11:19 -0800 Subject: [PATCH 156/162] Fixing an error in TRPO config. --- examples-configs/rl/atari/atari_trpo.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples-configs/rl/atari/atari_trpo.yaml b/examples-configs/rl/atari/atari_trpo.yaml index adc7850a..2fad825b 100644 --- a/examples-configs/rl/atari/atari_trpo.yaml +++ b/examples-configs/rl/atari/atari_trpo.yaml @@ -20,7 +20,7 @@ model: improvement_acceptance_ratio: 0.1 cg_damping: 0.001 vf_iters: 3 - entropy_coefficient: 0.1 + entropy_coefficient: 0.01 discount_factor: 0.99 gae_lambda: 1.00 # Generalized Advantage Estimator Lambda parameter From e2d78146d159169c09903fd5003a31a0995e70fb Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 14 Nov 2019 13:35:45 -0800 Subject: [PATCH 157/162] Improved summary command --- vel/api/model.py | 3 +++ vel/rl/command/rl_summary_command.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 vel/rl/command/rl_summary_command.py diff --git a/vel/api/model.py b/vel/api/model.py index ac3fe547..259145ce 100644 --- a/vel/api/model.py +++ b/vel/api/model.py @@ -39,6 +39,9 @@ def 
summary(self): """ Print a model summary """ print(self) print("-" * 100) + for name, module in self.named_parameters(): + print("> {} {:,}".format(name, module.numel())) + print("-" * 100) number = sum(p.numel() for p in self.parameters()) print("Number of model parameters: {:,}".format(number)) print("-" * 100) diff --git a/vel/rl/command/rl_summary_command.py b/vel/rl/command/rl_summary_command.py new file mode 100644 index 00000000..108a038b --- /dev/null +++ b/vel/rl/command/rl_summary_command.py @@ -0,0 +1,16 @@ +class ModelSummary: + """ Just print model summary """ + def __init__(self, model, vec_env): + self.model_factory = model + self.vec_env = vec_env + + def run(self, *args): + """ Print model summary """ + env = self.vec_env.instantiate(parallel_envs=1, seed=1) + model = self.model_factory.instantiate(action_space=env.action_space, observation_space=env.observation_space) + model.summary() + + +def create(model, vec_env): + """ Vel factory function """ + return ModelSummary(model, vec_env) From ed4d804aab12e2da558fe36a5996edc957e2d614 Mon Sep 17 00:00:00 2001 From: Jerry Tworek Date: Thu, 14 Nov 2019 13:40:47 -0800 Subject: [PATCH 158/162] Enhancements to the modular network code. 
--- vel/net/layer/arch/parallel.py | 35 ++++++++- vel/net/layer/arch/sequence.py | 77 ++++++++++++++++++++ vel/net/layer/arch/skip_connection.py | 6 +- vel/net/layer/residual/__init__.py | 0 vel/net/layer/residual/fc_res.py | 101 ++++++++++++++++++++++++++ vel/net/layer_base.py | 2 +- vel/net/layer_list.py | 65 +++++++++++++++++ vel/net/modular.py | 64 +--------------- 8 files changed, 282 insertions(+), 68 deletions(-) create mode 100644 vel/net/layer/arch/sequence.py create mode 100644 vel/net/layer/residual/__init__.py create mode 100644 vel/net/layer/residual/fc_res.py create mode 100644 vel/net/layer_list.py diff --git a/vel/net/layer/arch/parallel.py b/vel/net/layer/arch/parallel.py index cebe1e5e..241933f5 100644 --- a/vel/net/layer/arch/parallel.py +++ b/vel/net/layer/arch/parallel.py @@ -13,6 +13,12 @@ def __init__(self, info: LayerInfo, layers: [Layer]): self.layers = nn.ModuleList(layers) self._size_hints = SizeHints(tuple(layer.size_hints().unwrap() for layer in self.layers)) + self._is_stateful = any(l.is_stateful for l in self.layers) + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return self._is_stateful def size_hints(self) -> SizeHints: """ Size hints for this network """ @@ -20,10 +26,33 @@ def size_hints(self) -> SizeHints: def forward(self, direct, state: dict = None, context: dict = None): """ Forward propagation of a single layer """ - results = [layer(x, state, context) for layer, x in zip(self.layers, direct)] - return tuple(results) + if self._is_stateful: + results = [] + output_state = {} + + for layer, layer_input in zip(self.layers, direct): + data, new_state = layer(layer_input, state=state, context=context) + results.append(data) + output_state.update(new_state) + + return tuple(results), output_state + else: + results = [layer(x, state, context) for layer, x in zip(self.layers, direct)] + return tuple(results) + + def zero_state(self, 
batch_size): + """ Potential state for the model """ + zero_state = {} + + for l in self.layers: + if l.is_stateful: + layer_zero_state = l.zero_state(batch_size) + if layer_zero_state is not None: + zero_state.update(layer_zero_state) + + return zero_state - def grouped_parameters(self) -> typing.Iterable[(str, object)]: + def grouped_parameters(self) -> typing.Iterable[typing.Tuple[str, object]]: """ Return iterable of pairs (group, parameters) """ raise NotImplementedError diff --git a/vel/net/layer/arch/sequence.py b/vel/net/layer/arch/sequence.py new file mode 100644 index 00000000..6c2beb30 --- /dev/null +++ b/vel/net/layer/arch/sequence.py @@ -0,0 +1,77 @@ +import collections + +from vel.api import SizeHints +from vel.net.layer_base import LayerFactory, Layer, LayerInfo, LayerFactoryContext +from vel.net.modular import LayerList + + +class SequenceLayer(Layer): + """ Container around a skip connection """ + + def __init__(self, info: LayerInfo, layers: [Layer]): + super().__init__(info) + + self.layers = LayerList(layers) + + @property + def is_stateful(self) -> bool: + return self.layers.is_stateful + + def zero_state(self, batch_size): + return self.layers.zero_state(batch_size) + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + return self.layers[-1].size_hints() + + def forward(self, direct, state: dict = None, context: dict = None): + """ Forward propagation of a single layer """ + return self.layers(direct, state=state, context=context) + + +class SequenceFactory(LayerFactory): + """ Factory for skip connection layers """ + + def __init__(self, layers: [LayerFactory]): + super().__init__() + self.layers = layers + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "skip_connection" + + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ + loop_size_hints = direct_input + + layers = collections.OrderedDict() 
+ + info = self.make_info(context) + + for idx, layer_factory in enumerate(self.layers): + counter = idx + 1 + + child_context = LayerFactoryContext( + idx=counter, + parent_group=info.group, + parent_name=info.name, + data=context.data + ) + + layer = layer_factory.instantiate( + direct_input=loop_size_hints, + context=child_context, + extra_args=extra_args + ) + + loop_size_hints = layer.size_hints() + + layers[layer.name] = layer + + return SequenceLayer(info, layers=layers) + + +def create(layers: [LayerFactory], label=None, group=None): + """ Vel factory function """ + return SequenceFactory(layers=layers).with_given_name(label).with_given_group(group) diff --git a/vel/net/layer/arch/skip_connection.py b/vel/net/layer/arch/skip_connection.py index f627664b..bc7cab8a 100644 --- a/vel/net/layer/arch/skip_connection.py +++ b/vel/net/layer/arch/skip_connection.py @@ -49,7 +49,7 @@ def name_base(self) -> str: def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: """ Create a given layer object """ - size_hint = direct_input.assert_single() + size_hint = loop_size_hint = direct_input.assert_single() layers = collections.OrderedDict() @@ -66,11 +66,13 @@ def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, ext ) layer = layer_factory.instantiate( - direct_input=SizeHints(size_hint), + direct_input=SizeHints(loop_size_hint), context=child_context, extra_args=extra_args ) + loop_size_hint = layer.size_hints().assert_single() + layers[layer.name] = layer return SkipConnectionLayer(info, layers=layers, size_hint=size_hint) diff --git a/vel/net/layer/residual/__init__.py b/vel/net/layer/residual/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vel/net/layer/residual/fc_res.py b/vel/net/layer/residual/fc_res.py new file mode 100644 index 00000000..393dd5e3 --- /dev/null +++ b/vel/net/layer/residual/fc_res.py @@ -0,0 +1,101 @@ +import typing +import torch.nn as nn +import 
torch.nn.init as init +import numpy as np + +from vel.api import SizeHint, SizeHints +from vel.net.layer_base import Layer, LayerFactory, LayerInfo, LayerFactoryContext + + +class FcResidual(Layer): + """ Residual fully-connected block """ + + def __init__(self, info: LayerInfo, input_shape: SizeHint, divisor: int = 1, activation: str = 'relu', + normalization: typing.Optional[str] = None): + super().__init__(info) + + self._size_hints = SizeHints(input_shape) + + self.trunk_shape = input_shape[-1] + self.bottleneck_shape = self.trunk_shape // divisor + + self.f1 = nn.Linear(self.trunk_shape, self.bottleneck_shape) + + if normalization == 'layer': + self.n1 = nn.LayerNorm(self.bottleneck_shape) + elif normalization is None: + self.n1 = nn.Identity() + else: + raise NotImplementedError + + if activation == 'relu': + self.a1 = nn.ReLU(inplace=True) + else: + raise NotImplementedError + + self.f2 = nn.Linear(self.bottleneck_shape, self.trunk_shape) + + if normalization == 'layer': + self.n2 = nn.LayerNorm(self.trunk_shape) + elif normalization is None: + self.n2 = nn.Identity() + else: + raise NotImplementedError + + if activation == 'relu': + self.a2 = nn.ReLU(inplace=True) + else: + raise NotImplementedError + + def reset_weights(self): + """ Call proper initializers for the weights """ + for m in self.modules(): + if isinstance(m, nn.Linear): + # init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + init.orthogonal_(m.weight, gain=np.sqrt(2)) + init.constant_(m.bias, 0.0) + + def size_hints(self) -> SizeHints: + """ Size hints for this network """ + return self._size_hints + + def forward(self, direct, state: dict, context: dict): + residual = direct + + residual = self.a1(self.n1(self.f1(residual))) + residual = self.a2(self.n2(self.f2(residual))) + + return residual + direct + + +class FcResidualFactory(LayerFactory): + """ Factory for fully-connected residual layers """ + def __init__(self, divisor: int, activation: str, normalization: 
typing.Optional[str] = None): + super().__init__() + self.divisor = divisor + self.activation = activation + self.normalization = normalization + + @property + def name_base(self) -> str: + """ Base of layer name """ + return "fc_residual" + + def instantiate(self, direct_input: SizeHints, context: LayerFactoryContext, extra_args: dict) -> Layer: + """ Create a given layer object """ + size_hint = direct_input.assert_single() + info = self.make_info(context) + + return FcResidual( + info=info, + input_shape=size_hint, + divisor=self.divisor, + activation=self.activation, + normalization=self.normalization + ) + + +def create(divisor: int, activation: str = 'relu', normalization: typing.Optional[str] = None, + label=None, group=None): + return FcResidualFactory(divisor, activation, normalization) + diff --git a/vel/net/layer_base.py b/vel/net/layer_base.py index 591041c6..e974a8f1 100644 --- a/vel/net/layer_base.py +++ b/vel/net/layer_base.py @@ -38,7 +38,7 @@ def forward(self, direct, state: dict, context: dict): """ Forward propagation of a single layer """ raise NotImplementedError - def grouped_parameters(self): + def grouped_parameters(self) -> typing.Iterable[typing.Tuple[str, object]]: """ Return iterable of pairs (group, parameters) """ return [(self.group, self.parameters())] diff --git a/vel/net/layer_list.py b/vel/net/layer_list.py new file mode 100644 index 00000000..24d44e7d --- /dev/null +++ b/vel/net/layer_list.py @@ -0,0 +1,65 @@ +import collections + +from vel.api import BackboneModule, SizeHints + + +class LayerList(BackboneModule): + """ Modification of nn.Sequential for the purpose of modular networks """ + def __init__(self, layers: collections.OrderedDict): + super().__init__() + + self._layers = [] + + for key, module in layers.items(): + self.add_module(key, module) + self._layers.append(module) + + self._is_stateful = any(l.is_stateful for l in self._layers) + + def reset_weights(self): + for l in self._layers: + l.reset_weights() + + def 
size_hints(self) -> SizeHints: + return self._layers[-1].size_hints() + + @property + def is_stateful(self) -> bool: + """ If the model has a state that needs to be fed between individual observations """ + return self._is_stateful + + def zero_state(self, batch_size): + """ Potential state for the model """ + zero_state = {} + + for l in self._layers: + if l.is_stateful: + layer_zero_state = l.zero_state(batch_size) + if layer_zero_state is not None: + zero_state.update(layer_zero_state) + + return zero_state + + def __len__(self): + return len(self._layers) + + def __getitem__(self, item): + return self._layers[item] + + def forward(self, direct, state: dict = None, context: dict = None): + if not self._is_stateful: + for layer in self._layers: + direct = layer(direct, state=state, context=context) + return direct + else: + data = direct + output_state = {} + + for layer in self._layers: + if layer.is_stateful: + data, new_state = layer(data, state=state, context=context) + output_state.update(new_state) + else: + data = layer(data, state=state, context=context) + + return data, output_state diff --git a/vel/net/modular.py b/vel/net/modular.py index a4e548c8..82573056 100644 --- a/vel/net/modular.py +++ b/vel/net/modular.py @@ -5,69 +5,9 @@ from vel.api import BackboneModule, ModuleFactory, SizeHints from vel.util.tensor_util import to_device -from .layer_base import LayerFactory, LayerFactoryContext - - -class LayerList(BackboneModule): - """ Modification of nn.Sequential for the purpose of modular networks """ - def __init__(self, layers: collections.OrderedDict): - super().__init__() - - self._layers = [] - - for key, module in layers.items(): - self.add_module(key, module) - self._layers.append(module) - - self._is_stateful = any(l.is_stateful for l in self._layers) - - def reset_weights(self): - for l in self._layers: - l.reset_weights() - - def size_hints(self) -> SizeHints: - return self._layers[-1].size_hints() - - @property - def is_stateful(self) -> 
bool: - """ If the model has a state that needs to be fed between individual observations """ - return self._is_stateful - - def zero_state(self, batch_size): - """ Potential state for the model """ - zero_state = {} - - for l in self._layers: - if l.is_stateful: - layer_zero_state = l.zero_state(batch_size) - if layer_zero_state is not None: - zero_state.update(layer_zero_state) - - return zero_state - def __len__(self): - return len(self._layers) - - def __getitem__(self, item): - return self._layers[item] - - def forward(self, direct, state: dict = None, context: dict = None): - if not self._is_stateful: - for layer in self._layers: - direct = layer(direct, state=state, context=context) - return direct - else: - data = direct - output_state = {} - - for layer in self._layers: - if layer.is_stateful: - data, new_state = layer(data, state=state, context=context) - output_state.update(new_state) - else: - data = layer(data, state=state, context=context) - - return data, output_state +from .layer_base import LayerFactory, LayerFactoryContext +from .layer_list import LayerList def instantiate_layers(layers: [LayerFactory], group: str, size_hint: SizeHints, extra_args: dict) -> nn.Module: From fc554a02f90b0be2ec950e1b213991c1e61b78c7 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 14 Nov 2019 17:18:33 -0800 Subject: [PATCH 159/162] Fixing O-U noise --- vel/rl/module/noise/ou_noise.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/vel/rl/module/noise/ou_noise.py b/vel/rl/module/noise/ou_noise.py index 10c154f0..66bcb065 100644 --- a/vel/rl/module/noise/ou_noise.py +++ b/vel/rl/module/noise/ou_noise.py @@ -20,25 +20,29 @@ def __init__(self, std_dev: float, action_space: gym.Space): self.register_buffer('low_tensor', torch.from_numpy(self.action_space.low).unsqueeze(0)) self.register_buffer('high_tensor', torch.from_numpy(self.action_space.high).unsqueeze(0)) + def _expand_processes(self, shape): + while 
len(self.processes) < shape: + len_action_space = self.action_space.shape[-1] + + self.processes.append( + OrnsteinUhlenbeckNoiseProcess( + np.zeros(len_action_space), float(self.std_dev) * np.ones(len_action_space) + ) + ) + def reset_episodic_state(self, dones): """ A hook for a model to react when during training episode is finished """ + self._expand_processes(dones.shape[0]) + for idx, done in enumerate(dones.cpu()): if done > 0.5: self.processes[idx].reset() def forward(self, actions): """ Return model step after applying noise """ - while len(self.processes) < actions.shape[0]: - len_action_space = self.action_space.shape[-1] - - self.processes.append( - OrnsteinUhlenbeckNoiseProcess( - np.zeros(len_action_space), float(self.std_dev) * np.ones(len_action_space) - ) - ) + self._expand_processes(actions.shape[0]) noise = torch.from_numpy(np.stack([x() for x in self.processes])).float().to(actions.device) - return torch.min(torch.max(actions + noise, self.low_tensor), self.high_tensor) From 3e0dd6ac07005cca7a9ac63a5f10f1a8e9156d25 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 14 Nov 2019 17:27:52 -0800 Subject: [PATCH 160/162] Tiny updates to EWMA normalization. 
--- vel/module/input/normalize_ewma.py | 4 ++-- vel/net/layer/input/normalize_ewma.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/vel/module/input/normalize_ewma.py b/vel/module/input/normalize_ewma.py index 3219e358..0e59b792 100644 --- a/vel/module/input/normalize_ewma.py +++ b/vel/module/input/normalize_ewma.py @@ -21,18 +21,18 @@ def __init__(self, input_shape, beta=0.99, per_element_update=False, epsilon=1e- def reset_weights(self): self.running_mean.zero_() self.running_var.fill_(1.0) - self.count.fill_(self.epsilon) + self.debiasing_term.fill_(self.epsilon) def forward(self, input_vector): # Make sure input is float32 input_vector = input_vector.to(torch.float) if self.training: - batch_size = input_vector.size(0) batch_mean = input_vector.mean(dim=0) batch_var = input_vector.var(dim=0, unbiased=False) if self.per_element_update: + batch_size = input_vector.size(0) weight = self.beta ** batch_size else: weight = self.beta diff --git a/vel/net/layer/input/normalize_ewma.py b/vel/net/layer/input/normalize_ewma.py index 50b6dec5..64e1cbb3 100644 --- a/vel/net/layer/input/normalize_ewma.py +++ b/vel/net/layer/input/normalize_ewma.py @@ -22,6 +22,9 @@ def __init__(self, info: LayerInfo, input_shape: SizeHints, beta: float = 0.99, input_shape=self.input_shape.assert_single()[1:] # Remove batch axis ) + def reset_weights(self): + self.normalize.reset_weights() + def forward(self, direct, state: dict = None, context: dict = None): return self.normalize(direct) From 174c505ac57ff5b2ec1eff6bb47d12773d448833 Mon Sep 17 00:00:00 2001 From: Million Integrals Date: Thu, 14 Nov 2019 19:41:25 -0800 Subject: [PATCH 161/162] Registering env for the iteration. 
--- vel/rl/reinforcer/on_policy_iteration_reinforcer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py index 13b9853f..b9455fd7 100644 --- a/vel/rl/reinforcer/on_policy_iteration_reinforcer.py +++ b/vel/rl/reinforcer/on_policy_iteration_reinforcer.py @@ -5,6 +5,7 @@ import tqdm from vel.api import ModuleFactory, TrainingInfo, EpochInfo, BatchInfo +from vel.openai.baselines.common.vec_env import VecEnv from vel.rl.api import ( Reinforcer, ReinforcerFactory, VecEnvFactory, EnvRollerFactoryBase, EnvRollerBase, RlPolicy @@ -36,9 +37,10 @@ class OnPolicyIterationReinforcer(Reinforcer): May split the sample into multiple batches and may replay batches a few times. """ def __init__(self, device: torch.device, settings: OnPolicyIterationReinforcerSettings, policy: RlPolicy, - env_roller: EnvRollerBase) -> None: + env: VecEnv, env_roller: EnvRollerBase) -> None: self.device = device self.settings = settings + self.env = env self.env_roller = env_roller self._model: RlPolicy = policy.to(self.device) @@ -67,6 +69,9 @@ def initialize_training(self, training_info: TrainingInfo, model_state=None, hid self.policy.load_state_dict(model_state) else: self.policy.reset_weights() + + # Register env in the training info + training_info['env'] = self.env def train_epoch(self, epoch_info: EpochInfo, interactive=True) -> None: """ Train model on an epoch of a fixed number of batch updates """ @@ -160,7 +165,7 @@ def instantiate(self, device: torch.device) -> Reinforcer: env = self.env_factory.instantiate(parallel_envs=self.parallel_envs, seed=self.seed) policy = self.model_factory.instantiate(action_space=env.action_space, observation_space=env.observation_space) env_roller = self.env_roller_factory.instantiate(environment=env, policy=policy, device=device) - return OnPolicyIterationReinforcer(device, self.settings, policy, env_roller) + return 
OnPolicyIterationReinforcer(device, self.settings, policy, env, env_roller) def create(model_config, model, vec_env, env_roller, parallel_envs, number_of_steps, From 4582c4e743dbb7c9e5ae109868bbad40bb6ab14c Mon Sep 17 00:00:00 2001 From: Mattia Rigotti Date: Fri, 21 Feb 2020 12:21:40 -0500 Subject: [PATCH 162/162] Minor fixes to rl.command.record_movie_command and evaluate (#55) * A couple of minor bugfixes * Minor fix * Load checkpoint on correct device (cpu or cuda) * Fix openai logging --- examples-configs/rl/atari/atari_dqn.yaml | 1 + vel/rl/command/record_movie_command.py | 6 +++--- vel/rl/command/rl_train_command.py | 6 +++--- vel/storage/classic.py | 5 +++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/examples-configs/rl/atari/atari_dqn.yaml b/examples-configs/rl/atari/atari_dqn.yaml index c2fd5cde..2406ea1e 100644 --- a/examples-configs/rl/atari/atari_dqn.yaml +++ b/examples-configs/rl/atari/atari_dqn.yaml @@ -74,3 +74,4 @@ commands: evaluate: name: vel.rl.command.evaluate_env_command takes: 100 + parallel_envs: 1 diff --git a/vel/rl/command/record_movie_command.py b/vel/rl/command/record_movie_command.py index 79598e30..12eee428 100644 --- a/vel/rl/command/record_movie_command.py +++ b/vel/rl/command/record_movie_command.py @@ -28,7 +28,7 @@ def run(self): device = self.model_config.torch_device() env = self.env_factory.instantiate_single(preset='record', seed=self.model_config.seed) - model = self.model_factory.instantiate(action_space=env.action_space).to(device) + model = self.model_factory.instantiate(action_space=env.action_space, observation_space=env.observation_space).to(device) training_info = TrainingInfo( start_epoch_idx=self.storage.last_epoch_idx() @@ -61,11 +61,11 @@ def record_take(self, model, env_instance, device, take_number): observation_tensor = torch.from_numpy(observation_array).to(device) if model.is_stateful: - output = model.step(observation_tensor, hidden_state, **self.sample_args) + output = 
model.act(observation_tensor, hidden_state, **self.sample_args) hidden_state = output['state'] actions = output['actions'] else: - actions = model.step(observation_tensor, **self.sample_args)['actions'] + actions = model.act(observation_tensor, **self.sample_args)['actions'] actions = actions.detach().cpu().numpy() diff --git a/vel/rl/command/rl_train_command.py b/vel/rl/command/rl_train_command.py index b4d10758..6d6f0303 100644 --- a/vel/rl/command/rl_train_command.py +++ b/vel/rl/command/rl_train_command.py @@ -141,11 +141,11 @@ def start_training(self, reinforcer: Reinforcer, optimizer: VelOptimizer) -> Tra def _openai_logging(self, epoch_result): """ Use OpenAI logging facilities for the same type of logging """ for key in sorted(epoch_result.keys()): - if key == 'fps': + if key.name == 'fps': # Not super elegant, but I like nicer display of FPS - openai_logger.record_tabular(key, int(epoch_result[key])) + openai_logger.record_tabular(key.name, int(epoch_result[key])) else: - openai_logger.record_tabular(key, epoch_result[key]) + openai_logger.record_tabular(key.name, epoch_result[key]) openai_logger.dump_tabular() diff --git a/vel/storage/classic.py b/vel/storage/classic.py index e61dd91b..5815dca4 100644 --- a/vel/storage/classic.py +++ b/vel/storage/classic.py @@ -36,9 +36,10 @@ def load(self, train_info: TrainingInfo) -> (dict, dict): Resume learning process and return loaded hidden state dictionary """ last_epoch = train_info.start_epoch_idx + device = self.model_config.torch_device() - model_state = torch.load(self.checkpoint_filename(last_epoch)) - hidden_state = torch.load(self.checkpoint_hidden_filename(last_epoch)) + model_state = torch.load(self.checkpoint_filename(last_epoch), map_location=device) + hidden_state = torch.load(self.checkpoint_hidden_filename(last_epoch), map_location=device) self.checkpoint_strategy.restore(hidden_state) train_info.restore(hidden_state)