From f3612deb844fd1abedcf1cff93e2da000272ed48 Mon Sep 17 00:00:00 2001
From: Gabriel Tseng
Date: Tue, 27 Aug 2019 11:45:26 -0400
Subject: [PATCH 1/4] Get rid of the new EALSTM cell, since the original one works fine

---
 src/models/neural_networks/ealstm.py        | 128 +-------------------
 tests/models/neural_networks/test_ealstm.py |  54 +--------
 2 files changed, 5 insertions(+), 177 deletions(-)

diff --git a/src/models/neural_networks/ealstm.py b/src/models/neural_networks/ealstm.py
index 0c0cbf06d..f71cd3c23 100644
--- a/src/models/neural_networks/ealstm.py
+++ b/src/models/neural_networks/ealstm.py
@@ -152,10 +152,10 @@ def __init__(self, features_per_month, dense_features, hidden_size,
             ea_static_size += 12

         self.dropout = nn.Dropout(rnn_dropout)
-        self.rnn = OrgEALSTMCell(input_size_dyn=features_per_month,
-                                 input_size_stat=ea_static_size,
-                                 hidden_size=hidden_size,
-                                 batch_first=True)
+        self.rnn = EALSTMCell(input_size_dyn=features_per_month,
+                              input_size_stat=ea_static_size,
+                              hidden_size=hidden_size,
+                              batch_first=True)
         self.hidden_size = hidden_size
         self.rnn_dropout = nn.Dropout(rnn_dropout)
@@ -214,126 +214,6 @@ def forward(self, x, pred_month=None, latlons=None, current=None, yearly_aggs=No


 class EALSTMCell(nn.Module):
-    """See below. Implemented using modules so it can be explained with shap
-    """
-    def __init__(self,
-                 input_size_dyn: int,
-                 input_size_stat: int,
-                 hidden_size: int,
-                 batch_first: bool = True):
-        super().__init__()
-
-        self.input_size_dyn = input_size_dyn
-        self.input_size_stat = input_size_stat
-        self.hidden_size = hidden_size
-        self.batch_first = batch_first
-
-        self.forget_gate_i = nn.Linear(in_features=input_size_dyn,
-                                       out_features=hidden_size, bias=False)
-        self.forget_gate_h = nn.Linear(in_features=hidden_size, out_features=hidden_size,
-                                       bias=True)
-
-        self.update_gate = nn.Sequential(*[
-            nn.Linear(in_features=input_size_stat, out_features=hidden_size),
-            nn.Sigmoid()
-        ])
-
-        self.update_candidates_i = nn.Linear(in_features=input_size_dyn, out_features=hidden_size,
-                                             bias=False)
-        self.update_candidates_h = nn.Linear(in_features=hidden_size, out_features=hidden_size,
-                                             bias=True)
-
-        self.output_gate_i = nn.Linear(in_features=input_size_dyn, out_features=hidden_size,
-                                       bias=False)
-        self.output_gate_h = nn.Linear(in_features=hidden_size, out_features=hidden_size,
-                                       bias=True)
-
-        self.sigmoid = nn.Sigmoid()
-        self.tanh = nn.Tanh()
-
-        self.reset_parameters()
-
-    def reset_parameters(self):
-        self._reset_i(self.forget_gate_i)
-        self._reset_i(self.update_candidates_i)
-        self._reset_i(self.output_gate_i)
-        self._reset_i(self.update_gate[0])
-        nn.init.constant_(self.update_gate[0].bias.data, val=0)
-
-        self._reset_h(self.forget_gate_h, self.hidden_size)
-        self._reset_h(self.update_candidates_h, self.hidden_size)
-        self._reset_h(self.output_gate_h, self.hidden_size)
-
-    @staticmethod
-    def _reset_i(layer):
-        nn.init.orthogonal(layer.weight.data)
-
-    @staticmethod
-    def _reset_h(layer, hidden_size):
-        weight_hh_data = torch.eye(hidden_size)
-        layer.weight.data = weight_hh_data
-        nn.init.constant_(layer.bias.data, val=0)
-
-    def forward(self, x_d, x_s):
-        """[summary]
-        Parameters
-        ----------
-        x_d : torch.Tensor
-            Tensor, containing a batch of sequences of the dynamic features. Shape has to match
-            the format specified with batch_first.
-        x_s : torch.Tensor
-            Tensor, containing a batch of static features.
-        Returns
-        -------
-        h_n : torch.Tensor
-            The hidden states of each time step of each sample in the batch.
-        c_n : torch.Tensor
-            The cell states of each time step of each sample in the batch.
-        """
-        if self.batch_first:
-            x_d = x_d.transpose(0, 1)
-
-        seq_len, batch_size, _ = x_d.size()
-
-        h_0 = x_d.data.new(batch_size, self.hidden_size).zero_()
-        c_0 = x_d.data.new(batch_size, self.hidden_size).zero_()
-        h_x = (h_0, c_0)
-
-        # empty lists to temporally store all intermediate hidden/cell states
-        h_n, c_n = [], []
-
-        # calculate input gate only once because inputs are static
-        i = self.update_gate(x_s)
-
-        # perform forward steps over input sequence
-        for t in range(seq_len):
-            h_0, c_0 = h_x
-
-            forget_state = self.sigmoid(self.forget_gate_i(x_d[t]) + self.forget_gate_h(h_0))
-            cell_candidates = self.tanh(self.update_candidates_i(x_d[t]) +
-                                        self.update_candidates_h(h_0))
-            output_state = self.sigmoid(self.output_gate_i(x_d[t]) + self.output_gate_h(h_0))
-
-            c_1 = forget_state * c_0 + i * cell_candidates
-            h_1 = output_state * self.tanh(c_1)
-
-            # store intermediate hidden/cell state in list
-            h_n.append(h_1)
-            c_n.append(c_1)
-
-            h_x = (h_1, c_1)
-
-        h_n = torch.stack(h_n, 0)
-        c_n = torch.stack(c_n, 0)
-
-        if self.batch_first:
-            h_n = h_n.transpose(0, 1)
-            c_n = c_n.transpose(0, 1)
-
-        return h_n, c_n
-
-
-class OrgEALSTMCell(nn.Module):
     """Implementation of the Entity-Aware-LSTM (EA-LSTM)

     This code was copied from
diff --git a/tests/models/neural_networks/test_ealstm.py b/tests/models/neural_networks/test_ealstm.py
index d5a877f6e..e16675528 100644
--- a/tests/models/neural_networks/test_ealstm.py
+++ b/tests/models/neural_networks/test_ealstm.py
@@ -1,12 +1,10 @@
 import pickle
 import pytest
 from copy import copy
-import numpy as np

 import torch
-from torch import nn

-from src.models.neural_networks.ealstm import EALSTM, EALSTMCell, OrgEALSTMCell
+from src.models.neural_networks.ealstm import EALSTM
 from src.models import EARecurrentNetwork

 from tests.utils import _make_dataset
@@ -151,53 +149,3 @@ def test_predict(self, tmp_path, use_pred_months):

         # _make_dataset with const=True returns all ones
         assert (test_arrays_dict['hello']['y'] == 1).all()
-
-
-class TestEALSTMCell:
-    @staticmethod
-    def test_ealstm(monkeypatch):
-        """
-        We implement our own unrolled RNN, so that it can be explained with
-        shap. This test makes sure it roughly mirrors the behaviour of the pytorch
-        LSTM.
-        """
-
-        batch_size, hidden_size, timesteps, dyn_input, static_input = 3, 5, 2, 6, 4
-
-        @staticmethod
-        def i_init(layer):
-            nn.init.constant_(layer.weight.data, val=1)
-        monkeypatch.setattr(EALSTMCell, '_reset_i', i_init)
-
-        def org_init(self):
-            """Initialize all learnable parameters of the LSTM"""
-            nn.init.constant_(self.weight_ih.data, val=1)
-            nn.init.constant_(self.weight_sh, val=1)
-
-            weight_hh_data = torch.eye(self.hidden_size)
-            weight_hh_data = weight_hh_data.repeat(1, 3)
-            self.weight_hh.data = weight_hh_data
-
-            nn.init.constant_(self.bias.data, val=0)
-            nn.init.constant_(self.bias_s.data, val=0)
-        monkeypatch.setattr(OrgEALSTMCell, 'reset_parameters', org_init)
-
-        org_ealstm = OrgEALSTMCell(input_size_dyn=dyn_input,
-                                   input_size_stat=static_input,
-                                   hidden_size=hidden_size)
-
-        our_ealstm = EALSTMCell(input_size_dyn=dyn_input,
-                                input_size_stat=static_input,
-                                hidden_size=hidden_size)
-
-        static = torch.rand(batch_size, static_input)
-        dynamic = torch.rand(batch_size, timesteps, dyn_input)
-
-        with torch.no_grad():
-            org_hn, org_cn = org_ealstm(dynamic, static)
-            our_hn, our_cn = our_ealstm(dynamic, static)
-
-        assert np.isclose(org_hn.numpy(), our_hn.numpy(), 0.01).all(), \
-            "Difference in hidden state"
-        assert np.isclose(org_cn.numpy(), our_cn.numpy(), 0.01).all(), \
-            "Difference in cell state"
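For reference, both the deleted cell and the retained one implement the Entity-Aware LSTM of Kratzert et al.: the input gate i = sigmoid(W_s x_s + b_s) is computed once from the static inputs, while the forget gate f_t, output gate o_t and candidate g_t are recomputed from x_d[t] and h_{t-1} at every step, with c_t = f_t * c_{t-1} + i * g_t and h_t = o_t * tanh(c_t). A minimal sketch of driving the retained cell, with tensor shapes borrowed from the deleted test (standalone, not part of the patch itself):

    import torch
    from src.models.neural_networks.ealstm import EALSTMCell

    cell = EALSTMCell(input_size_dyn=6, input_size_stat=4, hidden_size=5)
    dynamic = torch.rand(3, 2, 6)  # (batch, timesteps, dynamic features), batch_first=True
    static = torch.rand(3, 4)      # (batch, static features)
    with torch.no_grad():
        h_n, c_n = cell(dynamic, static)  # hidden and cell states for every timestep
    assert h_n.shape == (3, 2, 5)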
- """ - - batch_size, hidden_size, timesteps, dyn_input, static_input = 3, 5, 2, 6, 4 - - @staticmethod - def i_init(layer): - nn.init.constant_(layer.weight.data, val=1) - monkeypatch.setattr(EALSTMCell, '_reset_i', i_init) - - def org_init(self): - """Initialize all learnable parameters of the LSTM""" - nn.init.constant_(self.weight_ih.data, val=1) - nn.init.constant_(self.weight_sh, val=1) - - weight_hh_data = torch.eye(self.hidden_size) - weight_hh_data = weight_hh_data.repeat(1, 3) - self.weight_hh.data = weight_hh_data - - nn.init.constant_(self.bias.data, val=0) - nn.init.constant_(self.bias_s.data, val=0) - monkeypatch.setattr(OrgEALSTMCell, 'reset_parameters', org_init) - - org_ealstm = OrgEALSTMCell(input_size_dyn=dyn_input, - input_size_stat=static_input, - hidden_size=hidden_size) - - our_ealstm = EALSTMCell(input_size_dyn=dyn_input, - input_size_stat=static_input, - hidden_size=hidden_size) - - static = torch.rand(batch_size, static_input) - dynamic = torch.rand(batch_size, timesteps, dyn_input) - - with torch.no_grad(): - org_hn, org_cn = org_ealstm(dynamic, static) - our_hn, our_cn = our_ealstm(dynamic, static) - - assert np.isclose(org_hn.numpy(), our_hn.numpy(), 0.01).all(), \ - "Difference in hidden state" - assert np.isclose(org_cn.numpy(), our_cn.numpy(), 0.01).all(), \ - "Difference in cell state" From 09823700bb3ba85c1903114a715aeea6b16e6cc8 Mon Sep 17 00:00:00 2001 From: Gabriel Tseng Date: Tue, 27 Aug 2019 11:48:00 -0400 Subject: [PATCH 2/4] Add an embedding for the months --- src/models/neural_networks/base.py | 15 +++++++++++++++ src/models/neural_networks/ealstm.py | 5 +++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/models/neural_networks/base.py b/src/models/neural_networks/base.py index 80d4d5288..148b26f80 100644 --- a/src/models/neural_networks/base.py +++ b/src/models/neural_networks/base.py @@ -5,6 +5,7 @@ import math import torch +from torch import nn from torch.nn import functional as F import shap @@ -348,3 +349,17 @@ def make_shap_input(self, x: TrainData, start_idx: int = 0, else: output_tensors.append(x.static[start_idx: start_idx + num_inputs]) return output_tensors + + +class OneHotMonthEncoder(nn.Module): + """Since the months are one hot encoded, using a linear layer + is equivalent to the lookup action of an embedding layer. 
+ """ + def __init__(self, embedding_size: int = 12) -> None: + super().__init__() + + self.encoder = nn.Linear(in_features=12, out_features=embedding_size, + bias=True) + + def forward(self, x): + return self.encoder(x) diff --git a/src/models/neural_networks/ealstm.py b/src/models/neural_networks/ealstm.py index f71cd3c23..256640ba6 100644 --- a/src/models/neural_networks/ealstm.py +++ b/src/models/neural_networks/ealstm.py @@ -6,7 +6,7 @@ from typing import Dict, List, Optional, Tuple -from .base import NNBase +from .base import NNBase, OneHotMonthEncoder class EARecurrentNetwork(NNBase): @@ -149,6 +149,7 @@ def __init__(self, features_per_month, dense_features, hidden_size, self.include_static = True ea_static_size += static_size if include_pred_month: + self.month_encoder = OneHotMonthEncoder(12) ea_static_size += 12 self.dropout = nn.Dropout(rnn_dropout) @@ -198,7 +199,7 @@ def forward(self, x, pred_month=None, latlons=None, current=None, yearly_aggs=No if self.include_static: static_x.append(static) if self.include_pred_month: - static_x.append(pred_month) + static_x.append(self.month_encoder(pred_month)) hidden_state, cell_state = self.rnn(x, torch.cat(static_x, dim=-1)) From 40edaf93e2dcfc3d7dcca879f95ddfefc0fa381c Mon Sep 17 00:00:00 2001 From: Gabriel Tseng Date: Tue, 27 Aug 2019 12:04:57 -0400 Subject: [PATCH 3/4] Add test to ensure encoder behaves as expected --- tests/models/neural_networks/test_base.py | 25 +++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 tests/models/neural_networks/test_base.py diff --git a/tests/models/neural_networks/test_base.py b/tests/models/neural_networks/test_base.py new file mode 100644 index 000000000..c9a740367 --- /dev/null +++ b/tests/models/neural_networks/test_base.py @@ -0,0 +1,25 @@ +import torch + +from src.models.neural_networks.base import OneHotMonthEncoder + + +class TestOneHotMonthEncoder: + + def test_embedding_likeness(self): + + model = OneHotMonthEncoder(15) + + init_weights = torch.stack([torch.arange(1, 13)] * 15).float() + init_bias = torch.zeros(15).float() + model.encoder.weight.data = init_weights + model.encoder.bias.data = init_bias + + for month in range(1, 13): + indices = torch.tensor([month, month, month]) + month_tensor = torch.eye(14)[indices.long()][:, 1:-1].float() + + model.eval() + with torch.no_grad(): + output_vals = model(month_tensor) + + assert (output_vals == month).all() From 4f6a2624468ef18d78d00b299819ba66a7c0079b Mon Sep 17 00:00:00 2001 From: Gabriel Tseng Date: Wed, 28 Aug 2019 09:17:30 +0800 Subject: [PATCH 4/4] Don't hardcode the embedding size --- src/models/neural_networks/ealstm.py | 24 +++++++++++++-------- tests/models/neural_networks/test_ealstm.py | 8 +++---- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/models/neural_networks/ealstm.py b/src/models/neural_networks/ealstm.py index 256640ba6..ace74eb33 100644 --- a/src/models/neural_networks/ealstm.py +++ b/src/models/neural_networks/ealstm.py @@ -21,19 +21,23 @@ def __init__(self, hidden_size: int, experiment: str = 'one_month_forecast', pred_months: Optional[List[int]] = None, include_latlons: bool = False, - include_pred_month: bool = True, + pred_month_embedding_size: Optional[int] = 12, include_monthly_aggs: bool = True, include_yearly_aggs: bool = True, surrounding_pixels: Optional[int] = None, ignore_vars: Optional[List[str]] = None, include_static: bool = True, device: str = 'cuda:0') -> None: + include_pred_month = False + if pred_month_embedding_size is not None: + include_pred_month 
From 4f6a2624468ef18d78d00b299819ba66a7c0079b Mon Sep 17 00:00:00 2001
From: Gabriel Tseng
Date: Wed, 28 Aug 2019 09:17:30 +0800
Subject: [PATCH 4/4] Don't hardcode the embedding size

---
 src/models/neural_networks/ealstm.py        | 24 +++++++++++++--------
 tests/models/neural_networks/test_ealstm.py |  8 +++----
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/models/neural_networks/ealstm.py b/src/models/neural_networks/ealstm.py
index 256640ba6..ace74eb33 100644
--- a/src/models/neural_networks/ealstm.py
+++ b/src/models/neural_networks/ealstm.py
@@ -21,19 +21,23 @@ def __init__(self,
                  hidden_size: int,
                  experiment: str = 'one_month_forecast',
                  pred_months: Optional[List[int]] = None,
                  include_latlons: bool = False,
-                 include_pred_month: bool = True,
+                 pred_month_embedding_size: Optional[int] = 12,
                  include_monthly_aggs: bool = True,
                  include_yearly_aggs: bool = True,
                  surrounding_pixels: Optional[int] = None,
                  ignore_vars: Optional[List[str]] = None,
                  include_static: bool = True,
                  device: str = 'cuda:0') -> None:
+        include_pred_month = False
+        if pred_month_embedding_size is not None:
+            include_pred_month = True
         super().__init__(data_folder, batch_size, experiment, pred_months,
                          include_pred_month, include_latlons, include_monthly_aggs,
                          include_yearly_aggs, surrounding_pixels, ignore_vars,
                          include_static, device)

         # to initialize and save the model
         self.hidden_size = hidden_size
+        self.pred_month_embedding_size = pred_month_embedding_size
         self.rnn_dropout = rnn_dropout
         self.input_dense = copy(dense_features)  # this is to make sure we can reload the model
         if dense_features is None:
             dense_features = []
@@ -58,11 +62,11 @@ def save_model(self):
             'hidden_size': self.hidden_size,
             'rnn_dropout': self.rnn_dropout,
             'dense_features': self.input_dense,
-            'include_pred_month': self.include_pred_month,
             'include_latlons': self.include_latlons,
             'surrounding_pixels': self.surrounding_pixels,
             'include_monthly_aggs': self.include_monthly_aggs,
             'include_yearly_aggs': self.include_yearly_aggs,
+            'pred_month_embedding_size': self.pred_month_embedding_size,
             'experiment': self.experiment,
             'ignore_vars': self.ignore_vars,
             'include_static': self.include_static,
@@ -82,7 +86,7 @@ def load(self, state_dict: Dict, features_per_month: int, current_size: Optional
                        dense_features=self.dense_features,
                        hidden_size=self.hidden_size,
                        rnn_dropout=self.rnn_dropout,
-                       include_pred_month=self.include_pred_month,
+                       pred_month_embedding_size=self.pred_month_embedding_size,
                        experiment=self.experiment,
                        current_size=self.current_size,
                        yearly_agg_size=self.yearly_agg_size,
@@ -114,7 +118,7 @@ def _initialize_model(self, x_ref: Optional[Tuple[torch.Tensor, ...]]) -> nn.Mod
                        dense_features=self.dense_features,
                        hidden_size=self.hidden_size,
                        rnn_dropout=self.rnn_dropout,
-                       include_pred_month=self.include_pred_month,
+                       pred_month_embedding_size=self.pred_month_embedding_size,
                        experiment=self.experiment,
                        yearly_agg_size=self.yearly_agg_size,
                        current_size=self.current_size,
@@ -126,13 +130,14 @@ def _initialize_model(self, x_ref: Optional[Tuple[torch.Tensor, ...]]) -> nn.Mod

 class EALSTM(nn.Module):
     def __init__(self, features_per_month, dense_features, hidden_size,
-                 rnn_dropout, include_latlons, include_pred_month,
+                 rnn_dropout, include_latlons, pred_month_embedding_size,
                  experiment, yearly_agg_size=None, current_size=None,
                  static_size=None):
         super().__init__()

         self.experiment = experiment
-        self.include_pred_month = include_pred_month
+        self.pred_month_embedding_size = pred_month_embedding_size
+        self.include_pred_month = False
         self.include_latlons = include_latlons
         self.include_yearly_agg = False
         self.include_static = False
@@ -148,9 +153,10 @@ def __init__(self, features_per_month, dense_features, hidden_size,
         if static_size is not None:
             self.include_static = True
             ea_static_size += static_size
-        if include_pred_month:
-            self.month_encoder = OneHotMonthEncoder(12)
-            ea_static_size += 12
+        if pred_month_embedding_size is not None:
+            self.include_pred_month = True
+            self.month_encoder = OneHotMonthEncoder(pred_month_embedding_size)
+            ea_static_size += pred_month_embedding_size

         self.dropout = nn.Dropout(rnn_dropout)
         self.rnn = EALSTMCell(input_size_dyn=features_per_month,
diff --git a/tests/models/neural_networks/test_ealstm.py b/tests/models/neural_networks/test_ealstm.py
index e16675528..9eab997f3 100644
--- a/tests/models/neural_networks/test_ealstm.py
+++ b/tests/models/neural_networks/test_ealstm.py
@@ -20,13 +20,13 @@ def test_save(self, tmp_path, monkeypatch):
         hidden_size = 128
         rnn_dropout = 0.25
         include_latlons = True
-        include_pred_month = True
+        pred_month_embedding_size = 12
         include_yearly_aggs = True
         yearly_agg_size = 3

         def mocktrain(self):
             self.model = EALSTM(features_per_month, dense_features, hidden_size,
-                                rnn_dropout, include_latlons, include_pred_month,
+                                rnn_dropout, include_latlons, pred_month_embedding_size,
                                 experiment='one_month_forecast', yearly_agg_size=yearly_agg_size)
             self.features_per_month = features_per_month
             self.yearly_agg_size = yearly_agg_size
@@ -34,7 +34,7 @@ def mocktrain(self):
         monkeypatch.setattr(EARecurrentNetwork, 'train', mocktrain)

         model = EARecurrentNetwork(hidden_size=hidden_size, dense_features=dense_features,
-                                   include_pred_month=include_pred_month,
+                                   pred_month_embedding_size=pred_month_embedding_size,
                                    include_latlons=include_latlons,
                                    rnn_dropout=rnn_dropout, data_folder=tmp_path,
                                    include_yearly_aggs=include_yearly_aggs)
@@ -54,7 +54,7 @@ def mocktrain(self):
         assert model_dict['hidden_size'] == hidden_size
         assert model_dict['rnn_dropout'] == rnn_dropout
         assert model_dict['dense_features'] == input_dense_features
-        assert model_dict['include_pred_month'] == include_pred_month
+        assert model_dict['pred_month_embedding_size'] == pred_month_embedding_size
         assert model_dict['include_latlons'] == include_latlons
         assert model_dict['include_yearly_aggs'] == include_yearly_aggs
         assert model_dict['experiment'] == 'one_month_forecast'
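With this final patch the embedding size is controlled from the model constructor, and passing None drops the predicted-month input entirely. A usage sketch under those assumptions (argument values are illustrative, and the remaining constructor arguments keep their defaults):

    from src.models import EARecurrentNetwork

    # learn a 6-dimensional month embedding instead of feeding the raw one-hot vector
    model = EARecurrentNetwork(hidden_size=128, data_folder='data',
                               pred_month_embedding_size=6)

    # or disable the predicted-month input altogether
    model = EARecurrentNetwork(hidden_size=128, data_folder='data',
                               pred_month_embedding_size=None)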