NIS/couplings.py at master · givinar/NIS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
import os
import sys
import torch
import torch.nn as nn

import transform
import splines

class CouplingTransform(transform.Transform):
    """
    A base class for coupling layers. Supports 2D inputs (NxD), as well as 4D inputs for
    images (NxCxHxW). For images the splitting is done on the channel dimension, using the
    provided 1D mask. The optional one-blob input encoding is only available for 2D inputs.

    Subclasses must implement `_transform_dim_multiplier`, `_coupling_transform_forward`
    and `_coupling_transform_inverse`.

    Free inspiration from :
        https://github.com/bayesiains/nsf, arXiv:1906.04032 (PyTorch)
        https://gitlab.com/i-flow/i-flow/, arXiv:2001.05486 (Tensorflow)
    """

    def __init__(self, mask, transform_net_create_fn, blob=None, num_context_features=0):
        """
        Constructor.
        Args:
            mask: a 1-dim tensor, tuple or list. It indexes inputs as follows:
                * If `mask[i] > 0`, `input[i]` will be transformed.
                * If `mask[i] <= 0`, `input[i]` will be passed unchanged.
            transform_net_create_fn : callable `(in_features, out_features) -> network`
                TODO : might want to include options in the transform definition
            blob : int for number of bins to include in the input one-blob encoding;
                a falsy value (None, 0) disables the encoding
            num_context_features: number of context features in transform net
        Raises:
            ValueError: if the mask is not a non-empty 1-dim tensor, or if `blob` is
                truthy but not a positive int.
        """
        mask = torch.as_tensor(mask)
        if mask.dim() != 1:
            raise ValueError('Mask must be a 1-dim tensor.')
        if mask.numel() <= 0:
            raise ValueError('Mask can\'t be empty.')

        super().__init__()
        self.features = len(mask)
        self.num_context_features = num_context_features
        features_vector = torch.arange(self.features)

        # Index tensors are registered as buffers so they follow the module across devices.
        self.register_buffer('identity_features', features_vector.masked_select(mask <= 0))
        self.register_buffer('transform_features', features_vector.masked_select(mask > 0))

        assert self.num_identity_features + self.num_transform_features == self.features

        self.blob = bool(blob)
        if self.blob:
            if not isinstance(blob, int):
                raise ValueError('Blob encoding requires a number of bins')
            if blob < 0:
                raise ValueError('Blob encoding requires a positive number of bins')
            self.nbins_in = int(blob)

        # With one-blob encoding every identity feature is expanded into `nbins_in` inputs.
        inputs_per_identity_feature = self.nbins_in if self.blob else 1
        self.transform_net = transform_net_create_fn(
            self.num_identity_features * inputs_per_identity_feature + num_context_features,
            self.num_transform_features * self._transform_dim_multiplier()
        )

    @property
    def num_identity_features(self):
        """Number of features passed through unchanged."""
        return len(self.identity_features)

    @property
    def num_transform_features(self):
        """Number of features that get transformed."""
        return len(self.transform_features)

    def one_blob(self, xd):
        """
        One-blob encode the identity features.

        Each scalar is replaced by `nbins_in` Gaussian activations
        `exp(-nbins_in**2 / 2 * (centre_k - x)**2)` evaluated at the bin centres
        `(k + 0.5) / nbins_in`, `k = 0 .. nbins_in - 1`.

        Args:
            xd: 2D tensor of shape (N, num_identity_features).
        Returns:
            Tensor of shape (N, num_identity_features * nbins_in).
        Raises:
            ValueError: if `xd` is not 2D.
        """
        if xd.dim() != 2:
            raise ValueError('One-blob encoding requires 2D inputs.')

        nbins = self.nbins_in
        # Build the centres from an integer range: a float-step arange such as
        # arange(0., 1., 1./nbins) can return nbins + 1 elements because of rounding.
        centres = (torch.arange(nbins, device=xd.device) + 0.5) / nbins
        # (N, D, 1) against (nbins,) broadcasts to (N, D, nbins).
        res = torch.exp(((-nbins * nbins) / 2.) * (centres - xd.unsqueeze(-1)) ** 2)
        return res.reshape(-1, self.num_identity_features * nbins)

    def _apply_coupling(self, inputs, context, coupling_fn):
        """Shared body of `forward`/`inverse`; `coupling_fn` is the direction-specific hook."""
        if inputs.dim() not in [2, 4]:
            raise ValueError('Inputs must be a 2D or a 4D tensor.')

        if inputs.shape[1] != self.features:
            raise ValueError('Expected features = {}, got {}.'.format(
                self.features, inputs.shape[1]))

        identity_split = inputs[:, self.identity_features, ...]
        transform_split = inputs[:, self.transform_features, ...]

        # The conditioner network only ever sees the untouched half (optionally encoded).
        net_inputs = self.one_blob(identity_split) if self.blob else identity_split
        transform_params = self.transform_net(net_inputs, context)

        transform_split, absdet = coupling_fn(
            inputs=transform_split,
            transform_params=transform_params
        )

        # Scatter both halves back to their original feature positions.
        outputs = torch.empty_like(inputs)
        outputs[:, self.identity_features, ...] = identity_split
        outputs[:, self.transform_features, ...] = transform_split

        return outputs, absdet

    def forward(self, inputs, context=None):
        """
        Apply the coupling layer.

        Args:
            inputs: 2D (NxD) or 4D (NxCxHxW) tensor with `self.features` entries on dim 1.
            context: optional conditioning passed as second argument to the transform net.
        Returns:
            Tuple `(outputs, absdet)`; `absdet` is whatever
            `_coupling_transform_forward` returns as its second value.
        Raises:
            ValueError: on wrong input rank or feature count.
        """
        return self._apply_coupling(inputs, context, self._coupling_transform_forward)

    def inverse(self, inputs, context=None):
        """
        Apply the inverse of the coupling layer.

        Same arguments, return layout and errors as `forward`, but the transformed
        half goes through `_coupling_transform_inverse`.
        """
        return self._apply_coupling(inputs, context, self._coupling_transform_inverse)

    def _transform_dim_multiplier(self):
        """Number of features to output for each transform dimension."""
        raise NotImplementedError()

    def _coupling_transform_forward(self, inputs, transform_params):
        """Forward pass of the coupling transform."""
        raise NotImplementedError()

    def _coupling_transform_inverse(self, inputs, transform_params):
        """Inverse of the coupling transform."""
        raise NotImplementedError()

class AffineCouplingTransform(CouplingTransform):
    """Affine coupling layer: the transformed variables are scaled, then shifted.

    The transform net emits two values per transformed feature; the scale is
    constrained to (1/e, e) by passing it through `exp(tanh(.))`.

    Reference:
    > L. Dinh et al., Density estimation using Real NVP, ICLR 2017.
    """
    def _transform_dim_multiplier(self):
        """Two network outputs (shift and raw scale) per transformed feature."""
        return 2

    def _scale_and_shift(self, transform_params):
        """Split the network output along dim 1 into `(scale, shift)`."""
        split_at = self.num_transform_features
        # First half of dim 1 is the shift, second half the unconstrained scale.
        shift = transform_params[:, :split_at, ...]
        raw_scale = transform_params[:, split_at:, ...]
        scale = torch.tanh(raw_scale).exp()
        return scale, shift

    def _coupling_transform_forward(self, inputs, transform_params):
        """Return `(inputs * scale + shift, prod of scale over dim 1)`."""
        scale, shift = self._scale_and_shift(transform_params)
        transformed = inputs * scale + shift
        return transformed, scale.prod(dim=1)

    def _coupling_transform_inverse(self, inputs, transform_params):
        """Return `((inputs - shift) / scale, prod of 1/scale over dim 1)`."""
        scale, shift = self._scale_and_shift(transform_params)
        restored = (inputs - shift) / scale
        return restored, (1 / scale).prod(dim=1)


class AdditiveCouplingTransform(AffineCouplingTransform):
    """Additive coupling layer: the affine coupling layer with the scale fixed to one.

    Reference:
    > L. Dinh et al., NICE:  Non-linear  Independent  Components  Estimation,
    > arXiv:1410.8516, 2014.
    """
    def _transform_dim_multiplier(self):
        """One network output (the shift) per transformed feature."""
        return 1

    def _scale_and_shift(self, transform_params):
        """Use the whole network output as shift, paired with a scale of ones."""
        # The unit scale is created with requires_grad=True, as in the original code.
        unit_scale = torch.ones_like(transform_params, requires_grad=True)
        return unit_scale, transform_params


class PiecewiseCouplingTransform(CouplingTransform):
    """Base class for coupling layers whose element-wise map is a piecewise CDF.

    Subclasses implement `_piecewise_cdf`; forward and inverse differ only in the
    `inverse` flag handed to it.
    """
    def _coupling_transform_forward(self, inputs, transform_params):
        """Run the piecewise transform in the forward direction."""
        return self._coupling_transform(inputs, transform_params, inverse=False)

    def _coupling_transform_inverse(self, inputs, transform_params):
        """Run the piecewise transform in the inverse direction."""
        return self._coupling_transform(inputs, transform_params, inverse=True)

    def _coupling_transform(self, inputs, transform_params, inverse=False):
        """Give each input element its own trailing parameter axis, then apply the CDF.

        Returns `(outputs, absdet.prod(1))`, where both come from `_piecewise_cdf`.
        """
        rank = inputs.dim()
        if rank == 2:
            batch, feats = inputs.shape
            # For 2D data, reshape transform_params from Bx(D*?) to BxDx?
            transform_params = transform_params.reshape(batch, feats, -1)
        elif rank == 4:
            batch, chans, height, width = inputs.shape
            # For images, reshape transform_params from Bx(C*?)xHxW to BxCxHxWx?
            grouped = transform_params.reshape(batch, chans, -1, height, width)
            transform_params = grouped.permute(0, 1, 3, 4, 2)

        outputs, absdet = self._piecewise_cdf(inputs, transform_params, inverse)
        # Collapse the per-element factors along dim 1.
        return outputs, absdet.prod(1)

    def _piecewise_cdf(self, inputs, transform_params, inverse=False):
        """Element-wise piecewise transform; must be provided by subclasses."""
        raise NotImplementedError()


class PiecewiseLinearCouplingTransform(PiecewiseCouplingTransform):
    """
    Piecewise-linear coupling layer; the transform net emits `num_bins` unnormalized
    PDF values per transformed feature.

    Reference:
    > Müller et al., Neural Importance Sampling, arXiv:1808.03856, 2018.
    """
    def __init__(self,
                 mask,
                 transform_net_create_fn,
                 blob = None,
                 num_bins=10,
                 num_context_features=0):
        # Must be assigned before the base constructor runs: it calls
        # _transform_dim_multiplier() to size the transform net.
        self.num_bins = num_bins

        super().__init__(
            mask,
            transform_net_create_fn,
            blob,
            num_context_features=num_context_features,
        )

    def _transform_dim_multiplier(self):
        """One network output per bin."""
        return self.num_bins

    def _piecewise_cdf(self, inputs, transform_params, inverse=False):
        """Delegate to `splines.linear_spline`, using the parameters as unnormalized PDF."""
        return splines.linear_spline(
            inputs=inputs,
            unnormalized_pdf=transform_params,
            inverse=inverse
        )

class PiecewiseQuadraticCouplingTransform(PiecewiseCouplingTransform):
    """
    Piecewise-quadratic coupling layer. The transform net emits `2 * num_bins + 1`
    values per transformed feature: `num_bins` unnormalized widths followed by
    `num_bins + 1` unnormalized heights.

    Reference:
    > Müller et al., Neural Importance Sampling, arXiv:1808.03856, 2018.
    """
    def __init__(self,
                 mask,
                 transform_net_create_fn,
                 blob = None,
                 num_bins=10,
                 min_bin_width=splines.DEFAULT_MIN_BIN_WIDTH,
                 min_bin_height=splines.DEFAULT_MIN_BIN_HEIGHT,
                 num_context_features=0):
        # These must be set before the base constructor runs: it calls
        # _transform_dim_multiplier() to size the transform net.
        self.num_bins = num_bins
        self.min_bin_width = min_bin_width
        self.min_bin_height = min_bin_height

        super().__init__(mask, transform_net_create_fn, blob, num_context_features=num_context_features)

    def _transform_dim_multiplier(self):
        """`num_bins` widths plus `num_bins + 1` heights per transformed feature."""
        return self.num_bins * 2 + 1

    def _piecewise_cdf(self, inputs, transform_params, inverse=False):
        """Split the parameters into widths/heights and call `splines.quadratic_spline`.

        Args:
            inputs: values to transform.
            transform_params: parameters with the per-element values on the last axis.
            inverse: whether to apply the inverse transform.
        Returns:
            Whatever `splines.quadratic_spline` returns (unpacked by the caller as
            `(outputs, absdet)`).
        """
        unnormalized_widths = transform_params[..., :self.num_bins]
        unnormalized_heights = transform_params[..., self.num_bins:]

        if hasattr(self.transform_net, 'hidden_features'):
            # Plain-Python square root: numpy is not imported in this module, so the
            # previous `np.sqrt` raised NameError. The division is out-of-place so the
            # network output (of which these are views) is not mutated.
            norm = float(self.transform_net.hidden_features) ** 0.5
            unnormalized_widths = unnormalized_widths / norm
            unnormalized_heights = unnormalized_heights / norm

        return splines.quadratic_spline(
            inputs=inputs,
            unnormalized_widths=unnormalized_widths,
            unnormalized_heights=unnormalized_heights,
            inverse=inverse,
            min_bin_width=self.min_bin_width,
            min_bin_height=self.min_bin_height
        )


class PiecewiseCubicCouplingTransform(PiecewiseCouplingTransform):
    """
    Piecewise-cubic coupling layer. The transform net emits `2 * num_bins + 2` values
    per transformed feature: `num_bins` unnormalized widths, `num_bins` unnormalized
    heights, then one left and one right unnormalized boundary derivative.
    """
    def __init__(self,
                 mask,
                 transform_net_create_fn,
                 blob = None,
                 num_bins=10,
                 min_bin_width=splines.DEFAULT_MIN_BIN_WIDTH,
                 min_bin_height=splines.DEFAULT_MIN_BIN_HEIGHT,
                 num_context_features=0):
        # These must be set before the base constructor runs: it calls
        # _transform_dim_multiplier() to size the transform net.
        self.num_bins = num_bins
        self.min_bin_width = min_bin_width
        self.min_bin_height = min_bin_height

        super().__init__(mask, transform_net_create_fn, blob, num_context_features=num_context_features)

    def _transform_dim_multiplier(self):
        """`num_bins` widths, `num_bins` heights and two boundary derivatives."""
        return self.num_bins * 2 + 2

    def _piecewise_cdf(self, inputs, transform_params, inverse=False):
        """Split the parameters and call `splines.cubic_spline`.

        Args:
            inputs: values to transform.
            transform_params: parameters with the per-element values on the last axis.
            inverse: whether to apply the inverse transform.
        Returns:
            Whatever `splines.cubic_spline` returns (unpacked by the caller as
            `(outputs, absdet)`).
        """
        unnormalized_widths = transform_params[..., :self.num_bins]
        unnormalized_heights = transform_params[..., self.num_bins:2*self.num_bins]
        # Single-index selection drops the last axis; `[..., None]` restores it.
        unnorm_derivatives_left = transform_params[..., 2*self.num_bins][..., None]
        unnorm_derivatives_right = transform_params[..., 2*self.num_bins + 1][..., None]

        if hasattr(self.transform_net, 'hidden_features'):
            # Plain-Python square root: numpy is not imported in this module, so the
            # previous `np.sqrt` raised NameError. The division is out-of-place so the
            # network output (of which these are views) is not mutated.
            # Only widths and heights are rescaled; the derivatives are left as-is.
            norm = float(self.transform_net.hidden_features) ** 0.5
            unnormalized_widths = unnormalized_widths / norm
            unnormalized_heights = unnormalized_heights / norm

        return splines.cubic_spline(
            inputs=inputs,
            unnormalized_widths=unnormalized_widths,
            unnormalized_heights=unnormalized_heights,
            unnorm_derivatives_left=unnorm_derivatives_left,
            unnorm_derivatives_right=unnorm_derivatives_right,
            inverse=inverse,
            min_bin_width=self.min_bin_width,
            min_bin_height=self.min_bin_height
        )