tts_project/ConvTranspose1d.patch at main · HalcyonForest/tts_project · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
--- /opt/conda/lib/python3.6/site-packages/torch/nn/modules/conv.py
+++ /opt/conda/lib/python3.6/site-packages/torch/nn/modules/conv.py
@@ -1,18 +1,14 @@
-class ConvTranspose1d(_ConvTransposeNd):
-    __doc__ = r"""Applies a 1D transposed convolution operator over an input image
+class ConvTranspose1d(_ConvTransposeMixin, _ConvNd):
+    r"""Applies a 1D transposed convolution operator over an input image
     composed of several input planes.

     This module can be seen as the gradient of Conv1d with respect to its input.
     It is also known as a fractionally-strided convolution or
-    a deconvolution (although it is not an actual deconvolution operation as it does
-    not compute a true inverse of convolution). For more information, see the visualizations
-    `here`_ and the `Deconvolutional Networks`_ paper.
-
-    This module supports :ref:`TensorFloat32<tf32_on_ampere>`.
+    a deconvolution (although it is not an actual deconvolution operation).

     * :attr:`stride` controls the stride for the cross-correlation.

-    * :attr:`padding` controls the amount of implicit zero padding on both
+    * :attr:`padding` controls the amount of implicit zero-paddings on both
       sides for ``dilation * (kernel_size - 1) - padding`` number of points. See note
       below for details.

@@ -20,11 +16,29 @@
       of the output shape. See note below for details.

     * :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
-      It is harder to describe, but the link `here`_ has a nice visualization of what :attr:`dilation` does.
+      It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.

-    {groups_note}
+    * :attr:`groups` controls the connections between inputs and outputs.
+      :attr:`in_channels` and :attr:`out_channels` must both be divisible by
+      :attr:`groups`. For example,

-    Note:
+        * At groups=1, all inputs are convolved to all outputs.
+        * At groups=2, the operation becomes equivalent to having two conv
+          layers side by side, each seeing half the input channels,
+          and producing half the output channels, and both subsequently
+          concatenated.
+        * At groups= :attr:`in_channels`, each input channel is convolved with
+          its own set of filters (of size
+          :math:`\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor`).
+
+    .. note::
+
+         Depending of the size of your kernel, several (of the last)
+         columns of the input might be lost, because it is a valid `cross-correlation`_,
+         and not a full `cross-correlation`_.
+         It is up to the user to add proper padding.
+
+    .. note::
         The :attr:`padding` argument effectively adds ``dilation * (kernel_size - 1) - padding``
         amount of zero padding to both sizes of the input. This is set so that
         when a :class:`~torch.nn.Conv1d` and a :class:`~torch.nn.ConvTranspose1d`
@@ -36,14 +50,7 @@
         that :attr:`output_padding` is only used to find output shape, but does
         not actually add zero-padding to output.

-    Note:
-        In some circumstances when using the CUDA backend with CuDNN, this operator
-        may select a nondeterministic algorithm to increase performance. If this is
-        undesirable, you can try to make the operation deterministic (potentially at
-        a performance cost) by setting ``torch.backends.cudnn.deterministic =
-        True``.
-        Please see the notes on :doc:`/notes/randomness` for background.
-
+    .. include:: cudnn_deterministic.rst

     Args:
         in_channels (int): Number of channels in the input image
@@ -57,11 +64,10 @@
         groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
         bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
         dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
-    """.format(**reproducibility_notes, **convolution_notes) + r"""

     Shape:
-        - Input: :math:`(N, C_{in}, L_{in})` or :math:`(C_{in}, L_{in})`
-        - Output: :math:`(N, C_{out}, L_{out})` or :math:`(C_{out}, L_{out})`, where
+        - Input: :math:`(N, C_{in}, L_{in})`
+        - Output: :math:`(N, C_{out}, L_{out})` where

           .. math::
               L_{out} = (L_{in} - 1) \times \text{stride} - 2 \times \text{padding} + \text{dilation}
@@ -69,39 +75,18 @@

     Attributes:
         weight (Tensor): the learnable weights of the module of shape
-                         :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},`
-                         :math:`\text{kernel\_size})`.
-                         The values of these weights are sampled from
+                         :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},
+                               \text{kernel\_size})`. The values of these weights are sampled from
                          :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
-                         :math:`k = \frac{groups}{C_\text{out} * \text{kernel\_size}}`
+                         :math:`k = \frac{1}{C_\text{in} * \text{kernel\_size}}`
         bias (Tensor):   the learnable bias of the module of shape (out_channels).
                          If :attr:`bias` is ``True``, then the values of these weights are
                          sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
-                         :math:`k = \frac{groups}{C_\text{out} * \text{kernel\_size}}`
-
-    .. _`here`:
-        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
-
-    .. _`Deconvolutional Networks`:
-        https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf
+                         :math:`k = \frac{1}{C_\text{in} * \text{kernel\_size}}`
     """

-    def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        kernel_size: _size_1_t,
-        stride: _size_1_t = 1,
-        padding: _size_1_t = 0,
-        output_padding: _size_1_t = 0,
-        groups: int = 1,
-        bias: bool = True,
-        dilation: _size_1_t = 1,
-        padding_mode: str = 'zeros',
-        device=None,
-        dtype=None
-    ) -> None:
-        factory_kwargs = {'device': device, 'dtype': dtype}
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, output_padding=0, groups=1, bias=True, dilation=1):
         kernel_size = _single(kernel_size)
         stride = _single(stride)
         padding = _single(padding)
@@ -109,17 +94,12 @@
         output_padding = _single(output_padding)
         super(ConvTranspose1d, self).__init__(
             in_channels, out_channels, kernel_size, stride, padding, dilation,
-            True, output_padding, groups, bias, padding_mode, **factory_kwargs)
+            True, output_padding, groups, bias)

-    def forward(self, input: Tensor, output_size: Optional[List[int]] = None) -> Tensor:
-        if self.padding_mode != 'zeros':
-            raise ValueError('Only `zeros` padding mode is supported for ConvTranspose1d')
-
-        assert isinstance(self.padding, tuple)
-        # One cannot replace List by Tuple or Sequence in "_output_padding" because
-        # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`.
-        output_padding = self._output_padding(
-            input, output_size, self.stride, self.padding, self.kernel_size, self.dilation)  # type: ignore[arg-type]
+    @weak_script_method
+    def forward(self, input, output_size=None):
+        # type: (Tensor, Optional[List[int]]) -> Tensor
+        output_padding = self._output_padding(input, output_size, self.stride, self.padding, self.kernel_size)
         return F.conv_transpose1d(
             input, self.weight, self.bias, self.stride, self.padding,
             output_padding, self.groups, self.dilation)