cuda runtime error (77) : an illegal memory access was encountered

Hi,

I was trying to replace Conv2d in a UNet with DeformConv2d and ran into some errors. I'm using `CUDA 10.0` and `pytorch 1.3.1`.
Below is the code for the UNet:
```
class UNet(nn.Module):
    """U-Net with four 2x down-sampling stages and four 2x up-sampling stages.

    Every stage applies two 3x3 convolutions (stride 1, padding 1), each
    followed by a shared in-place LeakyReLU with negative slope 0.2. The
    decoder up-samples with 2x2 transposed convolutions and concatenates the
    encoder output of the matching stage along dimension 1. A final 1x1
    convolution maps the 32 feature channels to ``out_channels``.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels of the returned tensor.
    """

    def __init__(self, in_channels=4, out_channels=4):
        super().__init__()

        def conv3x3(n_in, n_out):
            # Padding 1 with a 3x3 kernel keeps the spatial size unchanged.
            return nn.Conv2d(n_in, n_out, kernel_size=3, stride=1, padding=1)

        def up2x(n_in, n_out):
            # 2x2 transposed convolution with stride 2.
            return nn.ConvTranspose2d(n_in, n_out, 2, stride=2)

        # One activation module shared by every stage.
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

        # ---- Encoder ----
        self.conv1_1 = conv3x3(in_channels, 32)
        self.conv1_2 = conv3x3(32, 32)
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.conv2_1 = conv3x3(32, 64)
        self.conv2_2 = conv3x3(64, 64)
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.conv3_1 = conv3x3(64, 128)
        self.conv3_2 = conv3x3(128, 128)
        self.pool3 = nn.MaxPool2d(kernel_size=2)

        self.conv4_1 = conv3x3(128, 256)
        self.conv4_2 = conv3x3(256, 256)
        self.pool4 = nn.MaxPool2d(kernel_size=2)

        # ---- Bottleneck ----
        self.conv5_1 = conv3x3(256, 512)
        self.conv5_2 = conv3x3(512, 512)
        # NOTE: pool5 is registered but forward() never calls it.
        self.pool5 = nn.MaxPool2d(kernel_size=2)

        # ---- Decoder ----
        # The first conv of each stage takes twice the up-sampled channel
        # count because forward() concatenates the skip tensor first.
        self.up6 = up2x(512, 256)
        self.conv6_1 = conv3x3(512, 256)
        self.conv6_2 = conv3x3(256, 256)

        self.up7 = up2x(256, 128)
        self.conv7_1 = conv3x3(256, 128)
        self.conv7_2 = conv3x3(128, 128)

        self.up8 = up2x(128, 64)
        self.conv8_1 = conv3x3(128, 64)
        self.conv8_2 = conv3x3(64, 64)

        self.up9 = up2x(64, 32)
        self.conv9_1 = conv3x3(64, 32)
        self.conv9_2 = conv3x3(32, 32)
        #self.conv9_2 = DeformConv2d(32, 32, kernel_size=3, stride=1, padding=1, groups=32)

        # 1x1 projection onto the requested number of output channels.
        self.conv10 = nn.Conv2d(32, out_channels, kernel_size=1, stride=1, padding=0)

        # Kaiming-normal weights and zero biases for every (transposed)
        # convolution registered above.
        for layer in self.modules():
            if isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d)):
                nn.init.kaiming_normal_(layer.weight.data)
                nn.init.constant_(layer.bias.data, 0)

    def forward(self, x):
        """Run the network on ``x`` and return the ``conv10`` output.

        The skip concatenations require each up-sampled tensor to have the
        same spatial size as the matching encoder tensor; with four 2x
        poolings this presumably means height and width divisible by 16
        (TODO confirm for other sizes).
        """
        act = self.lrelu

        # Encoder: keep each stage's output for the skip connections.
        skip1 = act(self.conv1_2(act(self.conv1_1(x))))
        skip2 = act(self.conv2_2(act(self.conv2_1(self.pool1(skip1)))))
        skip3 = act(self.conv3_2(act(self.conv3_1(self.pool2(skip2)))))
        skip4 = act(self.conv4_2(act(self.conv4_1(self.pool3(skip3)))))

        # Bottleneck.
        feat = act(self.conv5_2(act(self.conv5_1(self.pool4(skip4)))))

        # Decoder: up-sample, concatenate the skip tensor, then two convs.
        feat = torch.cat((self.up6(feat), skip4), 1)
        feat = act(self.conv6_2(act(self.conv6_1(feat))))

        feat = torch.cat((self.up7(feat), skip3), 1)
        feat = act(self.conv7_2(act(self.conv7_1(feat))))

        feat = torch.cat((self.up8(feat), skip2), 1)
        feat = act(self.conv8_2(act(self.conv8_1(feat))))

        feat = torch.cat((self.up9(feat), skip1), 1)
        feat = act(self.conv9_2(act(self.conv9_1(feat))))

        return self.conv10(feat)
```

This UNet model works fine. However, when I replace `nn.Conv2d` in `self.conv9_2` with `DeformConv2d` (commented out in the above code snippet), the following error occurs:
```
    105             self.optimizer.zero_grad()
    106 
--> 107             outputs = self.model(sources)
    108 
    109             loss = self.criterion(outputs, targets)

~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

<ipython-input-2-b3122d7e7f84> in forward(self, x)
     88         up9 = torch.cat((up9, conv1), 1)
     89         conv9 = self.lrelu(self.conv9_1(up9))
---> 90         conv9 = self.lrelu(self.conv9_2(conv9))
     91 
     92         out = self.conv10(conv9)

~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/modules/activation.py in forward(self, input)
    533 
    534     def forward(self, input):
--> 535         return F.leaky_relu(input, self.negative_slope, self.inplace)
    536 
    537     def extra_repr(self):

~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/functional.py in leaky_relu(input, negative_slope, inplace)
   1059     """
   1060     if inplace:
-> 1061         result = torch._C._nn.leaky_relu_(input, negative_slope)
   1062     else:
   1063         result = torch._C._nn.leaky_relu(input, negative_slope)

RuntimeError: cuda runtime error (77) : an illegal memory access was encountered at /opt/conda/conda-bld/pytorch_1573049301898/work/aten/src/THCUNN/generic/LeakyReLU.cu:29
```
I also tried `DeformKernel2d` and `DeformKernelConv2d` and they both have the same issue.

Another thing worth mentioning is that the modified UNet is much slower than I expected. The running time for the original UNet is about 7 min per epoch, while the modified UNet needs almost twice as long to complete one epoch. Maybe something went wrong when I installed `apex` or this package.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

cuda runtime error (77) : an illegal memory access was encountered #5

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

cuda runtime error (77) : an illegal memory access was encountered #5

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions