Skip to content

cuda runtime error (77) : an illegal memory access was encountered #5

@zhichenggeng

Description

@zhichenggeng

Hi,

I was trying to replace Conv2d in a UNet with DeformConv2d and ran into some errors. I'm using CUDA 10.0 and PyTorch 1.3.1.
Below is the code for the UNet:

class UNet(nn.Module):
    """U-shaped convolutional network with skip connections.

    The contracting path has five stages of two 3x3 convolutions (32, 64,
    128, 256 and 512 channels), with a 2x2 max-pool after each of the first
    four.  The expanding path has four stages; each doubles the resolution
    with a stride-2 transposed convolution, concatenates the same-resolution
    encoder features along the channel axis, and applies two 3x3
    convolutions.  A final 1x1 convolution maps to ``out_channels``.

    Every convolution except the last is followed by a shared in-place
    LeakyReLU (negative slope 0.2).

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels of the returned tensor.

    Note:
        Because of the four 2x poolings and 2x upsamplings, the input height
        and width should be multiples of 16; otherwise the channel-wise
        concatenations in ``forward`` see mismatching spatial sizes.
    """

    def __init__(self, in_channels=4, out_channels=4):
        super().__init__()

        def conv3x3(n_in, n_out):
            # Resolution-preserving 3x3 convolution.
            return nn.Conv2d(n_in, n_out, kernel_size=3, stride=1, padding=1)

        def halve():
            # 2x2 max-pooling, halves height and width.
            return nn.MaxPool2d(kernel_size=2)

        def upsample2x(n_in, n_out):
            # Stride-2 transposed convolution, doubles height and width.
            return nn.ConvTranspose2d(n_in, n_out, 2, stride=2)

        # One activation module reused after every convolution.
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

        # ---- contracting path -------------------------------------------
        self.conv1_1 = conv3x3(in_channels, 32)
        self.conv1_2 = conv3x3(32, 32)
        self.pool1 = halve()

        self.conv2_1 = conv3x3(32, 64)
        self.conv2_2 = conv3x3(64, 64)
        self.pool2 = halve()

        self.conv3_1 = conv3x3(64, 128)
        self.conv3_2 = conv3x3(128, 128)
        self.pool3 = halve()

        self.conv4_1 = conv3x3(128, 256)
        self.conv4_2 = conv3x3(256, 256)
        self.pool4 = halve()

        self.conv5_1 = conv3x3(256, 512)
        self.conv5_2 = conv3x3(512, 512)
        # Registered for completeness; forward() does not call it.
        self.pool5 = halve()

        # ---- expanding path ---------------------------------------------
        # The first conv of each stage takes twice the upsampled channel
        # count because the skip features are concatenated in forward().
        self.up6 = upsample2x(512, 256)
        self.conv6_1 = conv3x3(512, 256)
        self.conv6_2 = conv3x3(256, 256)

        self.up7 = upsample2x(256, 128)
        self.conv7_1 = conv3x3(256, 128)
        self.conv7_2 = conv3x3(128, 128)

        self.up8 = upsample2x(128, 64)
        self.conv8_1 = conv3x3(128, 64)
        self.conv8_2 = conv3x3(64, 64)

        self.up9 = upsample2x(64, 32)
        self.conv9_1 = conv3x3(64, 32)
        self.conv9_2 = conv3x3(32, 32)
        #self.conv9_2 = DeformConv2d(32, 32, kernel_size=3, stride=1, padding=1, groups=32)

        # 1x1 projection to the requested number of output channels.
        self.conv10 = nn.Conv2d(32, out_channels, kernel_size=1, stride=1, padding=0)

        # Kaiming-normal weights and zero biases for all (transposed)
        # convolutions registered above.
        conv_types = (nn.Conv2d, nn.ConvTranspose2d)
        for layer in self.modules():
            if not isinstance(layer, conv_types):
                continue
            nn.init.kaiming_normal_(layer.weight.data)
            nn.init.constant_(layer.bias.data, 0)

    def forward(self, x):
        """Run the network on ``x`` and return the ``conv10`` output.

        Args:
            x: input tensor with ``in_channels`` channels on dimension 1.

        Returns:
            Tensor with ``out_channels`` channels on dimension 1.
        """
        act = self.lrelu

        def stage(first, second, t):
            # Two convolutions, each followed by the shared activation.
            return act(second(act(first(t))))

        # Contracting path: keep each stage's output for the skip links.
        enc1 = stage(self.conv1_1, self.conv1_2, x)
        enc2 = stage(self.conv2_1, self.conv2_2, self.pool1(enc1))
        enc3 = stage(self.conv3_1, self.conv3_2, self.pool2(enc2))
        enc4 = stage(self.conv4_1, self.conv4_2, self.pool3(enc3))
        enc5 = stage(self.conv5_1, self.conv5_2, self.pool4(enc4))

        # Expanding path: upsample, concatenate the skip on the channel
        # axis (upsampled features first), then refine.
        merged = torch.cat((self.up6(enc5), enc4), 1)
        dec = stage(self.conv6_1, self.conv6_2, merged)

        merged = torch.cat((self.up7(dec), enc3), 1)
        dec = stage(self.conv7_1, self.conv7_2, merged)

        merged = torch.cat((self.up8(dec), enc2), 1)
        dec = stage(self.conv8_1, self.conv8_2, merged)

        merged = torch.cat((self.up9(dec), enc1), 1)
        dec = stage(self.conv9_1, self.conv9_2, merged)

        return self.conv10(dec)

This UNet model works fine. However, when I replace the nn.Conv2d in self.conv9_2 with DeformConv2d (the commented-out line in the code snippet above), the following error occurs:

    105             self.optimizer.zero_grad()
    106 
--> 107             outputs = self.model(sources)
    108 
    109             loss = self.criterion(outputs, targets)

~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

<ipython-input-2-b3122d7e7f84> in forward(self, x)
     88         up9 = torch.cat((up9, conv1), 1)
     89         conv9 = self.lrelu(self.conv9_1(up9))
---> 90         conv9 = self.lrelu(self.conv9_2(conv9))
     91 
     92         out = self.conv10(conv9)

~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/modules/activation.py in forward(self, input)
    533 
    534     def forward(self, input):
--> 535         return F.leaky_relu(input, self.negative_slope, self.inplace)
    536 
    537     def extra_repr(self):

~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/functional.py in leaky_relu(input, negative_slope, inplace)
   1059     """
   1060     if inplace:
-> 1061         result = torch._C._nn.leaky_relu_(input, negative_slope)
   1062     else:
   1063         result = torch._C._nn.leaky_relu(input, negative_slope)

RuntimeError: cuda runtime error (77) : an illegal memory access was encountered at /opt/conda/conda-bld/pytorch_1573049301898/work/aten/src/THCUNN/generic/LeakyReLU.cu:29

I also tried DeformKernel2d and DeformKernelConv2d and they both have the same issue.

Another thing worth mentioning is that the modified UNet is much slower than I expected. The running time for the original UNet is about 7 min per epoch, while the modified UNet needs almost twice as long to complete one epoch. Maybe something went wrong when I installed apex or this package.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions