pytorch/test/test_cuda_compatibility.py at main · gnosyslambda/pytorch · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# Owner(s): ["module: cuda"]

import warnings
from unittest.mock import patch

import torch
import torch.cuda
from torch.testing._internal.common_utils import run_tests, TestCase


class TestCodeCompatibleWithDevice(TestCase):
    """Checks ``torch.cuda._code_compatible_with_device`` on fixed CC pairs.

    Compute capabilities are given as integers (e.g. ``80``, ``86``) and are
    always forwarded by keyword as ``device_cc`` and ``code_cc``.
    """

    @staticmethod
    def _query(*, device_cc, code_cc):
        # Single call site for the function under test.
        return torch.cuda._code_compatible_with_device(
            device_cc=device_cc, code_cc=code_cc
        )

    @classmethod
    def _query_recording_warnings(cls, *, device_cc, code_cc):
        # Returns (verdict, recorded warnings); "always" disables the
        # once-per-location suppression so every warning is captured.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            verdict = cls._query(device_cc=device_cc, code_cc=code_cc)
        return verdict, caught

    def _assert_compatible(self, *, device_cc, code_cc):
        self.assertTrue(self._query(device_cc=device_cc, code_cc=code_cc))

    def _assert_incompatible(self, *, device_cc, code_cc):
        self.assertFalse(self._query(device_cc=device_cc, code_cc=code_cc))

    def test_compatible_cases(self):
        # Same capability, and a higher minor on the device within one major.
        self._assert_compatible(device_cc=80, code_cc=80)
        self._assert_compatible(device_cc=86, code_cc=80)

    def test_backward_incompatible(self):
        # Code built for a higher minor than the device offers.
        self._assert_incompatible(device_cc=80, code_cc=86)

    def test_cross_major_incompatible(self):
        # Device major differs from the code's major, in either direction.
        self._assert_incompatible(device_cc=90, code_cc=80)
        self._assert_incompatible(device_cc=75, code_cc=80)

    def test_igpu_cases(self):
        # Per the test name, 53 and 87 are handled as integrated-GPU
        # capabilities: only an exact code_cc match is expected to pass.
        self._assert_incompatible(device_cc=53, code_cc=50)
        self._assert_incompatible(device_cc=87, code_cc=80)
        self._assert_compatible(device_cc=53, code_cc=53)

    def test_special_case_sm101_on_sm110(self):
        # Expected to be accepted even though the two values differ.
        self._assert_compatible(device_cc=110, code_cc=101)

    def test_unknown_code_cc(self):
        # code_cc=990 on a matching device: expect True plus exactly one
        # warning whose text mentions the unknown capability.
        verdict, caught = self._query_recording_warnings(device_cc=990, code_cc=990)
        self.assertTrue(verdict)
        self.assertEqual(len(caught), 1)
        self.assertIn("unknown compute capability", str(caught[0].message))

        # Same code_cc on a different device value: still True, still exactly
        # one warning (its text is not checked here).
        verdict, caught = self._query_recording_warnings(device_cc=991, code_cc=990)
        self.assertTrue(verdict)
        self.assertEqual(len(caught), 1)


@patch("torch.cuda.get_device_name", return_value="NVIDIA MOCK DEVICE")
@patch("torch.cuda.device_count", return_value=1)
@patch("torch.version.cuda", "12.6")
class TestCheckCapability(TestCase):
    """Exercises ``torch.cuda._check_capability`` against mocked devices.

    The class-level patches wrap every ``test*`` method: one device named
    "NVIDIA MOCK DEVICE" and ``torch.version.cuda`` set to "12.6". Patches
    created with ``return_value`` inject a mock into each test's arguments,
    which the ``*args`` parameters absorb.
    """

    @staticmethod
    def _check_with_warnings_as_errors():
        # Any warning raised by the check becomes an exception, failing the test.
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            torch.cuda._check_capability()

    def _sole_warning_text(self):
        # Runs the check, requires exactly one recorded warning, returns its text.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            torch.cuda._check_capability()
        self.assertEqual(len(caught), 1)
        return str(caught[0].message)

    def test_rocm_skips_check(self, *args):
        # Override the class-level "12.6" so no CUDA version is reported.
        with patch("torch.version.cuda", None):
            self.assertIsNone(torch.version.cuda)
            self._check_with_warnings_as_errors()

    @patch("torch.cuda.get_arch_list", return_value=["sm_70", "sm_80", "sm_90"])
    @patch("torch.cuda.get_device_capability", return_value=(8, 0))
    def test_compatible_device_no_warning(self, *args):
        self._check_with_warnings_as_errors()

    @patch("torch.cuda.get_arch_list", return_value=["sm_80"])
    @patch("torch.cuda.get_device_capability", return_value=(7, 5))
    def test_incompatible_device_warns(self, *args):
        expected = r"Found GPU0.*which is of compute capability.*7\.5"
        with self.assertWarnsRegex(UserWarning, expected):
            torch.cuda._check_capability()

    @patch("torch.cuda.get_arch_list", return_value=["sm_80"])
    @patch("torch.cuda.get_device_capability", return_value=(8, 7))
    def test_incompatible_device_warns_igpu(self, *args):
        expected = r"Found GPU0.*which is of compute capability.*8\.7"
        with self.assertWarnsRegex(UserWarning, expected):
            torch.cuda._check_capability()

    @patch("torch.cuda.get_arch_list", return_value=["sm_80", "sm_90"])
    def test_multiple_devices_mixed_compatibility(self, *args):
        per_device_cc = [(8, 0), (7, 5), (8, 6)]
        device_count_patch = patch(
            "torch.cuda.device_count", return_value=len(per_device_cc)
        )
        # An iterable side_effect yields one capability per call, in order.
        capability_patch = patch(
            "torch.cuda.get_device_capability", side_effect=per_device_cc
        )
        with device_count_patch, capability_patch:
            text = self._sole_warning_text()
        # Only one warning is expected, and it must name the device at index 1.
        self.assertIn("GPU1", text)

    @patch("torch.cuda.get_arch_list", return_value=["sm_80", "sm_90"])
    @patch("torch.cuda.get_device_capability", return_value=(7, 5))
    def test_warning_message_contains_device_info(self, *args):
        text = self._sole_warning_text()
        required_fragments = (
            "GPU0",
            "NVIDIA MOCK DEVICE",
            "compute capability (CC) 7.5",
            "8.0 which supports",
            "9.0 which supports",
        )
        for fragment in required_fragments:
            self.assertIn(fragment, text)

    @patch("torch.cuda.get_arch_list", return_value=["sm_60"])
    @patch("torch.cuda.get_device_capability", return_value=(7, 0))
    @patch(
        "torch.cuda.PYTORCH_RELEASES_CODE_CC",
        {"12.6": {50, 60, 70}, "12.8": {70}, "13.0": {75}},
    )
    def test_warning_suggests_compatible_pytorch_release(self, *args):
        # In the patched table, the "12.6" and "12.8" sets contain 70 and the
        # "13.0" set does not; only the former two may appear in the message.
        text = self._sole_warning_text()
        self.assertIn("12.6", text)
        self.assertIn("12.8", text)
        self.assertNotIn("13.0", text)

    @patch("torch.cuda.get_arch_list", return_value=["sm_80"])
    @patch("torch.cuda.get_device_capability", return_value=(5, 3))
    def test_warning_no_compatible_pytorch_release(self, *args):
        # The install suggestion must be absent from the warning in this case.
        text = self._sole_warning_text()
        self.assertNotIn(
            "install a PyTorch release that supports one of these CUDA versions",
            text,
        )


# When executed as a script, run this module's tests through the run_tests
# helper imported from torch.testing._internal.common_utils.
if __name__ == "__main__":
    run_tests()