From 7db3995b4a886a630cb2246c32fd867a06a66726 Mon Sep 17 00:00:00 2001 From: Gustavo Lima Date: Fri, 27 Feb 2026 02:06:49 +0100 Subject: [PATCH 1/3] Add Kubernetes schemas and client protocol for SUNK monitoring [1/3] Introduce KubernetesPodRow/Payload and KubernetesNodeConditionRow/Payload schemas following the existing Row+Payload(DerivedCluster) pattern, plus a KubernetesClient Protocol mirroring SlurmClient for pluggable K8s data sources. This is the foundation for Kubernetes-layer observability in SUNK (Slurm-on-K8s) clusters. Ref: #63 --- gcm/monitoring/kubernetes/__init__.py | 2 + gcm/monitoring/kubernetes/client.py | 30 +++++++ gcm/schemas/kubernetes/__init__.py | 2 + gcm/schemas/kubernetes/node.py | 27 ++++++ gcm/schemas/kubernetes/pod.py | 29 +++++++ gcm/tests/test_kubernetes_schemas.py | 118 ++++++++++++++++++++++++++ 6 files changed, 208 insertions(+) create mode 100644 gcm/monitoring/kubernetes/__init__.py create mode 100644 gcm/monitoring/kubernetes/client.py create mode 100644 gcm/schemas/kubernetes/__init__.py create mode 100644 gcm/schemas/kubernetes/node.py create mode 100644 gcm/schemas/kubernetes/pod.py create mode 100644 gcm/tests/test_kubernetes_schemas.py diff --git a/gcm/monitoring/kubernetes/__init__.py b/gcm/monitoring/kubernetes/__init__.py new file mode 100644 index 0000000..ae1b0cf --- /dev/null +++ b/gcm/monitoring/kubernetes/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. diff --git a/gcm/monitoring/kubernetes/client.py b/gcm/monitoring/kubernetes/client.py new file mode 100644 index 0000000..60e15ae --- /dev/null +++ b/gcm/monitoring/kubernetes/client.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +from __future__ import annotations + +from typing import Iterable, Protocol + +from gcm.schemas.kubernetes.node import KubernetesNodeConditionRow +from gcm.schemas.kubernetes.pod import KubernetesPodRow + + +class KubernetesClient(Protocol): + """A low-level Kubernetes client for pod and node monitoring.""" + + def list_pods( + self, namespace: str = "", label_selector: str = "" + ) -> Iterable[KubernetesPodRow]: + """Get pod information from the Kubernetes API. + + Args: + namespace: Kubernetes namespace to filter pods. Empty string means all namespaces. + label_selector: Kubernetes label selector to filter pods. + + If an error occurs during execution, RuntimeError should be raised. + """ + + def list_node_conditions(self) -> Iterable[KubernetesNodeConditionRow]: + """Get node condition information from the Kubernetes API. + + If an error occurs during execution, RuntimeError should be raised. + """ diff --git a/gcm/schemas/kubernetes/__init__.py b/gcm/schemas/kubernetes/__init__.py new file mode 100644 index 0000000..ae1b0cf --- /dev/null +++ b/gcm/schemas/kubernetes/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. diff --git a/gcm/schemas/kubernetes/node.py b/gcm/schemas/kubernetes/node.py new file mode 100644 index 0000000..aed5067 --- /dev/null +++ b/gcm/schemas/kubernetes/node.py @@ -0,0 +1,27 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +from dataclasses import dataclass + +from gcm.schemas.slurm.derived_cluster import DerivedCluster + + +@dataclass +class KubernetesNodeConditionRow: + """Kubernetes node condition schema. + + Fields correspond to node condition data from the Kubernetes API. + """ + + name: str | None = None + condition_type: str | None = None + status: str | None = None + reason: str | None = None + message: str | None = None + + +@dataclass(kw_only=True) +class KubernetesNodePayload(DerivedCluster): + ds: str + collection_unixtime: int + cluster: str + node_condition: KubernetesNodeConditionRow diff --git a/gcm/schemas/kubernetes/pod.py b/gcm/schemas/kubernetes/pod.py new file mode 100644 index 0000000..ada3898 --- /dev/null +++ b/gcm/schemas/kubernetes/pod.py @@ -0,0 +1,29 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +from dataclasses import dataclass + +from gcm.schemas.slurm.derived_cluster import DerivedCluster + + +@dataclass +class KubernetesPodRow: + """Kubernetes pod status schema. + + Fields correspond to pod metadata and container status from the Kubernetes API. + """ + + name: str | None = None + namespace: str | None = None + node_name: str | None = None + phase: str | None = None + restart_count: int | None = None + container_name: str | None = None + slurm_job_id: str | None = None + + +@dataclass(kw_only=True) +class KubernetesPodPayload(DerivedCluster): + ds: str + collection_unixtime: int + cluster: str + pod: KubernetesPodRow diff --git a/gcm/tests/test_kubernetes_schemas.py b/gcm/tests/test_kubernetes_schemas.py new file mode 100644 index 0000000..e26dba5 --- /dev/null +++ b/gcm/tests/test_kubernetes_schemas.py @@ -0,0 +1,118 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +import unittest + +from gcm.monitoring.kubernetes.client import KubernetesClient +from gcm.schemas.kubernetes.node import ( + KubernetesNodeConditionRow, + KubernetesNodePayload, +) +from gcm.schemas.kubernetes.pod import KubernetesPodPayload, KubernetesPodRow + + +class TestKubernetesPodRow(unittest.TestCase): + def test_default_values(self) -> None: + row = KubernetesPodRow() + self.assertIsNone(row.name) + self.assertIsNone(row.namespace) + self.assertIsNone(row.node_name) + self.assertIsNone(row.phase) + self.assertIsNone(row.restart_count) + self.assertIsNone(row.container_name) + self.assertIsNone(row.slurm_job_id) + + def test_with_values(self) -> None: + row = KubernetesPodRow( + name="my-pod", + namespace="default", + node_name="node-1", + phase="Running", + restart_count=3, + container_name="main", + slurm_job_id="12345", + ) + self.assertEqual(row.name, "my-pod") + self.assertEqual(row.namespace, "default") + self.assertEqual(row.node_name, "node-1") + self.assertEqual(row.phase, "Running") + self.assertEqual(row.restart_count, 3) + self.assertEqual(row.container_name, "main") + self.assertEqual(row.slurm_job_id, "12345") + + +class TestKubernetesPodPayload(unittest.TestCase): + def test_payload_with_pod(self) -> None: + row = KubernetesPodRow(name="my-pod", phase="Running") + payload = KubernetesPodPayload( + ds="2026-02-27", + collection_unixtime=1740700000, + cluster="test-cluster", + pod=row, + ) + self.assertEqual(payload.ds, "2026-02-27") + self.assertEqual(payload.collection_unixtime, 1740700000) + self.assertEqual(payload.cluster, "test-cluster") + self.assertEqual(payload.pod.name, "my-pod") + self.assertIsNone(payload.derived_cluster) + + def test_payload_with_derived_cluster(self) -> None: + row = KubernetesPodRow(name="my-pod") + payload = KubernetesPodPayload( + ds="2026-02-27", + collection_unixtime=1740700000, + cluster="test-cluster", + derived_cluster="derived-1", + pod=row, + ) + self.assertEqual(payload.derived_cluster, "derived-1") + + +class TestKubernetesNodeConditionRow(unittest.TestCase): + def test_default_values(self) -> None: + row = KubernetesNodeConditionRow() + self.assertIsNone(row.name) + self.assertIsNone(row.condition_type) + self.assertIsNone(row.status) + self.assertIsNone(row.reason) + self.assertIsNone(row.message) + + def test_with_values(self) -> None: + row = KubernetesNodeConditionRow( + name="node-1", + condition_type="Ready", + status="True", + reason="KubeletReady", + message="kubelet is posting ready status", + ) + self.assertEqual(row.name, "node-1") + self.assertEqual(row.condition_type, "Ready") + self.assertEqual(row.status, "True") + self.assertEqual(row.reason, "KubeletReady") + + +class TestKubernetesNodePayload(unittest.TestCase): + def test_payload_with_node_condition(self) -> None: + row = KubernetesNodeConditionRow( + name="node-1", condition_type="Ready", status="True" + ) + payload = KubernetesNodePayload( + ds="2026-02-27", + collection_unixtime=1740700000, + cluster="test-cluster", + node_condition=row, + ) + self.assertEqual(payload.ds, "2026-02-27") + self.assertEqual(payload.cluster, "test-cluster") + self.assertEqual(payload.node_condition.name, "node-1") + self.assertIsNone(payload.derived_cluster) + + +class TestKubernetesClientProtocol(unittest.TestCase): + def test_protocol_is_importable(self) -> None: + """Verify the protocol can be imported and used as a type.""" + self.assertTrue(hasattr(KubernetesClient, "list_pods")) + self.assertTrue(hasattr(KubernetesClient, "list_node_conditions")) + + +if __name__ == "__main__": + unittest.main() From f0c6e42bc08ec6d74e38112cad8efdad6d3cb965 Mon Sep 17 00:00:00 2001 From: Gustavo Lima Date: Fri, 27 Feb 2026 02:08:33 +0100 Subject: [PATCH 2/3] Add Kubernetes API client with optional dependency [2/3] Implement KubernetesApiClient using the official kubernetes Python library as an optional dependency. Supports both in-cluster and kubeconfig auth, extracts slurm.coreweave.com/job-id annotations for Slurm-K8s correlation, and emits one KubernetesPodRow per container. Includes KubernetesFakeClient for testing with injectable data. Ref: #63 --- gcm/monitoring/kubernetes/api_client.py | 99 ++++++++++ gcm/monitoring/kubernetes/fake_client.py | 29 +++ gcm/tests/test_kubernetes_client.py | 242 +++++++++++++++++++++++ pyproject.toml | 1 + 4 files changed, 371 insertions(+) create mode 100644 gcm/monitoring/kubernetes/api_client.py create mode 100644 gcm/monitoring/kubernetes/fake_client.py create mode 100644 gcm/tests/test_kubernetes_client.py diff --git a/gcm/monitoring/kubernetes/api_client.py b/gcm/monitoring/kubernetes/api_client.py new file mode 100644 index 0000000..641c6e0 --- /dev/null +++ b/gcm/monitoring/kubernetes/api_client.py @@ -0,0 +1,99 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +from __future__ import annotations + +import logging +from typing import Iterable + +from gcm.monitoring.kubernetes.client import KubernetesClient +from gcm.schemas.kubernetes.node import KubernetesNodeConditionRow +from gcm.schemas.kubernetes.pod import KubernetesPodRow + +logger = logging.getLogger(__name__) + +_SLURM_JOB_ANNOTATION = "slurm.coreweave.com/job-id" + + +class KubernetesApiClient(KubernetesClient): + """Kubernetes client that queries the Kubernetes API via the official Python client. + + Requires the ``kubernetes`` package: ``pip install kubernetes``. + """ + + def __init__(self, *, in_cluster: bool = True) -> None: + try: + import kubernetes # noqa: F401 + except ImportError: + raise RuntimeError( + "The 'kubernetes' package is required for KubernetesApiClient. " + "Install it with: pip install 'gpucm[kubernetes]'" + ) + + from kubernetes import client, config + + if in_cluster: + config.load_incluster_config() + else: + config.load_kube_config() + + self._core_api = client.CoreV1Api() + + def list_pods( + self, namespace: str = "", label_selector: str = "" + ) -> Iterable[KubernetesPodRow]: + try: + if namespace: + response = self._core_api.list_namespaced_pod( + namespace=namespace, + label_selector=label_selector, + ) + else: + response = self._core_api.list_pod_for_all_namespaces( + label_selector=label_selector, + ) + except Exception as e: + raise RuntimeError(f"Failed to list pods: {e}") from e + + for pod in response.items: + annotations = pod.metadata.annotations or {} + slurm_job_id = annotations.get(_SLURM_JOB_ANNOTATION) + + container_statuses = pod.status.container_statuses or [] + if container_statuses: + for cs in container_statuses: + yield KubernetesPodRow( + name=pod.metadata.name, + namespace=pod.metadata.namespace, + node_name=pod.spec.node_name, + phase=pod.status.phase, + restart_count=cs.restart_count, + container_name=cs.name, + slurm_job_id=slurm_job_id, + ) + else: + yield KubernetesPodRow( + name=pod.metadata.name, + namespace=pod.metadata.namespace, + node_name=pod.spec.node_name, + phase=pod.status.phase, + restart_count=0, + container_name=None, + slurm_job_id=slurm_job_id, + ) + + def list_node_conditions(self) -> Iterable[KubernetesNodeConditionRow]: + try: + response = self._core_api.list_node() + except Exception as e: + raise RuntimeError(f"Failed to list nodes: {e}") from e + + for node in response.items: + conditions = node.status.conditions or [] + for condition in conditions: + yield KubernetesNodeConditionRow( + name=node.metadata.name, + condition_type=condition.type, + status=condition.status, + reason=condition.reason, + message=condition.message, + ) diff --git a/gcm/monitoring/kubernetes/fake_client.py b/gcm/monitoring/kubernetes/fake_client.py new file mode 100644 index 0000000..7523dcf --- /dev/null +++ b/gcm/monitoring/kubernetes/fake_client.py @@ -0,0 +1,29 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Iterable, List + +from gcm.monitoring.kubernetes.client import KubernetesClient +from gcm.schemas.kubernetes.node import KubernetesNodeConditionRow +from gcm.schemas.kubernetes.pod import KubernetesPodRow + + +@dataclass +class KubernetesFakeClient(KubernetesClient): + """A fake Kubernetes client for testing with injectable pod and node data.""" + + pods: List[KubernetesPodRow] = field(default_factory=list) + node_conditions: List[KubernetesNodeConditionRow] = field(default_factory=list) + + def list_pods( + self, namespace: str = "", label_selector: str = "" + ) -> Iterable[KubernetesPodRow]: + for pod in self.pods: + if namespace and pod.namespace != namespace: + continue + yield pod + + def list_node_conditions(self) -> Iterable[KubernetesNodeConditionRow]: + yield from self.node_conditions diff --git a/gcm/tests/test_kubernetes_client.py b/gcm/tests/test_kubernetes_client.py new file mode 100644 index 0000000..0e62873 --- /dev/null +++ b/gcm/tests/test_kubernetes_client.py @@ -0,0 +1,242 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +import unittest +from unittest.mock import MagicMock, patch + +from gcm.monitoring.kubernetes.fake_client import KubernetesFakeClient +from gcm.schemas.kubernetes.node import KubernetesNodeConditionRow +from gcm.schemas.kubernetes.pod import KubernetesPodRow + + +class TestKubernetesFakeClient(unittest.TestCase): + def test_list_pods_empty(self) -> None: + client = KubernetesFakeClient() + self.assertEqual(list(client.list_pods()), []) + + def test_list_pods_returns_all(self) -> None: + pods = [ + KubernetesPodRow(name="pod-1", namespace="ns-1", phase="Running"), + KubernetesPodRow(name="pod-2", namespace="ns-2", phase="Pending"), + ] + client = KubernetesFakeClient(pods=pods) + result = list(client.list_pods()) + self.assertEqual(len(result), 2) + self.assertEqual(result[0].name, "pod-1") + self.assertEqual(result[1].name, "pod-2") + + def test_list_pods_filters_by_namespace(self) -> None: + pods = [ + KubernetesPodRow(name="pod-1", namespace="ns-1", phase="Running"), + KubernetesPodRow(name="pod-2", namespace="ns-2", phase="Pending"), + ] + client = KubernetesFakeClient(pods=pods) + result = list(client.list_pods(namespace="ns-1")) + self.assertEqual(len(result), 1) + self.assertEqual(result[0].name, "pod-1") + + def test_list_node_conditions_empty(self) -> None: + client = KubernetesFakeClient() + self.assertEqual(list(client.list_node_conditions()), []) + + def test_list_node_conditions_returns_all(self) -> None: + conditions = [ + KubernetesNodeConditionRow( + name="node-1", condition_type="Ready", status="True" + ), + KubernetesNodeConditionRow( + name="node-1", condition_type="MemoryPressure", status="False" + ), + ] + client = KubernetesFakeClient(node_conditions=conditions) + result = list(client.list_node_conditions()) + self.assertEqual(len(result), 2) + self.assertEqual(result[0].condition_type, "Ready") + self.assertEqual(result[1].condition_type, "MemoryPressure") + + def test_list_pods_with_slurm_job_id(self) -> None: + pods = [ + KubernetesPodRow( + name="slurm-pod", + namespace="default", + phase="Running", + slurm_job_id="12345", + ), + ] + client = KubernetesFakeClient(pods=pods) + result = list(client.list_pods()) + self.assertEqual(result[0].slurm_job_id, "12345") + + +class TestKubernetesApiClient(unittest.TestCase): + def test_import_error_raises_runtime_error(self) -> None: + """Verify that missing kubernetes package raises RuntimeError.""" + with patch.dict("sys.modules", {"kubernetes": None}): + # Reload to pick up the patched import + import importlib + + from gcm.monitoring.kubernetes import api_client + + importlib.reload(api_client) + with self.assertRaises(RuntimeError) as ctx: + api_client.KubernetesApiClient(in_cluster=False) + self.assertIn("kubernetes", str(ctx.exception)) + + @patch("gcm.monitoring.kubernetes.api_client.KubernetesApiClient.__init__") + def test_list_pods_with_containers(self, mock_init: MagicMock) -> None: + """Test list_pods extracts pod data correctly from K8s API response.""" + mock_init.return_value = None + + from gcm.monitoring.kubernetes.api_client import ( + _SLURM_JOB_ANNOTATION, + KubernetesApiClient, + ) + + client = KubernetesApiClient.__new__(KubernetesApiClient) + client._core_api = MagicMock() + + # Build mock pod with container status + mock_container_status = MagicMock() + mock_container_status.name = "main" + mock_container_status.restart_count = 5 + + mock_pod = MagicMock() + mock_pod.metadata.name = "test-pod" + mock_pod.metadata.namespace = "default" + mock_pod.metadata.annotations = {_SLURM_JOB_ANNOTATION: "99999"} + mock_pod.spec.node_name = "node-1" + mock_pod.status.phase = "Running" + mock_pod.status.container_statuses = [mock_container_status] + + mock_response = MagicMock() + mock_response.items = [mock_pod] + client._core_api.list_pod_for_all_namespaces.return_value = mock_response + + result = list(client.list_pods()) + self.assertEqual(len(result), 1) + self.assertEqual(result[0].name, "test-pod") + self.assertEqual(result[0].namespace, "default") + self.assertEqual(result[0].node_name, "node-1") + self.assertEqual(result[0].phase, "Running") + self.assertEqual(result[0].restart_count, 5) + self.assertEqual(result[0].container_name, "main") + self.assertEqual(result[0].slurm_job_id, "99999") + + @patch("gcm.monitoring.kubernetes.api_client.KubernetesApiClient.__init__") + def test_list_pods_without_containers(self, mock_init: MagicMock) -> None: + """Test list_pods handles pods with no container statuses.""" + mock_init.return_value = None + + from gcm.monitoring.kubernetes.api_client import KubernetesApiClient + + client = KubernetesApiClient.__new__(KubernetesApiClient) + client._core_api = MagicMock() + + mock_pod = MagicMock() + mock_pod.metadata.name = "pending-pod" + mock_pod.metadata.namespace = "kube-system" + mock_pod.metadata.annotations = {} + mock_pod.spec.node_name = None + mock_pod.status.phase = "Pending" + mock_pod.status.container_statuses = None + + mock_response = MagicMock() + mock_response.items = [mock_pod] + client._core_api.list_pod_for_all_namespaces.return_value = mock_response + + result = list(client.list_pods()) + self.assertEqual(len(result), 1) + self.assertEqual(result[0].name, "pending-pod") + self.assertEqual(result[0].restart_count, 0) + self.assertIsNone(result[0].container_name) + self.assertIsNone(result[0].slurm_job_id) + + @patch("gcm.monitoring.kubernetes.api_client.KubernetesApiClient.__init__") + def test_list_pods_with_namespace_filter(self, mock_init: MagicMock) -> None: + """Test list_pods calls list_namespaced_pod when namespace is specified.""" + mock_init.return_value = None + + from gcm.monitoring.kubernetes.api_client import KubernetesApiClient + + client = KubernetesApiClient.__new__(KubernetesApiClient) + client._core_api = MagicMock() + + mock_response = MagicMock() + mock_response.items = [] + client._core_api.list_namespaced_pod.return_value = mock_response + + list(client.list_pods(namespace="my-ns", label_selector="app=test")) + client._core_api.list_namespaced_pod.assert_called_once_with( + namespace="my-ns", + label_selector="app=test", + ) + + @patch("gcm.monitoring.kubernetes.api_client.KubernetesApiClient.__init__") + def test_list_node_conditions(self, mock_init: MagicMock) -> None: + """Test list_node_conditions extracts conditions correctly.""" + mock_init.return_value = None + + from gcm.monitoring.kubernetes.api_client import KubernetesApiClient + + client = KubernetesApiClient.__new__(KubernetesApiClient) + client._core_api = MagicMock() + + mock_condition = MagicMock() + mock_condition.type = "Ready" + mock_condition.status = "True" + mock_condition.reason = "KubeletReady" + mock_condition.message = "kubelet is posting ready status" + + mock_node = MagicMock() + mock_node.metadata.name = "node-1" + mock_node.status.conditions = [mock_condition] + + mock_response = MagicMock() + mock_response.items = [mock_node] + client._core_api.list_node.return_value = mock_response + + result = list(client.list_node_conditions()) + self.assertEqual(len(result), 1) + self.assertEqual(result[0].name, "node-1") + self.assertEqual(result[0].condition_type, "Ready") + self.assertEqual(result[0].status, "True") + self.assertEqual(result[0].reason, "KubeletReady") + + @patch("gcm.monitoring.kubernetes.api_client.KubernetesApiClient.__init__") + def test_list_pods_api_error_raises_runtime_error( + self, mock_init: MagicMock + ) -> None: + """Test that API errors are wrapped in RuntimeError.""" + mock_init.return_value = None + + from gcm.monitoring.kubernetes.api_client import KubernetesApiClient + + client = KubernetesApiClient.__new__(KubernetesApiClient) + client._core_api = MagicMock() + client._core_api.list_pod_for_all_namespaces.side_effect = Exception( + "connection refused" + ) + + with self.assertRaises(RuntimeError) as ctx: + list(client.list_pods()) + self.assertIn("Failed to list pods", str(ctx.exception)) + + @patch("gcm.monitoring.kubernetes.api_client.KubernetesApiClient.__init__") + def test_list_node_conditions_api_error_raises_runtime_error( + self, mock_init: MagicMock + ) -> None: + """Test that node API errors are wrapped in RuntimeError.""" + mock_init.return_value = None + + from gcm.monitoring.kubernetes.api_client import KubernetesApiClient + + client = KubernetesApiClient.__new__(KubernetesApiClient) + client._core_api = MagicMock() + client._core_api.list_node.side_effect = Exception("forbidden") + + with self.assertRaises(RuntimeError) as ctx: + list(client.list_node_conditions()) + self.assertIn("Failed to list nodes", str(ctx.exception)) + + +if __name__ == "__main__": + unittest.main() diff --git a/pyproject.toml b/pyproject.toml index b16eefe..67ae6fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ license = { text = "MIT" } Homepage = "https://github.com/facebookresearch/gcm" [project.optional-dependencies] +kubernetes = ["kubernetes>=28.1.0"] dev = [ "parameterized", "virtualenv>=20.28.1", From 660b0b03058774a2837644e2233e807a4ad3c087 Mon Sep 17 00:00:00 2001 From: Gustavo Lima Date: Fri, 27 Feb 2026 02:21:45 +0100 Subject: [PATCH 3/3] Fix mypy type errors for optional kubernetes dependency Add type: ignore[import-not-found] comments to kubernetes imports since the library lacks type stubs and is an optional dependency. --- gcm/monitoring/kubernetes/api_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcm/monitoring/kubernetes/api_client.py b/gcm/monitoring/kubernetes/api_client.py index 641c6e0..bf655ec 100644 --- a/gcm/monitoring/kubernetes/api_client.py +++ b/gcm/monitoring/kubernetes/api_client.py @@ -22,14 +22,14 @@ class KubernetesApiClient(KubernetesClient): def __init__(self, *, in_cluster: bool = True) -> None: try: - import kubernetes # noqa: F401 + import kubernetes # type: ignore[import-not-found] # noqa: F401 except ImportError: raise RuntimeError( "The 'kubernetes' package is required for KubernetesApiClient. " "Install it with: pip install 'gpucm[kubernetes]'" ) - from kubernetes import client, config + from kubernetes import client, config # type: ignore[import-not-found] if in_cluster: config.load_incluster_config()