diff --git a/.gitignore b/.gitignore
index ff02e9d9..950c9b99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 .idea/*
 charts/mlrun-ce/charts/*
 .DS_Store
+**/.DS_Store
+*.DS_Store
diff --git a/charts/mlrun-ce/Chart.yaml b/charts/mlrun-ce/Chart.yaml
index d79d5aa5..a4455808 100644
--- a/charts/mlrun-ce/Chart.yaml
+++ b/charts/mlrun-ce/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v1
 name: mlrun-ce
-version: 0.10.1-rc3
+version: 0.10.1-rc4
 description: MLRun Open Source Stack
 home: https://iguazio.com
 icon: https://www.iguazio.com/wp-content/uploads/2019/10/Iguazio-Logo.png
diff --git a/charts/mlrun-ce/admin_installation_values.yaml b/charts/mlrun-ce/admin_installation_values.yaml
index 7f148948..66d48a66 100644
--- a/charts/mlrun-ce/admin_installation_values.yaml
+++ b/charts/mlrun-ce/admin_installation_values.yaml
@@ -51,5 +51,9 @@ kube-prometheus-stack:
 tdengine:
   enabled: false
 
+strimzi-kafka-operator:
+  enabled: true
+  watchAnyNamespace: true
+
 kafka:
   enabled: false
diff --git a/charts/mlrun-ce/requirements.lock b/charts/mlrun-ce/requirements.lock
index c8a52980..929d4e4b 100644
--- a/charts/mlrun-ce/requirements.lock
+++ b/charts/mlrun-ce/requirements.lock
@@ -17,8 +17,8 @@ dependencies:
 - name: kube-prometheus-stack
   repository: https://prometheus-community.github.io/helm-charts
   version: 72.1.1
-- name: kafka
-  repository: https://charts.bitnami.com/bitnami
-  version: 31.3.1
-digest: sha256:d804805eb592a0f9a3be418429cee7c150170ed13f6a03b4c3152f3f8b6ae5ba
-generated: "2025-12-02T13:37:34.112042+02:00"
+- name: strimzi-kafka-operator
+  repository: https://strimzi.io/charts/
+  version: 0.48.0
+digest: sha256:542955b13f2da791768eebe8e25092a8977fba987259ebe6ad85e4b977a7963b
+generated: "2026-01-14T13:52:54.335415+02:00"
diff --git a/charts/mlrun-ce/requirements.yaml b/charts/mlrun-ce/requirements.yaml
index e16d672b..94669693 100644
--- a/charts/mlrun-ce/requirements.yaml
+++ b/charts/mlrun-ce/requirements.yaml
@@ -21,7 +21,7 @@ dependencies:
     repository: "https://prometheus-community.github.io/helm-charts"
     version: "72.1.1"
     condition: kube-prometheus-stack.enabled
-  - name: kafka
-    repository: "https://charts.bitnami.com/bitnami"
-    version: "31.3.1"
-    condition: kafka.enabled
+  - name: strimzi-kafka-operator
+    repository: "https://strimzi.io/charts/"
+    version: "0.48.0"
+    condition: strimzi-kafka-operator.enabled
diff --git a/charts/mlrun-ce/templates/kafka/kafka-bootstrap-alias.yaml b/charts/mlrun-ce/templates/kafka/kafka-bootstrap-alias.yaml
new file mode 100644
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-bootstrap-alias.yaml
@@ -0,0 +1,28 @@
+{{- /*
+Bootstrap alias: a ClusterIP Service named kafka.bootstrapAlias.name that exposes
+port 9092 of the pods labelled for the Strimzi cluster kafka.name. Rendered only when
+this release also renders that cluster, so the alias never selects nothing.
+*/ -}}
+{{- if and .Values.kafka.enabled .Values.kafka.bootstrapAlias.enabled }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Values.kafka.bootstrapAlias.name }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: kafka
+    app.kubernetes.io/component: bootstrap-alias
+    {{- include "mlrun-ce.common.labels" . | nindent 4 }}
+spec:
+  type: ClusterIP
+  ports:
+    - name: client
+      port: 9092
+      targetPort: 9092
+      protocol: TCP
+  selector:
+    strimzi.io/cluster: {{ .Values.kafka.name }}
+    strimzi.io/kind: Kafka
+    strimzi.io/name: {{ .Values.kafka.name }}-kafka
+{{- end }}
diff --git a/charts/mlrun-ce/templates/kafka/kafka-cluster.yaml b/charts/mlrun-ce/templates/kafka/kafka-cluster.yaml
new file mode 100644
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-cluster.yaml
@@ -0,0 +1,28 @@
+{{- /*
+Strimzi Kafka resource for the cluster kafka.name. Its nodes are declared by the
+KafkaNodePool in kafka-nodepool.yaml (KRaft controller + broker roles), so no
+spec.zookeeper section is rendered: Strimzi 0.46 and later no longer support
+ZooKeeper-based clusters. A listener that omits "tls" is rendered with tls: false.
+*/ -}}
+{{- if .Values.kafka.enabled }}
+apiVersion: kafka.strimzi.io/v1beta2
+kind: Kafka
+metadata:
+  name: {{ .Values.kafka.name }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: kafka
+    app.kubernetes.io/component: cluster
+    {{- include "mlrun-ce.common.labels" . | nindent 4 }}
+spec:
+  kafka:
+    listeners:
+      {{- range .Values.kafka.listeners }}
+      - name: {{ .name }}
+        port: {{ .port }}
+        type: {{ .type }}
+        tls: {{ .tls | default false }}
+      {{- end }}
+    config:
+      {{- toYaml .Values.kafka.config | nindent 6 }}
+{{- end }}
diff --git a/charts/mlrun-ce/templates/kafka/kafka-network-policy.yaml b/charts/mlrun-ce/templates/kafka/kafka-network-policy.yaml
new file mode 100644
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-network-policy.yaml
@@ -0,0 +1,66 @@
+{{- /*
+Egress NetworkPolicy for a namespace whose pods consume a Kafka cluster running in
+kafka.rbac.operatorNamespace. Rendered only when kafka.rbac.enabled is true.
+
+WARNING: the policy selects every pod in the release namespace (podSelector: {})
+with policyTypes: [Egress], so once it is applied those pods can only reach the
+destinations listed below: the Kafka pods, kube-dns, and pods in their own
+namespace. Any other outbound traffic needs an additional policy.
+*/ -}}
+{{- if .Values.kafka.rbac.enabled -}}
+{{- $operatorNamespace := .Values.kafka.rbac.operatorNamespace | default "controller" -}}
+{{- $kafkaName := .Values.kafka.name | default "kafka-stream" -}}
+{{- $currentNamespace := .Release.Namespace -}}
+---
+# NetworkPolicy: Allow egress from this namespace to Kafka namespace
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-kafka-access
+  namespace: {{ $currentNamespace }}
+  labels:
+    app.kubernetes.io/name: mlrun-ce
+    app.kubernetes.io/component: kafka-rbac
+    app.kubernetes.io/managed-by: {{ .Release.Name }}
+spec:
+  # Apply to all pods in this namespace
+  podSelector: {}
+
+  policyTypes:
+    - Egress
+
+  egress:
+    # Allow egress to Kafka namespace
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: {{ $operatorNamespace }}
+          podSelector:
+            matchLabels:
+              strimzi.io/cluster: {{ $kafkaName }}
+      ports:
+        - protocol: TCP
+          port: 9092  # client listener
+        - protocol: TCP
+          port: 9093  # controller listener
+        - protocol: TCP
+          port: 9094  # internal listener
+
+    # Allow DNS resolution (required for service discovery)
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: kube-system
+          podSelector:
+            matchLabels:
+              k8s-app: kube-dns
+      ports:
+        - protocol: UDP
+          port: 53
+        - protocol: TCP
+          port: 53
+
+    # Allow egress within same namespace
+    - to:
+        - podSelector: {}
+{{- end }}
diff --git a/charts/mlrun-ce/templates/kafka/kafka-nodepool.yaml b/charts/mlrun-ce/templates/kafka/kafka-nodepool.yaml
new file mode 100644
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-nodepool.yaml
@@ -0,0 +1,34 @@
+{{- /*
+KafkaNodePool for the cluster kafka.name: kafka.replicas nodes, each carrying both
+the controller and broker roles, with storage and resources taken from values.
+*/ -}}
+{{- if .Values.kafka.enabled }}
+apiVersion: kafka.strimzi.io/v1beta2
+kind: KafkaNodePool
+metadata:
+  name: {{ .Values.kafka.name }}-pool
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: kafka
+    app.kubernetes.io/component: nodepool
+    strimzi.io/cluster: {{ .Values.kafka.name }}
+    {{- include "mlrun-ce.common.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.kafka.replicas }}
+  roles:
+    - controller
+    - broker
+  storage:
+    type: {{ .Values.kafka.storage.type }}
+    size: {{ .Values.kafka.storage.size }}
+    {{- if .Values.kafka.storage.class }}
+    class: {{ .Values.kafka.storage.class }}
+    {{- end }}
+  resources:
+    requests:
+      memory: {{ .Values.kafka.resources.requests.memory }}
+      cpu: {{ .Values.kafka.resources.requests.cpu }}
+    limits:
+      memory: {{ .Values.kafka.resources.limits.memory }}
+      cpu: {{ .Values.kafka.resources.limits.cpu }}
+{{- end }}
diff --git a/charts/mlrun-ce/templates/kafka/kafka-rbac.yaml b/charts/mlrun-ce/templates/kafka/kafka-rbac.yaml
new file mode 100644
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-rbac.yaml
@@ -0,0 +1,94 @@
+{{- /*
+RBAC for Kafka clients in the release namespace (rendered when kafka.rbac.enabled):
+a "kafka-client" ServiceAccount here, plus a Role and RoleBinding created in
+kafka.rbac.operatorNamespace that let it manage KafkaTopic resources and read
+KafkaUser and Kafka resources there.
+*/ -}}
+{{- if .Values.kafka.rbac.enabled -}}
+{{- $operatorNamespace := .Values.kafka.rbac.operatorNamespace | default "controller" -}}
+{{- $currentNamespace := .Release.Namespace -}}
+---
+# ServiceAccount for Kafka client applications
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: kafka-client
+  namespace: {{ $currentNamespace }}
+  labels:
+    app.kubernetes.io/name: mlrun-ce
+    app.kubernetes.io/component: kafka-rbac
+    app.kubernetes.io/managed-by: {{ .Release.Name }}
+---
+# Role: Allow managing Kafka resources via CRDs in the operator namespace
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ $currentNamespace }}-kafka-resource-manager
+  namespace: {{ $operatorNamespace }}
+  labels:
+    app.kubernetes.io/name: mlrun-ce
+    app.kubernetes.io/component: kafka-rbac
+    app.kubernetes.io/managed-by: {{ .Release.Name }}
+    user-namespace: {{ $currentNamespace }}
+rules:
+  # Allow creating and managing KafkaTopic CRDs
+  - apiGroups:
+      - kafka.strimzi.io
+    resources:
+      - kafkatopics
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+  # Allow checking KafkaTopic status
+  - apiGroups:
+      - kafka.strimzi.io
+    resources:
+      - kafkatopics/status
+    verbs:
+      - get
+      - list
+      - watch
+  # Allow reading KafkaUser CRDs (if using SCRAM auth)
+  - apiGroups:
+      - kafka.strimzi.io
+    resources:
+      - kafkausers
+    verbs:
+      - get
+      - list
+      - watch
+  # Allow reading the Kafka cluster info
+  - apiGroups:
+      - kafka.strimzi.io
+    resources:
+      - kafkas
+    verbs:
+      - get
+      - list
+      - watch
+---
+# RoleBinding: Grant Kafka resource management permissions to ServiceAccount
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ $currentNamespace }}-kafka-resource-manager
+  namespace: {{ $operatorNamespace }}
+  labels:
+    app.kubernetes.io/name: mlrun-ce
+    app.kubernetes.io/component: kafka-rbac
+    app.kubernetes.io/managed-by: {{ .Release.Name }}
+    user-namespace: {{ $currentNamespace }}
+subjects:
+  - kind: ServiceAccount
+    name: kafka-client
+    namespace: {{ $currentNamespace }}
+roleRef:
+  kind: Role
+  name: {{ $currentNamespace }}-kafka-resource-manager
+  apiGroup: rbac.authorization.k8s.io
+{{- end }}
diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -397,10 +397,6 @@ pipelines:
     cacheImage:
       repository: gcr.io/google-containers/busybox
       tag: latest
-    minio:
-      repository: minio/minio
-      tag: "RELEASE.2025-10-15T17-29-55Z"
-
 
 kube-prometheus-stack:
   fullnameOverride: monitoring
@@ -450,6 +446,7 @@ kube-prometheus-stack:
       nodePort: 30020
   kube-state-metrics:
     fullnameOverride: state-metrics
+
   prometheus-node-exporter:
     fullnameOverride: node-exporter
     hostNetwork: false
@@ -487,36 +484,72 @@ tdengine:
     CLUSTER: "0"
     TAOS_REPLICA: "1"
 
+# Strimzi operator sub-chart. The Kafka and KafkaNodePool resources rendered when
+# kafka.enabled is true need the Strimzi CRDs and a running operator: enable it here
+# or install it beforehand (see admin_installation_values.yaml).
+strimzi-kafka-operator:
+  enabled: false
+  watchAnyNamespace: true
+  # defaultImageRegistry: quay.io
+  # defaultImageRepository: strimzi
+  # defaultImageTag: 0.48.0
+
 kafka:
-  global:
-    security:
-      allowInsecureImages: true
   enabled: true
-  fullnameOverride: kafka-stream
-  image:
-    repository: 'bitnamilegacy/kafka'
-  extraConfigYaml:
-    default.replication.factor: "1"
-    offsets.topic.replication.factor: "1"
-    transaction.state.log.replication.factor: "1"
-    transaction.state.log.min.isr: "1"
+  name: kafka-stream
+
+  # Bootstrap service alias configuration
+  bootstrapAlias:
+    # Create a service alias for simpler Kafka bootstrap server name
+    # When enabled, creates: {name}.{namespace}.svc.cluster.local:9092
+    # instead of the default: {name}-kafka-bootstrap.{namespace}.svc.cluster.local:9092
+    enabled: true
+    # Name for the bootstrap service alias (only used if enabled is true)
+    name: kafka-stream
+
+  replicas: 1
+
-  controller:
-    replicaCount: 1
-    resourcesPreset: "medium"
   listeners:
-    client:
-      name: CLIENT
-      containerPort: 9092
-      protocol: PLAINTEXT
-    controller:
-      name: CONTROLLER
-      containerPort: 9093
-      protocol: PLAINTEXT
-    interbroker:
-      name: INTERNAL
-      containerPort: 9094
-      protocol: PLAINTEXT
-    advertisedListeners: >-
-      CLIENT://kafka-stream:9092
-      CONTROLLER://kafka-stream-controller-headless:9093,
-      INTERNAL://kafka-stream-controller-headless:9094,
+    - name: client
+      port: 9092
+      type: internal
+      tls: false
+    - name: controller
+      port: 9093
+      type: internal
+      tls: false
+    - name: internal
+      port: 9094
+      type: internal
+      tls: false
+
+  storage:
+    type: persistent-claim
+    size: 8Gi
+    class: ""
+
+  resources:
+    requests:
+      memory: "1Gi"
+      cpu: "500m"
+    limits:
+      memory: "2Gi"
+      cpu: "1000m"
+
+  config:
+    # Replication settings for single-node setup
+    default.replication.factor: 1
+    offsets.topic.replication.factor: 1
+    transaction.state.log.replication.factor: 1
+    transaction.state.log.min.isr: 1
+
+  # Kafka RBAC for user namespaces
+  # Enable this when installing in user namespaces (mlrun, mlrun1, etc.)
+  # When enabled, creates: ServiceAccount "kafka-client" + Role/RoleBinding + NetworkPolicy
+  rbac:
+    # Enable RBAC for this namespace to access Kafka
+    enabled: false
+
+    # Operator namespace (where Kafka operator/cluster is running)
+    # Example: "mlrun-ce-cont" if that's where you installed the operator
+    operatorNamespace: controller