Merged
47 changes: 47 additions & 0 deletions docs/druid_cr.md
@@ -68,3 +68,50 @@ spec:
runtime.properties: |
...
```

## Authentication Setup

Authentication can be configured to secure communication with the cluster API using credentials stored in Kubernetes secrets.

Currently, these credentials are used for the compaction, rules, dynamic configuration, and ingestion APIs.

This applies not only to the `Druid` CR but also to the `DruidIngestion` CR.

### Configuring Basic Authentication

To use basic authentication, you need to create a Kubernetes secret containing the username and password. This secret is then referenced in the Druid CR.

Steps to Configure Basic Authentication:

1. **Create a Kubernetes Secret:** Store your username and password in a Kubernetes secret. Below is an example of how to define the secret in a YAML file:

```yaml
apiVersion: v1
kind: Secret
metadata:
name: mycluster-admin-operator
namespace: druid
type: Opaque
data:
OperatorUserName: <base64-encoded-username>
OperatorPassword: <base64-encoded-password>
```

Replace `<base64-encoded-username>` and `<base64-encoded-password>` with the base64-encoded values of your desired username and password.

2. **Define Authentication in the `Druid` CR:** Reference the secret in your `Druid` custom resource. Here is an example:

```yaml
apiVersion: druid.apache.org/v1alpha1
kind: Druid
metadata:
name: agent
spec:
auth:
secretRef:
name: mycluster-admin-operator
namespace: druid
type: basic-auth
```

This configuration tells the operator to use basic authentication, with credentials retrieved from the `mycluster-admin-operator` secret, when calling the Druid cluster API.
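For reference, the secret values are standard base64, and HTTP basic authentication sends them as an `Authorization: Basic <token>` header, where the token is the base64 encoding of `username:password`. A minimal sketch of both encodings (the credentials and helper names are illustrative, not the operator's actual code):

```python
import base64

def b64(value: str) -> str:
    """Base64-encode a string for use in a Secret's data field."""
    return base64.b64encode(value.encode()).decode()

def basic_auth_header(username: str, password: str) -> str:
    """Build the Authorization header value used by HTTP basic auth."""
    token = base64.b64encode(f"{username}:{password}".encode()).decode()
    return f"Basic {token}"

print(b64("admin"))                         # YWRtaW4=  -> value for OperatorUserName
print(basic_auth_header("admin", "admin"))  # Basic YWRtaW46YWRtaW4=
```

Alternatively, `kubectl create secret generic mycluster-admin-operator --from-literal=OperatorUserName=... --from-literal=OperatorPassword=... -n druid` performs the base64 encoding for you.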
213 changes: 213 additions & 0 deletions docs/features.md
@@ -225,3 +225,216 @@ All the probes definitions are documented bellow:
```

</details>

## Dynamic Configurations

The Druid operator now supports specifying dynamic configurations directly within the Druid manifest, giving fine-tuned control over Druid's behavior at runtime.


### Overlord Dynamic Configurations

Usage: Add overlord dynamic configurations under the `middlemanagers` section within the `nodes` element of the `Druid` manifest.

<details>

<summary>Overlord Dynamic Configurations</summary>

```yaml
spec:
nodes:
middlemanagers:
dynamicConfig:
type: default
selectStrategy:
type: fillCapacityWithCategorySpec
workerCategorySpec:
categoryMap: {}
strong: true
autoScaler: null
```

</details>

### Coordinator Dynamic Configurations

Adjust coordinator settings to optimize data balancing and segment management.

Usage: Include coordinator dynamic configurations in the `coordinators` section within the `nodes` element of the `Druid` manifest.

Ensure that every parameter you specify is supported by your Druid version; otherwise the operator cannot apply the dynamic configuration correctly.

<details>

<summary>Coordinator Dynamic Configurations</summary>

```yaml
spec:
nodes:
coordinators:
dynamicConfig:
millisToWaitBeforeDeleting: 900000
mergeBytesLimit: 524288000
mergeSegmentsLimit: 100
maxSegmentsToMove: 5
replicantLifetime: 15
replicationThrottleLimit: 10
balancerComputeThreads: 1
killDataSourceWhitelist: []
killPendingSegmentsSkipList: []
maxSegmentsInNodeLoadingQueue: 100
decommissioningNodes: []
pauseCoordination: false
replicateAfterLoadTimeout: false
useRoundRobinSegmentAssignment: true
```

</details>

## nativeSpec Ingestion Configuration

The `nativeSpec` feature in the Druid Ingestion Operator provides a flexible and robust way to define ingestion specifications directly within Kubernetes manifests using YAML format. This enhancement allows users to leverage Kubernetes-native formats, facilitating easier integration with Kubernetes tooling and practices while offering a more readable and maintainable configuration structure.

### Key Benefits

* **Kubernetes-Native Integration:** By using YAML, the `nativeSpec` aligns with Kubernetes standards, enabling seamless integration with Kubernetes-native tools and processes, such as kubectl, Helm, and GitOps workflows.
* **Improved Readability and Maintainability:** YAML's human-readable format makes it easier for operators and developers to understand and modify ingestion configurations without deep JSON knowledge or tools.
* **Enhanced Configuration Management:** Leveraging YAML facilitates the use of environment-specific configurations and overrides, making it easier to manage configurations across different stages of deployment (e.g., development, staging, production).

### Usage

**Specifying `nativeSpec` in Kubernetes Manifests**

To use `nativeSpec`, define your ingestion specification in YAML format under the `nativeSpec` field of the `DruidIngestion` custom resource. When set, this field supersedes the traditional JSON `spec` field, providing a more integrated approach to configuration management.

<details>

<summary>nativeSpec Example</summary>

```yaml
apiVersion: druid.apache.org/v1alpha1
kind: DruidIngestion
metadata:
labels:
app.kubernetes.io/name: druidingestion
app.kubernetes.io/instance: druidingestion-sample
name: kafka-1
spec:
suspend: false
druidCluster: example-cluster
ingestion:
type: kafka
nativeSpec:
type: kafka
spec:
dataSchema:
dataSource: metrics-kafka-1
timestampSpec:
column: timestamp
format: auto
dimensionsSpec:
dimensions: []
dimensionExclusions:
- timestamp
- value
metricsSpec:
- name: count
type: count
- name: value_sum
fieldName: value
type: doubleSum
- name: value_min
fieldName: value
type: doubleMin
- name: value_max
fieldName: value
type: doubleMax
granularitySpec:
type: uniform
segmentGranularity: HOUR
queryGranularity: NONE
ioConfig:
topic: metrics
inputFormat:
type: json
consumerProperties:
bootstrap.servers: localhost:9092
taskCount: 1
replicas: 1
taskDuration: PT1H
tuningConfig:
type: kafka
maxRowsPerSegment: 5000000

```

</details>
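Because `nativeSpec` is plain YAML, it parses to exactly the same structure as the equivalent JSON `spec`. A quick way to convince yourself, using a fragment of the example above (this sketch assumes the third-party PyYAML package is available):

```python
import json

import yaml  # PyYAML; third-party dependency

# A fragment of the nativeSpec above, in YAML:
yaml_fragment = """
timestampSpec:
  column: timestamp
  format: auto
dimensionsSpec:
  dimensions: []
  dimensionExclusions:
    - timestamp
    - value
"""

# The same fragment as it would appear in a JSON ingestion spec:
json_fragment = """
{
  "timestampSpec": {"column": "timestamp", "format": "auto"},
  "dimensionsSpec": {
    "dimensions": [],
    "dimensionExclusions": ["timestamp", "value"]
  }
}
"""

# Both parse to the identical structure:
assert yaml.safe_load(yaml_fragment) == json.loads(json_fragment)
```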

## Set Rules and Compaction in DruidIngestion

### Rules

Rules in Druid define automated behaviors such as data retention, load balancing, or replication. They can be configured in the `rules` section of the `DruidIngestion` CR.

<details>

<summary>Rules Example</summary>

```yaml
apiVersion: druid.apache.org/v1alpha1
kind: DruidIngestion
metadata:
name: example-druid-ingestion
spec:
ingestion:
type: native-batch
rules:
- type: "loadForever"
tieredReplicants:
_default_tier: 2
- type: "dropByPeriod"
period: "P7D"
```

</details>
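Rule lists like the one above are applied per data source through the coordinator's rules endpoint, and their order matters: Druid evaluates retention rules top to bottom, using the first rule that matches a segment. A minimal client-side sketch (the coordinator URL is a placeholder, and this illustrates the API call rather than the operator's actual code):

```python
import json
import urllib.request

COORDINATOR = "http://coordinator:8081"  # placeholder; use your coordinator/router URL

def rules_payload(rules: list) -> bytes:
    """Serialize the CR's rules list, preserving order -- Druid evaluates
    retention rules top to bottom, so order is significant."""
    return json.dumps(rules).encode()

def set_rules(datasource: str, rules: list) -> None:
    """POST the rule list for one data source to the coordinator."""
    req = urllib.request.Request(
        f"{COORDINATOR}/druid/coordinator/v1/rules/{datasource}",
        data=rules_payload(rules),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    urllib.request.urlopen(req)
```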

### Compaction

Compaction in Druid optimizes data storage and query performance by merging smaller data segments into larger ones. The compaction configuration can be specified in the `compaction` section of the `DruidIngestion` CR.

The Druid Operator ensures accurate application of compaction settings by:

1. **Retrieving current settings:** It performs a GET request against the Druid API to fetch the existing compaction settings.

2. **Comparing and updating:** If the current settings differ from the desired settings specified in the Kubernetes CRD manifest, the operator updates Druid with the desired configuration.

3. **Ensuring accuracy:** This approach verifies that settings are actually applied, covering cases where Druid returns a 200 HTTP status code without persisting the changes.
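The GET-compare-POST loop above can be sketched as follows. The coordinator URL and the exact comparison are illustrative assumptions, not the operator's actual code; the endpoint paths follow Druid's documented compaction configuration API:

```python
import json
import urllib.error
import urllib.request

COORDINATOR = "http://coordinator:8081"  # placeholder; use your coordinator/router URL

def needs_update(current, desired) -> bool:
    """Compare the config Druid reports with the desired config from the CR.

    Only the keys present in the desired config are compared, since Druid
    fills in defaults for everything else it reports back.
    """
    if current is None:
        return True
    return any(current.get(key) != value for key, value in desired.items())

def reconcile_compaction(datasource: str, desired: dict) -> None:
    base = f"{COORDINATOR}/druid/coordinator/v1/config/compaction"
    # 1. GET the current settings (an HTTP error such as 404 means none exist yet).
    try:
        with urllib.request.urlopen(f"{base}/{datasource}") as resp:
            current = json.load(resp)
    except urllib.error.HTTPError:
        current = None
    # 2./3. POST the desired config only when it actually differs.
    if needs_update(current, desired):
        req = urllib.request.Request(
            base,
            data=json.dumps(desired).encode(),
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        urllib.request.urlopen(req)
```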

<details>

<summary>Compaction Example</summary>

```yaml
apiVersion: druid.apache.org/v1alpha1
kind: DruidIngestion
metadata:
name: example-druid-ingestion
spec:
ingestion:
type: native-batch
compaction:
ioConfig:
type: "index_parallel"
inputSpec:
type: "dataSource"
dataSource: "my-data-source"
tuningConfig:
maxNumConcurrentSubTasks: 4
granularitySpec:
segmentGranularity: "day"
queryGranularity: "none"
rollup: false
taskPriority: "high"
taskContext: '{"priority": 75}'
```

</details>
5 changes: 0 additions & 5 deletions e2e/configs/druid-cr.yaml
@@ -189,20 +189,15 @@ spec:
mergeBytesLimit: 524288000
mergeSegmentsLimit: 100
maxSegmentsToMove: 5
percentOfSegmentsToConsiderPerMove: 100
useBatchedSegmentSampler: true
replicantLifetime: 15
replicationThrottleLimit: 10
balancerComputeThreads: 1
emitBalancingStats: true
killDataSourceWhitelist: []
killPendingSegmentsSkipList: []
maxSegmentsInNodeLoadingQueue: 100
decommissioningNodes: []
decommissioningMaxPercentOfMaxSegmentsToMove: 70
pauseCoordination: false
replicateAfterLoadTimeout: false
maxNonPrimaryReplicantsToLoad: 2147483647
useRoundRobinSegmentAssignment: true

historicals:
2 changes: 1 addition & 1 deletion e2e/e2e.sh
@@ -75,7 +75,7 @@ else
echo "Supervisor task ID: $supervisorTaskId"
fi

# Running a test Kafka DruidIngestion resource and wait for the task to be submitted
# Running a test Kafka DruidIngestion resource using nativeSpec and wait for the task to be submitted
kubectl apply -f e2e/configs/kafka-ingestion-native.yaml -n ${NAMESPACE}
sleep 30 # wait for the manager to submit the ingestion task

68 changes: 68 additions & 0 deletions examples/kafka-ingestion-native.yaml
@@ -0,0 +1,68 @@
apiVersion: druid.apache.org/v1alpha1
kind: DruidIngestion
metadata:
labels:
app.kubernetes.io/name: druidingestion
app.kubernetes.io/instance: druidingestion-sample
name: kafka-2
spec:
suspend: false
druidCluster: tiny-cluster
ingestion:
type: kafka
compaction:
tuningConfig:
type: "kafka"
partitionsSpec:
type: "dynamic"
skipOffsetFromLatest: "PT0S"
granularitySpec:
segmentGranularity: "DAY"
rules:
- type: dropByPeriod
period: P1M
includeFuture: true
- type: broadcastByPeriod
period: P1M
includeFuture: true
nativeSpec:
type: kafka
spec:
dataSchema:
dataSource: metrics-kafka-2
timestampSpec:
column: timestamp
format: auto
dimensionsSpec:
dimensions: []
dimensionExclusions:
- timestamp
- value
metricsSpec:
- name: count
type: count
- name: value_sum
fieldName: value
type: doubleSum
- name: value_min
fieldName: value
type: doubleMin
- name: value_max
fieldName: value
type: doubleMax
granularitySpec:
type: uniform
segmentGranularity: HOUR
queryGranularity: NONE
ioConfig:
topic: metrics
inputFormat:
type: json
consumerProperties:
bootstrap.servers: localhost:9092
taskCount: 1
replicas: 1
taskDuration: PT1H
tuningConfig:
type: kafka
maxRowsPerSegment: 5000000