diff --git a/cmd/main.go b/cmd/main.go index 6a152ce16..752173edd 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -107,11 +107,11 @@ func main() { // as certificates issued by a trusted Certificate Authority (CA). The primary risk is potentially allowing // unauthorized access to sensitive metrics data. Consider replacing with CertDir, CertName, and KeyName // to provide certificates, ensuring the server communicates using trusted and secure certificates. - TLSOpts: tlsOpts, + TLSOpts: tlsOpts, FilterProvider: filters.WithAuthenticationAndAuthorization, } - // TODO: enable https for /metrics endpoint by default + // TODO: enable https for /metrics endpoint by default // if secureMetrics { // // FilterProvider is used to protect the metrics endpoint with authn/authz. // // These configurations ensure that only authorized users and service accounts @@ -221,6 +221,13 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "Standalone") os.Exit(1) } + if err = (&intController.TelemetryReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Telemetry") + os.Exit(1) + } //+kubebuilder:scaffold:builder if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { diff --git a/config/manager/controller_manager_telemetry.yaml b/config/manager/controller_manager_telemetry.yaml new file mode 100644 index 000000000..0ed5a866b --- /dev/null +++ b/config/manager/controller_manager_telemetry.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: manager-telemetry +data: + status: | + { + "lastTransmission": "" + } \ No newline at end of file diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 47f07b0e6..608a55066 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -1,5 +1,6 @@ resources: - manager.yaml +- controller_manager_telemetry.yaml 
generatorOptions: disableNameSuffixHash: true @@ -16,5 +17,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: docker.io/splunk/splunk-operator - newTag: 3.0.0 + newName: 667741767953.dkr.ecr.us-west-2.amazonaws.com/mqiuecr/splunk/splunk-operator + newTag: "1" diff --git a/internal/controller/telemetry_controller.go b/internal/controller/telemetry_controller.go new file mode 100644 index 000000000..8f49faa00 --- /dev/null +++ b/internal/controller/telemetry_controller.go @@ -0,0 +1,110 @@ +/* +Copyright (c) 2018-2022 Splunk Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise" + ctrl "sigs.k8s.io/controller-runtime" + "time" + + "github.com/pkg/errors" + metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics" + + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" +) + +const ( + // TODO: Below two constants are defined at default/kustomization.yaml, need to get it programmatically? 
+ ConfigMapNamePrefix = "splunk-operator-" + ConfigMapLabelName = "splunk-operator" + + telemetryRetryDelay = time.Second * 60 +) + +// TelemetryReconciler periodically reads all keys under the "telemetry" configmap +// in the Splunk operator namespace and logs all key values. +type TelemetryReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch + +func (r *TelemetryReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + metrics.ReconcileCounters.With(metrics.GetPrometheusLabels(req, "Telemetry")).Inc() + defer recordInstrumentionData(time.Now(), req, "controller", "Telemetry") + + reqLogger := log.FromContext(ctx) + reqLogger = reqLogger.WithValues("telemetry", req.NamespacedName) + + reqLogger.Info("Reconciling telemetry") + + // Fetch the ConfigMap + cm := &corev1.ConfigMap{} + err := r.Get(ctx, req.NamespacedName, cm) + if err != nil { + if k8serrors.IsNotFound(err) { + reqLogger.Info("telemetry configmap not found; requeueing", "period(seconds)", int(telemetryRetryDelay/time.Second)) + return ctrl.Result{Requeue: true, RequeueAfter: telemetryRetryDelay}, nil + } + return ctrl.Result{}, errors.Wrap(err, "could not load telemetry configmap") + } + + // Log all key/value pairs from the configmap data. 
+ if len(cm.Data) == 0 { + reqLogger.Info("telemetry configmap has no data keys") + return ctrl.Result{Requeue: true, RequeueAfter: telemetryRetryDelay}, nil + } + + reqLogger.Info("start", "Telemetry configmap version", cm.GetResourceVersion()) + + result, err := enterprise.ApplyTelemetry(ctx, r.Client, cm) + if err != nil { + reqLogger.Error(err, "Failed") + return ctrl.Result{Requeue: true, RequeueAfter: telemetryRetryDelay}, nil + } + if result.Requeue && result.RequeueAfter != 0 { + reqLogger.Info("Requeued", "period(seconds)", int(result.RequeueAfter/time.Second)) + } + + return result, err +} + +// SetupWithManager sets up the controller with the Manager. +func (r *TelemetryReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&corev1.ConfigMap{}). + WithEventFilter(predicate.NewPredicateFuncs(func(obj client.Object) bool { + labels := obj.GetLabels() + if labels == nil { + return false + } + return obj.GetName() == enterprise.GetTelemetryConfigMapName(ConfigMapNamePrefix) && labels["name"] == ConfigMapLabelName + })). + WithOptions(controller.Options{ + MaxConcurrentReconciles: 1, + }). + Complete(r) +} diff --git a/internal/controller/telemetry_controller_test.go b/internal/controller/telemetry_controller_test.go new file mode 100644 index 000000000..c73ac5a23 --- /dev/null +++ b/internal/controller/telemetry_controller_test.go @@ -0,0 +1,64 @@ +package controller + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +var _ = Describe("Telemetry Controller", func() { + var ( + ctx context.Context + cmName = "splunk-operator-telemetry" + ns = "test-telemetry-ns" + labels = map[string]string{"name": "splunk-operator"} + ) + + BeforeEach(func() { + ctx = context.TODO() + }) + + It("Reconcile returns requeue when ConfigMap not found", func() { + builder := fake.NewClientBuilder().WithScheme(scheme.Scheme) + c := builder.Build() + r := &TelemetryReconciler{Client: c, Scheme: scheme.Scheme} + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: cmName, Namespace: ns}} + result, err := r.Reconcile(ctx, req) + Expect(err).To(BeNil()) + Expect(result.Requeue).To(BeTrue()) + Expect(result.RequeueAfter).To(Equal(time.Second * 60)) + }) + + It("Reconcile returns requeue when ConfigMap has no data", func() { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: cmName, Namespace: ns, Labels: labels}, + Data: map[string]string{}, + } + builder := fake.NewClientBuilder().WithScheme(scheme.Scheme).WithObjects(cm) + c := builder.Build() + r := &TelemetryReconciler{Client: c, Scheme: scheme.Scheme} + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: cmName, Namespace: ns}} + result, err := r.Reconcile(ctx, req) + Expect(err).To(BeNil()) + Expect(result.Requeue).To(BeTrue()) + Expect(result.RequeueAfter).To(Equal(time.Second * 60)) + }) + + // Additional tests for error and success cases can be added here +}) + +/* +func TestTelemetryController(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Telemetry Controller Suite") +} + +*/ diff --git a/pkg/splunk/client/enterprise.go b/pkg/splunk/client/enterprise.go index 8bc36b08a..8a6247320 100644 --- 
a/pkg/splunk/client/enterprise.go +++ b/pkg/splunk/client/enterprise.go @@ -16,6 +16,7 @@ package client import ( + "bytes" "crypto/tls" "encoding/json" "fmt" @@ -954,6 +955,49 @@ func (c *SplunkClient) SetIdxcSecret(idxcSecret string) error { return c.Do(request, expectedStatus, nil) } +type LicenseInfo struct { + ID string `json:"guid"` + Type string `json:"type"` +} + +func (c *SplunkClient) GetLicenseInfo() (*LicenseInfo, error) { + apiResponse := struct { + Entry []struct { + Content LicenseInfo `json:"content"` + } `json:"entry"` + }{} + path := "/services/licenser/licenses" + err := c.Get(path, &apiResponse) + if err != nil { + return nil, err + } + if len(apiResponse.Entry) < 1 { + return nil, fmt.Errorf("invalid response from %s%s", c.ManagementURI, path) + } + return &apiResponse.Entry[0].Content, nil +} + +type TelemetryResponse struct { + Message string `json:"message"` + MetricValueID string `json:"metricValueId"` +} + +func (c *SplunkClient) SendTelemetry(path string, body []byte) (*TelemetryResponse, error) { + endpoint := fmt.Sprintf("%s%s", c.ManagementURI, path) + request, err := http.NewRequest("POST", endpoint, bytes.NewReader(body)) + if err != nil { + return nil, err + } + request.Header.Set("Content-Type", "application/json") + expectedStatus := []int{201} + var response TelemetryResponse + err = c.Do(request, expectedStatus, &response) + if err != nil { + return nil, err + } + return &response, nil +} + // RestartSplunk restarts specific Splunk instance // Can be used for any Splunk Instance // See https://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTsystem#server.2Fcontrol.2Frestart diff --git a/pkg/splunk/enterprise/afwscheduler.go b/pkg/splunk/enterprise/afwscheduler.go index cc99eca0d..f01190574 100644 --- a/pkg/splunk/enterprise/afwscheduler.go +++ b/pkg/splunk/enterprise/afwscheduler.go @@ -138,26 +138,6 @@ func runCustomCommandOnSplunkPods(ctx context.Context, cr splcommon.MetaObject, return err } -// Get extension for name 
of telemetry app -func getTelAppNameExtension(crKind string) (string, error) { - switch crKind { - case "Standalone": - return "stdaln", nil - case "LicenseMaster": - return "lmaster", nil - case "LicenseManager": - return "lmanager", nil - case "SearchHeadCluster": - return "shc", nil - case "ClusterMaster": - return "cmaster", nil - case "ClusterManager": - return "cmanager", nil - default: - return "", errors.New("Invalid CR kind for telemetry app") - } -} - // addTelApp adds a telemetry app var addTelApp = func(ctx context.Context, podExecClient splutil.PodExecClientImpl, replicas int32, cr splcommon.MetaObject) error { var err error @@ -170,26 +150,20 @@ var addTelApp = func(ctx context.Context, podExecClient splutil.PodExecClientImp // Create pod exec client crKind := cr.GetObjectKind().GroupVersionKind().Kind - // Get Tel App Name Extension - appNameExt, err := getTelAppNameExtension(crKind) - if err != nil { - return err - } - // Commands to run on pods var command1, command2 string // Handle non SHC scenarios(Standalone, CM, LM) if crKind != "SearchHeadCluster" { // Create dir on pods - command1 = fmt.Sprintf(createTelAppNonShcString, appNameExt, appNameExt, telAppConfString, appNameExt, telAppDefMetaConfString, appNameExt) + command1 = fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString) // App reload command2 = telAppReloadString } else { // Create dir on pods - command1 = fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, appNameExt, shcAppsLocationOnDeployer, appNameExt, telAppConfString, shcAppsLocationOnDeployer, appNameExt, telAppDefMetaConfString, shcAppsLocationOnDeployer, appNameExt) + command1 = fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer) // Bundle push command2 = fmt.Sprintf(applySHCBundleCmdStr, GetSplunkStatefulsetURL(cr.GetNamespace(), SplunkSearchHead, 
cr.GetName(), 0, false), "/tmp/status.txt") diff --git a/pkg/splunk/enterprise/afwscheduler_test.go b/pkg/splunk/enterprise/afwscheduler_test.go index 87c5f2ba8..ed7f6fe3c 100644 --- a/pkg/splunk/enterprise/afwscheduler_test.go +++ b/pkg/splunk/enterprise/afwscheduler_test.go @@ -4203,31 +4203,6 @@ func TestAdjustClusterAppsFilePermissions(t *testing.T) { mockPodExecReturnContexts[0].StdErr = "" } -func TestGetTelAppNameExtension(t *testing.T) { - crKinds := map[string]string{ - "Standalone": "stdaln", - "LicenseMaster": "lmaster", - "LicenseManager": "lmanager", - "SearchHeadCluster": "shc", - "ClusterMaster": "cmaster", - "ClusterManager": "cmanager", - } - - // Test all CR kinds - for k, v := range crKinds { - val, _ := getTelAppNameExtension(k) - if v != val { - t.Errorf("Invalid extension crkind %v, extension %v", k, v) - } - } - - // Test error code - _, err := getTelAppNameExtension("incorrect value") - if err == nil { - t.Errorf("Expected error") - } -} - func TestAddTelAppCMaster(t *testing.T) { ctx := context.TODO() @@ -4246,7 +4221,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Define mock podexec context podExecCommands := []string{ - fmt.Sprintf(createTelAppNonShcString, "cmaster", "cmaster", telAppConfString, "cmaster", telAppDefMetaConfString, "cmaster"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), telAppReloadString, } @@ -4270,7 +4245,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Test shc podExecCommands = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shc", shcAppsLocationOnDeployer, "shc", telAppConfString, shcAppsLocationOnDeployer, "shc", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shc"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), fmt.Sprintf(applySHCBundleCmdStr, GetSplunkStatefulsetURL(shcCr.GetNamespace(), 
SplunkSearchHead, shcCr.GetName(), 0, false), "/tmp/status.txt"), } @@ -4286,7 +4261,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Test non-shc error 1 podExecCommandsError := []string{ - fmt.Sprintf(createTelAppNonShcString, "cmerror", "cmerror", telAppConfString, "cmerror", telAppDefMetaConfString, "cmerror"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), } mockPodExecReturnContextsError := []*spltest.MockPodExecReturnContext{ @@ -4305,7 +4280,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Test non-shc error 2 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppNonShcString, "cm", "cm", telAppConfString, "cm", telAppDefMetaConfString, "cm"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), } var mockPodExecClientError2 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: cmCr} mockPodExecClientError2.AddMockPodExecReturnContexts(ctx, podExecCommandsError, mockPodExecReturnContextsError...) @@ -4317,7 +4292,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Test shc error 1 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shcerror", shcAppsLocationOnDeployer, "shcerror", telAppConfString, shcAppsLocationOnDeployer, "shcerror", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shcerror"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), } var mockPodExecClientError3 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: shcCr} @@ -4330,7 +4305,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Test shc error 2 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shc", shcAppsLocationOnDeployer, "shc", telAppConfString, shcAppsLocationOnDeployer, "shc", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shc"), + 
fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), } var mockPodExecClientError4 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: shcCr} mockPodExecClientError4.AddMockPodExecReturnContexts(ctx, podExecCommandsError, mockPodExecReturnContextsError...) @@ -4359,7 +4334,7 @@ func TestAddTelAppCManager(t *testing.T) { // Define mock podexec context podExecCommands := []string{ - fmt.Sprintf(createTelAppNonShcString, "cmanager", "cmanager", telAppConfString, "cmanager", telAppDefMetaConfString, "cmanager"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), telAppReloadString, } @@ -4383,7 +4358,7 @@ func TestAddTelAppCManager(t *testing.T) { // Test shc podExecCommands = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shc", shcAppsLocationOnDeployer, "shc", telAppConfString, shcAppsLocationOnDeployer, "shc", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shc"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), fmt.Sprintf(applySHCBundleCmdStr, GetSplunkStatefulsetURL(shcCr.GetNamespace(), SplunkSearchHead, shcCr.GetName(), 0, false), "/tmp/status.txt"), } @@ -4399,7 +4374,7 @@ func TestAddTelAppCManager(t *testing.T) { // Test non-shc error 1 podExecCommandsError := []string{ - fmt.Sprintf(createTelAppNonShcString, "cmerror", "cmerror", telAppConfString, "cmerror", telAppDefMetaConfString, "cmerror"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), } mockPodExecReturnContextsError := []*spltest.MockPodExecReturnContext{ @@ -4418,7 +4393,7 @@ func TestAddTelAppCManager(t *testing.T) { // Test non-shc error 2 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppNonShcString, "cm", 
"cm", telAppConfString, "cm", telAppDefMetaConfString, "cm"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), } var mockPodExecClientError2 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: cmCr} mockPodExecClientError2.AddMockPodExecReturnContexts(ctx, podExecCommandsError, mockPodExecReturnContextsError...) @@ -4430,7 +4405,7 @@ func TestAddTelAppCManager(t *testing.T) { // Test shc error 1 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shcerror", shcAppsLocationOnDeployer, "shcerror", telAppConfString, shcAppsLocationOnDeployer, "shcerror", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shcerror"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), } var mockPodExecClientError3 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: shcCr} @@ -4443,7 +4418,7 @@ func TestAddTelAppCManager(t *testing.T) { // Test shc error 2 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shc", shcAppsLocationOnDeployer, "shc", telAppConfString, shcAppsLocationOnDeployer, "shc", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shc"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), } var mockPodExecClientError4 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: shcCr} mockPodExecClientError4.AddMockPodExecReturnContexts(ctx, podExecCommandsError, mockPodExecReturnContextsError...) 
diff --git a/pkg/splunk/enterprise/names.go b/pkg/splunk/enterprise/names.go index 3d0439db7..f788edfe1 100644 --- a/pkg/splunk/enterprise/names.go +++ b/pkg/splunk/enterprise/names.go @@ -201,13 +201,23 @@ access = read : [ * ], write : [ admin ] ` // Command to create telemetry app on non SHC scenarios - createTelAppNonShcString = "mkdir -p /opt/splunk/etc/apps/app_tel_for_sok8s_%s/default/; mkdir -p /opt/splunk/etc/apps/app_tel_for_sok8s_%s/metadata/; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok8s_%s/default/app.conf; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok8s_%s/metadata/default.meta" + createTelAppNonShcString = "mkdir -p /opt/splunk/etc/apps/app_tel_for_sok8s/default/; mkdir -p /opt/splunk/etc/apps/app_tel_for_sok8s/metadata/; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok8s/default/app.conf; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok8s/metadata/default.meta" // Command to create telemetry app on SHC scenarios - createTelAppShcString = "mkdir -p %s/app_tel_for_sok8s_%s/default/; mkdir -p %s/app_tel_for_sok8s_%s/metadata/; echo -e \"%s\" > %s/app_tel_for_sok8s_%s/default/app.conf; echo -e \"%s\" > %s/app_tel_for_sok8s_%s/metadata/default.meta" + createTelAppShcString = "mkdir -p %s/app_tel_for_sok8s/default/; mkdir -p %s/app_tel_for_sok8s/metadata/; echo -e \"%s\" > %s/app_tel_for_sok8s/default/app.conf; echo -e \"%s\" > %s/app_tel_for_sok8s/metadata/default.meta" // Command to reload app configuration telAppReloadString = "curl -k -u admin:`cat /mnt/splunk-secrets/password` https://localhost:8089/services/apps/local/_reload" + + // Name of the telemetry configmap: -manager-telemetry + telConfigMapTemplateStr = "%smanager-telemetry" + + // Name of the telemetry app: app_tel_for_sok8s + telAppNameStr = "app_tel_for_sok8s" + telSOKVersionKey = "version" + telLicenseInfoKey = "license_info" + + managerConfigMapTemplateStr = "%smanager-config" ) const ( @@ -363,3 +373,13 @@ func GetLivenessDriverFileDir() string { func 
GetStartupScriptName() string { return startupScriptName } + +// GetTelemetryConfigMapName returns the name of telemetry configmap +func GetTelemetryConfigMapName(namePrefix string) string { + return fmt.Sprintf(telConfigMapTemplateStr, namePrefix) +} + +// GetManagerConfigMapName returns the name of manager configmap +func GetManagerConfigMapName(namePrefix string) string { + return fmt.Sprintf(managerConfigMapTemplateStr, namePrefix) +} diff --git a/pkg/splunk/enterprise/telemetry.go b/pkg/splunk/enterprise/telemetry.go new file mode 100644 index 000000000..3a3152bf0 --- /dev/null +++ b/pkg/splunk/enterprise/telemetry.go @@ -0,0 +1,399 @@ +package enterprise + +import ( + "context" + "encoding/json" + "errors" + "fmt" + enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + splclient "github.com/splunk/splunk-operator/pkg/splunk/client" + splcommon "github.com/splunk/splunk-operator/pkg/splunk/common" + splutil "github.com/splunk/splunk-operator/pkg/splunk/util" + appsv1 "k8s.io/api/apps/v1" + "os" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "time" + + corev1 "k8s.io/api/core/v1" +) + +const ( + // TODO: Should be set to one day for the release + requeAfterInSeconds = 30 + // TODO: Should change to false for the release + isTestMode = true + // TODO: Ideally the version string should be set from the release tag + SOK_VERSION = "3.0.0" + + telStatusKey = "status" +) + +//+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch + +type Telemetry struct { + Type string `json:"type"` + Component string `json:"component"` + OptInRequired int `json:"optInRequired"` + Data map[string]interface{} `json:"data"` + Test bool `json:"test"` +} + +type TelemetryStatus struct { + LastTransmission string `json:"lastTransmission"` +} + +func ApplyTelemetry(ctx context.Context, client 
splcommon.ControllerClient, cm *corev1.ConfigMap) (reconcile.Result, error) { + + // unless modified, reconcile for this object will be requeued after requeAfterInSeconds seconds + result := reconcile.Result{ + Requeue: true, + RequeueAfter: time.Second * requeAfterInSeconds, + } + + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("ApplyTelemetry") + + for k, v := range cm.Data { + scopedLog.Info("Retrieved telemetry keys", "key", k, "value", v) + } + + var data map[string]interface{} + data = make(map[string]interface{}) + + // Add SOK version + data[telSOKVersionKey] = SOK_VERSION + // Add per CR telemetry + crList := getAllCustomResources(ctx, client) + + collectCRTelData(ctx, client, crList, data) + // Add telemetry set in this configmap, i.e splunk POD's telemetry + CollectCMTelData(ctx, cm, data) + + // Now send the telemetry + for _, crs := range crList { + for _, cr := range crs { + success := SendTelemetry(ctx, client, cr, data) + if success { + updateLastTransmissionTime(ctx, client, cm) + return result, nil + } + } + } + + return result, errors.New("Failed to send telemetry data") +} + +func updateLastTransmissionTime(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap) error { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("updateLastTransmissionTime") + + var status TelemetryStatus + status.LastTransmission = time.Now().UTC().Format(time.RFC3339) + + updated, err := json.MarshalIndent(status, "", " ") + if err != nil { + scopedLog.Error(err, "Failed to marshal telemetry status") + return err + } + cm.Data[telStatusKey] = string(updated) + if err = client.Update(ctx, cm); err != nil { + scopedLog.Error(err, "Failed to update telemetry status in configmap") + return err + } + scopedLog.Info("Updated last transmission time in configmap", "newStatus", cm.Data[telStatusKey]) + + return nil +} + +func getAllCustomResources(ctx context.Context, client splcommon.ControllerClient) 
map[string][]splcommon.MetaObject { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("collectCRTelData") + + var crList map[string][]splcommon.MetaObject + crList = make(map[string][]splcommon.MetaObject) + + //var instanceID InstanceType + //var telAppName string + + var err error + var standaloneList enterpriseApi.StandaloneList + //instanceID = SplunkStandalone + //telAppName = fmt.Sprintf(telAppNameTemplateStr, "stdaln") + err = client.List(ctx, &standaloneList) + if err != nil { + scopedLog.Error(err, "Failed to list standalone objects") + } else if len(standaloneList.Items) > 0 { + crList[standaloneList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range standaloneList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[standaloneList.Items[0].Kind] = append(crList[standaloneList.Items[0].Kind], &cr) + } + } + + var lmanagerList enterpriseApi.LicenseManagerList + //instanceID = SplunkLicenseManager + //telAppName = fmt.Sprintf(telAppNameTemplateStr, "lmanager") + err = client.List(ctx, &lmanagerList) + if err != nil { + scopedLog.Error(err, "Failed to list LicenseManager objects") + } else if len(lmanagerList.Items) > 0 { + crList[lmanagerList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range lmanagerList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[lmanagerList.Items[0].Kind] = append(crList[lmanagerList.Items[0].Kind], &cr) + } + } + + var lmasterList enterpriseApiV3.LicenseMasterList + //instanceID = SplunkLicenseMaster + //telAppName = fmt.Sprintf(telAppNameTemplateStr, "lmaster") + err = client.List(ctx, &lmasterList) + if err != nil { + scopedLog.Error(err, "Failed to list LicenseMaster objects") + } else if len(lmasterList.Items) > 0 { + 
crList[lmasterList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range lmasterList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[lmasterList.Items[0].Kind] = append(crList[lmasterList.Items[0].Kind], &cr) + } + } + + var shcList enterpriseApi.SearchHeadClusterList + //instanceID = SplunkSearchHead + //telAppName = fmt.Sprintf(telAppNameTemplateStr, "shc") + err = client.List(ctx, &shcList) + if err != nil { + scopedLog.Error(err, "Failed to list SearchHeadCluster objects") + } else if len(shcList.Items) > 0 { + crList[shcList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range shcList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[shcList.Items[0].Kind] = append(crList[shcList.Items[0].Kind], &cr) + } + } + + var cmanagerList enterpriseApi.ClusterManagerList + err = client.List(ctx, &cmanagerList) + if err != nil { + scopedLog.Error(err, "Failed to list ClusterManager objects") + } else if len(cmanagerList.Items) > 0 { + crList[cmanagerList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range cmanagerList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[cmanagerList.Items[0].Kind] = append(crList[cmanagerList.Items[0].Kind], &cr) + } + } + + var cmasterList enterpriseApiV3.ClusterMasterList + err = client.List(ctx, &cmasterList) + if err != nil { + scopedLog.Error(err, "Failed to list ClusterMaster objects") + } else if len(cmasterList.Items) > 0 { + crList[cmasterList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range cmasterList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping 
telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[cmasterList.Items[0].Kind] = append(crList[cmasterList.Items[0].Kind], &cr) + } + } + + return crList +} + +func getOwnedStatefulSets( + ctx context.Context, + c client.Client, + cr client.Object, +) ([]appsv1.StatefulSet, error) { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("getOwnedStatefulSets") + + stsList := &appsv1.StatefulSetList{} + if err := c.List(ctx, stsList, + client.InNamespace(cr.GetNamespace()), + ); err != nil { + scopedLog.Error(err, "Failed to list StatefulSets", "CR Name", cr.GetName()) + return nil, err + } + + var result []appsv1.StatefulSet + for _, sts := range stsList.Items { + for _, owner := range sts.OwnerReferences { + if owner.UID == cr.GetUID() { + result = append(result, sts) + break + } + } + } + return result, nil +} + +func collectCRTelData(ctx context.Context, client splcommon.ControllerClient, crList map[string][]splcommon.MetaObject, data map[string]interface{}) { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("collectCRTelData") + scopedLog.Info("Start") + + for kind, crs := range crList { + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + for _, cr := range crs { + var perCRData []map[string]string + perCRData = make([]map[string]string, 0) + stsList, err := getOwnedStatefulSets(ctx, client, cr) + if err != nil { + scopedLog.Error(err, "Failed to get owned StatefulSets") + } else if len(stsList) > 0 { + for _, sts := range stsList { + for _, container := range sts.Spec.Template.Spec.Containers { + resPerContainer := map[string]string{ + "container_name": container.Name, + "cpu_request": container.Resources.Requests.Cpu().String(), + "memory_request": container.Resources.Requests.Memory().String(), + "cpu_limit": container.Resources.Limits.Cpu().String(), + "memory_limit": container.Resources.Limits.Memory().String(), + } + perCRData 
= append(perCRData, resPerContainer) + } + } + } + perKindData[cr.GetName()] = perCRData + } + data[kind] = perKindData + } +} + +// CollectCMTelData is exported for testing +func CollectCMTelData(ctx context.Context, cm *corev1.ConfigMap, data map[string]interface{}) { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("collectCMTelData") + scopedLog.Info("Start") + + for key, val := range cm.Data { + if key == telStatusKey { + continue + } + var compData interface{} + scopedLog.Info("Processing telemetry input from other components", "key", key, "value", val) + err := json.Unmarshal([]byte(val), &compData) + if err != nil { + scopedLog.Info("Not able to unmarshal. Will include the input as string", "key", key, "value", val) + data[key] = val + } else { + data[key] = compData + } + } +} + +func isTest(ctx context.Context) bool { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("checkTestMode") + + // Retrieve SPLUNK_TEST_MODE environment variable + testModeStr := os.Getenv("SPLUNK_TEST_MODE") + if testModeStr == "1" { + scopedLog.Info("Test mode is enabled via SPLUNK_TEST_MODE env variable") + return true + } + + scopedLog.Info("Return test mode", "isTestMode", isTestMode) + return isTestMode +} + +// SendTelemetry is exported for testing +func SendTelemetry(ctx context.Context, client splcommon.ControllerClient, cr splcommon.MetaObject, data map[string]interface{}) bool { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("sendTelemetry").WithValues( + "name", cr.GetObjectMeta().GetName(), + "namespace", cr.GetObjectMeta().GetNamespace(), + "kind", cr.GetObjectKind().GroupVersionKind().Kind) + scopedLog.Info("Start") + + var instanceID InstanceType + switch cr.GetObjectKind().GroupVersionKind().Kind { + case "Standalone": + instanceID = SplunkStandalone + case "LicenseManager": + instanceID = SplunkLicenseManager + case "LicenseMaster": + instanceID = SplunkLicenseMaster + case "SearchHeadCluster": + 
instanceID = SplunkSearchHead + case "ClusterMaster": + instanceID = SplunkClusterMaster + case "ClusterManager": + instanceID = SplunkClusterManager + default: + return false + } + + serviceName := GetSplunkServiceName(instanceID, cr.GetName(), false) + scopedLog.Info("Got service name", "serviceName", serviceName) + + defaultSecretObjName := splcommon.GetNamespaceScopedSecretName(cr.GetNamespace()) + defaultSecret, err := splutil.GetSecretByName(ctx, client, cr.GetNamespace(), cr.GetName(), defaultSecretObjName) + if err != nil { + scopedLog.Error(err, "Could not access default secret object") + return false + } + + //Get the admin password from the secret object + adminPwd, foundSecret := defaultSecret.Data["password"] + if !foundSecret { + scopedLog.Info("Failed to find admin password") + return false + } + splunkClient := splclient.NewSplunkClient(fmt.Sprintf("https://%s:8089", serviceName), "admin", string(adminPwd)) + + var licenseInfo *splclient.LicenseInfo + licenseInfo, err = splunkClient.GetLicenseInfo() + if err != nil { + scopedLog.Error(err, "Failed to retrieve the license info") + return false + } else { + data[telLicenseInfoKey] = *licenseInfo + } + telemetry := Telemetry{ + Type: "event", + Component: "sok", + OptInRequired: 2, + Data: data, + Test: isTest(ctx), + } + + path := fmt.Sprintf("/servicesNS/nobody/%s/telemetry-metric", telAppNameStr) + bodyBytes, err := json.Marshal(telemetry) + if err != nil { + scopedLog.Error(err, "Failed to marshal to bytes") + return false + } + scopedLog.Info("Sending request", "path", path, "body", string(bodyBytes)) + + response, err := splunkClient.SendTelemetry(path, bodyBytes) + if err != nil { + scopedLog.Error(err, "Failed to send telemetry") + return false + } + + scopedLog.Info("Successfully sent telemetry", "response", response) + return true +} diff --git a/pkg/splunk/enterprise/telemetry_test.go b/pkg/splunk/enterprise/telemetry_test.go new file mode 100644 index 000000000..76e614def --- /dev/null +++ 
b/pkg/splunk/enterprise/telemetry_test.go @@ -0,0 +1,578 @@ +// Copyright (c) 2018-2022 Splunk Inc. All rights reserved. + +package enterprise + +import ( + "context" + "encoding/json" + enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" + splcommon "github.com/splunk/splunk-operator/pkg/splunk/common" + "testing" + "time" + + "errors" + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + spltest "github.com/splunk/splunk-operator/pkg/splunk/test" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func TestTelemetryGetAllCustomResources_Empty(t *testing.T) { + mockClient := spltest.NewMockClient() + ctx := context.TODO() + crMap := getAllCustomResources(ctx, mockClient) + if len(crMap) != 0 { + t.Errorf("expected no CRs, got %d", len(crMap)) + } +} + +func TestTelemetryCollectCRTelData_WithMockCR(t *testing.T) { + mockClient := spltest.NewMockClient() + ctx := context.TODO() + cr := &enterpriseApi.Standalone{} + cr.TypeMeta.Kind = "Standalone" + cr.ObjectMeta.Name = "test-standalone" + crList := map[string][]splcommon.MetaObject{"Standalone": {cr}} + data := make(map[string]interface{}) + collectCRTelData(ctx, mockClient, crList, data) + if _, ok := data["Standalone"]; !ok { + t.Errorf("expected Standalone key in data map") + } +} + +func TestApplyTelemetry_ConfigMapNoData(t *testing.T) { + mockClient := spltest.NewMockClient() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{}, + } + ctx := context.TODO() + result, err := ApplyTelemetry(ctx, mockClient, cm) + if err == nil { + t.Errorf("expected error when no CRs present, got nil") + } + if !result.Requeue { + t.Errorf("expected requeue to be true, got false") + } +} + +func TestTelemetryCollectCMTelData_UnmarshalError(t *testing.T) { + cm := &corev1.ConfigMap{ + ObjectMeta: 
metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{"bad": "notjson"}, + } + ctx := context.TODO() + data := make(map[string]interface{}) + CollectCMTelData(ctx, cm, data) + if data["bad"] != "notjson" { + t.Errorf("expected fallback to string on unmarshal error") + } +} + +func TestTelemetryCollectCMTelData_ValidJSON(t *testing.T) { + val := map[string]interface{}{"foo": "bar"} + b, _ := json.Marshal(val) + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{"good": string(b)}, + } + ctx := context.TODO() + data := make(map[string]interface{}) + CollectCMTelData(ctx, cm, data) + if m, ok := data["good"].(map[string]interface{}); !ok || m["foo"] != "bar" { + t.Errorf("expected valid JSON to be unmarshaled") + } +} + +func TestSendTelemetry_UnknownKind(t *testing.T) { + cr := &enterpriseApi.Standalone{} + cr.TypeMeta.Kind = "UnknownKind" + ok := SendTelemetry(context.TODO(), spltest.NewMockClient(), cr, map[string]interface{}{}) + if ok { + t.Errorf("expected SendTelemetry to return false for unknown kind") + } +} + +func TestSendTelemetry_NoSecret(t *testing.T) { + cr := &enterpriseApi.Standalone{} + cr.TypeMeta.Kind = "Standalone" + cr.ObjectMeta.Name = "test" + cr.ObjectMeta.Namespace = "default" + ok := SendTelemetry(context.TODO(), spltest.NewMockClient(), cr, map[string]interface{}{}) + if ok { + t.Errorf("expected SendTelemetry to return false if no secret found") + } +} + +func TestTelemetryGetAllCustomResources_AllKinds(t *testing.T) { + ctx := context.TODO() + fakeClient := &FakeListClient{ + crs: map[string][]client.Object{ + "Standalone": {&enterpriseApi.Standalone{TypeMeta: metav1.TypeMeta{Kind: "Standalone"}, ObjectMeta: metav1.ObjectMeta{Name: "test-standalone"}}}, + "LicenseManager": {&enterpriseApi.LicenseManager{TypeMeta: metav1.TypeMeta{Kind: "LicenseManager"}, ObjectMeta: metav1.ObjectMeta{Name: "test-licensemanager"}}}, + "LicenseMaster": 
{&enterpriseApiV3.LicenseMaster{TypeMeta: metav1.TypeMeta{Kind: "LicenseMaster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-licensemaster"}}}, + "SearchHeadCluster": {&enterpriseApi.SearchHeadCluster{TypeMeta: metav1.TypeMeta{Kind: "SearchHeadCluster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-shc"}}}, + "ClusterManager": {&enterpriseApi.ClusterManager{TypeMeta: metav1.TypeMeta{Kind: "ClusterManager"}, ObjectMeta: metav1.ObjectMeta{Name: "test-cmanager"}}}, + "ClusterMaster": {&enterpriseApiV3.ClusterMaster{TypeMeta: metav1.TypeMeta{Kind: "ClusterMaster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-cmaster"}}}, + }, + sts: []apps.StatefulSet{}, // ensure all keys are present + } + crMap := getAllCustomResources(ctx, fakeClient) + kinds := []string{"Standalone", "LicenseManager", "LicenseMaster", "SearchHeadCluster", "ClusterManager", "ClusterMaster"} + for _, kind := range kinds { + if _, ok := crMap[kind]; !ok { + t.Errorf("expected kind %s in CR map", kind) + } + } +} + +func TestTelemetryCollectCRTelData_StandaloneData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApi.Standalone{} + cr.TypeMeta.Kind = "Standalone" + cr.ObjectMeta.Name = "test-standalone" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"Standalone": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-standalone-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("128Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("256Mi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient 
:= &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + standaloneData, ok := data["Standalone"].(map[string]interface{}) + if !ok { + t.Fatalf("expected Standalone data map") + } + crData, ok := standaloneData["test-standalone"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "500m" || container["memory_request"] != "128Mi" || container["cpu_limit"] != "1" || container["memory_limit"] != "256Mi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryCollectCRTelData_LicenseManagerData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApi.LicenseManager{} + cr.TypeMeta.Kind = "LicenseManager" + cr.ObjectMeta.Name = "test-licensemanager" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"LicenseManager": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-licensemanager-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("600m"), + corev1.ResourceMemory: resource.MustParse("256Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("512Mi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + lmData, ok := data["LicenseManager"].(map[string]interface{}) + if !ok { + t.Fatalf("expected LicenseManager data map") + } + crData, ok := 
lmData["test-licensemanager"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "600m" || container["memory_request"] != "256Mi" || container["cpu_limit"] != "2" || container["memory_limit"] != "512Mi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryCollectCRTelData_LicenseMasterData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApiV3.LicenseMaster{} + cr.TypeMeta.Kind = "LicenseMaster" + cr.ObjectMeta.Name = "test-licensemaster" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"LicenseMaster": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-licensemaster-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("700m"), + corev1.ResourceMemory: resource.MustParse("384Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("3"), + corev1.ResourceMemory: resource.MustParse("768Mi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + lmData, ok := data["LicenseMaster"].(map[string]interface{}) + if !ok { + t.Fatalf("expected LicenseMaster data map") + } + crData, ok := lmData["test-licensemaster"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "700m" || container["memory_request"] != "384Mi" || container["cpu_limit"] != "3" || container["memory_limit"] != 
"768Mi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryCollectCRTelData_SearchHeadClusterData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApi.SearchHeadCluster{} + cr.TypeMeta.Kind = "SearchHeadCluster" + cr.ObjectMeta.Name = "test-shc" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"SearchHeadCluster": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-shc-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("800m"), + corev1.ResourceMemory: resource.MustParse("512Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("4"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + shcData, ok := data["SearchHeadCluster"].(map[string]interface{}) + if !ok { + t.Fatalf("expected SearchHeadCluster data map") + } + crData, ok := shcData["test-shc"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "800m" || container["memory_request"] != "512Mi" || container["cpu_limit"] != "4" || container["memory_limit"] != "1Gi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryCollectCRTelData_ClusterManagerData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApi.ClusterManager{} + cr.TypeMeta.Kind = "ClusterManager" + cr.ObjectMeta.Name = "test-cmanager" + 
cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"ClusterManager": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cmanager-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("900m"), + corev1.ResourceMemory: resource.MustParse("640Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("5"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + cmData, ok := data["ClusterManager"].(map[string]interface{}) + if !ok { + t.Fatalf("expected ClusterManager data map") + } + crData, ok := cmData["test-cmanager"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "900m" || container["memory_request"] != "640Mi" || container["cpu_limit"] != "5" || container["memory_limit"] != "2Gi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryCollectCRTelData_ClusterMasterData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApiV3.ClusterMaster{} + cr.TypeMeta.Kind = "ClusterMaster" + cr.ObjectMeta.Name = "test-cmaster" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"ClusterMaster": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cmaster-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: 
apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1000m"), + corev1.ResourceMemory: resource.MustParse("768Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("6"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + cmData, ok := data["ClusterMaster"].(map[string]interface{}) + if !ok { + t.Fatalf("expected ClusterMaster data map") + } + crData, ok := cmData["test-cmaster"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "1" || container["memory_request"] != "768Mi" || container["cpu_limit"] != "6" || container["memory_limit"] != "4Gi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryUpdateLastTransmissionTime_SetsTimestamp(t *testing.T) { + mockClient := spltest.NewMockClient() + ctx := context.TODO() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{}, + } + + err := updateLastTransmissionTime(ctx, mockClient, cm) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + statusStr, ok := cm.Data[telStatusKey] + if !ok { + t.Fatalf("expected telStatusKey in configmap data") + } + var status TelemetryStatus + if err := json.Unmarshal([]byte(statusStr), &status); err != nil { + t.Fatalf("failed to unmarshal status: %v", err) + } + if status.LastTransmission == "" { + t.Errorf("expected LastTransmission to be set") + } + if _, err := time.Parse(time.RFC3339, status.LastTransmission); err != nil { + 
t.Errorf("LastTransmission is not RFC3339: %v", status.LastTransmission) + } +} + +func TestTelemetryUpdateLastTransmissionTime_UpdateError(t *testing.T) { + ctx := context.TODO() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{}, + } + badClient := &errorUpdateClient{} + err := updateLastTransmissionTime(ctx, badClient, cm) + if err == nil { + t.Errorf("expected error from client.Update, got nil") + } +} + +func TestTelemetryUpdateLastTransmissionTime_RepeatedCalls(t *testing.T) { + mockClient := spltest.NewMockClient() + ctx := context.TODO() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{}, + } + err := updateLastTransmissionTime(ctx, mockClient, cm) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + firstStatus := cm.Data[telStatusKey] + time.Sleep(1 * time.Second) + err = updateLastTransmissionTime(ctx, mockClient, cm) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + secondStatus := cm.Data[telStatusKey] + if firstStatus == secondStatus { + t.Errorf("expected status to change on repeated call") + } +} + +// errorUpdateClient is a mock client that always returns an error on Update +// Used for testing updateLastTransmissionTime error handling +type errorUpdateClient struct { + spltest.MockClient +} + +func (c *errorUpdateClient) Update(_ context.Context, _ client.Object, _ ...client.UpdateOption) error { + return errors.New("forced update error") +} + +// FakeListClient is a local mock client that supports List for CRs and StatefulSets for testing +// Only implements List for the types needed in these tests +type FakeListClient struct { + spltest.MockClient + crs map[string][]client.Object + sts []apps.StatefulSet +} + +func (c *FakeListClient) List(_ context.Context, list client.ObjectList, _ ...client.ListOption) error { + switch l := list.(type) { + case 
*enterpriseApi.StandaloneList: + l.Items = nil + for _, obj := range c.crs["Standalone"] { + l.Items = append(l.Items, *(obj.(*enterpriseApi.Standalone))) + } + case *enterpriseApi.LicenseManagerList: + l.Items = nil + for _, obj := range c.crs["LicenseManager"] { + l.Items = append(l.Items, *(obj.(*enterpriseApi.LicenseManager))) + } + case *enterpriseApiV3.LicenseMasterList: + l.Items = nil + for _, obj := range c.crs["LicenseMaster"] { + l.Items = append(l.Items, *(obj.(*enterpriseApiV3.LicenseMaster))) + } + case *enterpriseApi.SearchHeadClusterList: + l.Items = nil + for _, obj := range c.crs["SearchHeadCluster"] { + l.Items = append(l.Items, *(obj.(*enterpriseApi.SearchHeadCluster))) + } + case *enterpriseApi.ClusterManagerList: + l.Items = nil + for _, obj := range c.crs["ClusterManager"] { + l.Items = append(l.Items, *(obj.(*enterpriseApi.ClusterManager))) + } + case *enterpriseApiV3.ClusterMasterList: + l.Items = nil + for _, obj := range c.crs["ClusterMaster"] { + l.Items = append(l.Items, *(obj.(*enterpriseApiV3.ClusterMaster))) + } + case *apps.StatefulSetList: + l.Items = c.sts + default: + return nil + } + return nil +} + +// Additional tests for error paths and success can be added with more advanced mocks. diff --git a/test/trigger-tests.sh b/test/trigger-tests.sh index d778db88b..997bfa474 100644 --- a/test/trigger-tests.sh +++ b/test/trigger-tests.sh @@ -141,7 +141,8 @@ if [[ -z "${DEBUG}" ]]; then export DEBUG="${DEBUG_RUN}" fi - +# Set test mode +export SPLUNK_TEST_MODE=1 echo "Skipping following test :: ${TEST_TO_SKIP}"