diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6bc3432..d5a069c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -58,6 +58,7 @@ First of all, thanks for contributing!. Before contributing please read the [COD 1. Change the layer_name variable in zip.sh to avoid replacing the prod. 1. Run the following command to publish the layer: `sh zip.sh` +1. The zip file generated by the previous step should contain an `extensions` folder, which in turn contains the extension binary. 1. Run the following command to verify that the layer version is published across regions: `sh verify_layer_versions.sh` diff --git a/README.md b/README.md index 6277c9c..bf8812e 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,9 @@ All the logs that are not sent to Sumo Logic during the Execution phase of the A If you would like to always send logs during the execution phase however, you can add extra execution time via a sleep function at the end of lambda code, which will give your extension time to run and send logs to Sumo Logic. We recommend setting this to two seconds. +# Managed Instance Runtime Support +Starting with version v1.4.0, this Lambda extension also supports the [managed instance](https://docs.aws.amazon.com/lambda/latest/dg/lambda-managed-instances.html) runtime. 
+ # Using Lambda extension in custom container images Follow the instruction in [docs](https://help.sumologic.com/03Send-Data/Collect-from-Other-Data-Sources/Collect_AWS_Lambda_Logs_using_an_Extension#For_AWS_Lambda_Functions_Created_Using_Container_Images:) diff --git a/containerimageexample/python-arm64/sumologic-extension-amd64.tar.gz b/containerimageexample/python-arm64/sumologic-extension-amd64.tar.gz index f2d83f0..7651b3d 100644 Binary files a/containerimageexample/python-arm64/sumologic-extension-amd64.tar.gz and b/containerimageexample/python-arm64/sumologic-extension-amd64.tar.gz differ diff --git a/containerimageexample/python-arm64/sumologic-extension-arm64.tar.gz b/containerimageexample/python-arm64/sumologic-extension-arm64.tar.gz index 0a58d59..cf09935 100644 Binary files a/containerimageexample/python-arm64/sumologic-extension-arm64.tar.gz and b/containerimageexample/python-arm64/sumologic-extension-arm64.tar.gz differ diff --git a/lambda-extensions/lambdaapi/extensionapiclient.go b/lambda-extensions/lambdaapi/extensionapiclient.go index ee63938..86c0fd4 100644 --- a/lambda-extensions/lambdaapi/extensionapiclient.go +++ b/lambda-extensions/lambdaapi/extensionapiclient.go @@ -46,15 +46,20 @@ const ( ) var ( - lambdaEvents = []EventType{"INVOKE", "SHUTDOWN"} + lambdaEvents = []EventType{"INVOKE", "SHUTDOWN"} + managedInstanceLambdaEvents = []EventType{"SHUTDOWN"} ) // RegisterExtension is to register extension to Run Time API client. Call the following method on initialization as early as possible, // otherwise you may get a timeout error. Runtime initialization will start after all extensions are registered. 
-func (client *Client) RegisterExtension(ctx context.Context) (*RegisterResponse, error) { +func (client *Client) RegisterExtension(ctx context.Context, isManagedInstance bool) (*RegisterResponse, error) { URL := client.baseURL + extensionURL + "register" + events := lambdaEvents + if isManagedInstance { + events = managedInstanceLambdaEvents + } reqBody, err := json.Marshal(map[string]interface{}{ - "events": lambdaEvents, + "events": events, }) if err != nil { return nil, err diff --git a/lambda-extensions/lambdaapi/extensionapiclient_test.go b/lambda-extensions/lambdaapi/extensionapiclient_test.go index a004dc8..ce4de4d 100644 --- a/lambda-extensions/lambdaapi/extensionapiclient_test.go +++ b/lambda-extensions/lambdaapi/extensionapiclient_test.go @@ -34,11 +34,11 @@ func TestRegisterExtension(t *testing.T) { client := NewClient(srv.URL[7:], extensionName) // Without Context - response, err := client.RegisterExtension(context.TODO()) + response, err := client.RegisterExtension(context.TODO(), false) commonAsserts(t, client, response, err) // With Context - response, err = client.RegisterExtension(context.Background()) + response, err = client.RegisterExtension(context.Background(), false) commonAsserts(t, client, response, err) } @@ -147,3 +147,146 @@ func TestExitError(t *testing.T) { response, err = client.ExitError(context.Background(), "EXIT ERROR") commonAsserts(t, client, response, err) } + +// TestRegisterExtension_ManagedInstanceMode tests extension registration in managed instance mode +// In ManagedInstance mode, only SHUTDOWN events are registered (not INVOKE) +func TestRegisterExtension_ManagedInstanceMode(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assertEqual(t, r.Method, http.MethodPost, "Method is not POST") + assertNotEmpty(t, r.Header.Get(extensionNameHeader), "Extension Name Header not present") + + reqBytes, err := ioutil.ReadAll(r.Body) + assertNoError(t, err, "Received error 
while reading request") + defer func() { + if err := r.Body.Close(); err != nil { + log.Printf("failed to close body: %v", err) + } + }() + assertNotEmpty(t, reqBytes, "Received error in request") + + // Verify the request body contains only SHUTDOWN event for managed instance mode + var reqBody map[string]interface{} + err = json.Unmarshal(reqBytes, &reqBody) + assertNoError(t, err, "Failed to unmarshal request body") + + events, ok := reqBody["events"].([]interface{}) + if !ok { + t.Error("Events field not found or not an array") + } + assertEqual(t, len(events), 1, "Expected 1 event for managed instance mode") + assertEqual(t, events[0], "SHUTDOWN", "Expected only SHUTDOWN event for managed instance mode") + + w.Header().Add(extensionIdentiferHeader, "test-sumo-id") + w.WriteHeader(200) + respBytes, _ := json.Marshal(RegisterResponse{ + FunctionName: "test-function", + FunctionVersion: "$LATEST", + Handler: "index.handler", + }) + _, _ = w.Write(respBytes) + })) + + defer srv.Close() + client := NewClient(srv.URL[7:], extensionName) + + // Test with isManagedInstance = true + response, err := client.RegisterExtension(context.Background(), true) + commonAsserts(t, client, response, err) + + // Verify the response is properly unmarshaled + if response.FunctionName != "test-function" { + t.Errorf("Expected function name 'test-function', got '%s'", response.FunctionName) + } +} + +// TestRegisterExtension_ManagedInstanceModeWithoutContext tests managed instance mode without context +func TestRegisterExtension_ManagedInstanceModeWithoutContext(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + reqBytes, err := ioutil.ReadAll(r.Body) + assertNoError(t, err, "Received error while reading request") + defer func() { + if err := r.Body.Close(); err != nil { + log.Printf("failed to close body: %v", err) + } + }() + + var reqBody map[string]interface{} + err = json.Unmarshal(reqBytes, &reqBody) + assertNoError(t, 
err, "Failed to unmarshal request body") + + events, ok := reqBody["events"].([]interface{}) + if !ok { + t.Error("Events field not found or not an array") + } + assertEqual(t, len(events), 1, "Expected 1 event for managed instance mode") + + w.Header().Add(extensionIdentiferHeader, "test-sumo-id") + w.WriteHeader(200) + respBytes, _ := json.Marshal(RegisterResponse{}) + _, _ = w.Write(respBytes) + })) + + defer srv.Close() + client := NewClient(srv.URL[7:], extensionName) + + // Test with isManagedInstance = true and nil context + response, err := client.RegisterExtension(context.TODO(), true) + commonAsserts(t, client, response, err) +} + +// TestRegisterExtension_ManagedInstanceModeEventValidation tests that managed instance mode registers correct events +func TestRegisterExtension_ManagedInstanceModeEventValidation(t *testing.T) { + receivedEvents := make([]string, 0) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + reqBytes, err := ioutil.ReadAll(r.Body) + assertNoError(t, err, "Received error while reading request") + defer func() { + if err := r.Body.Close(); err != nil { + log.Printf("failed to close body: %v", err) + } + }() + + var reqBody map[string]interface{} + err = json.Unmarshal(reqBytes, &reqBody) + assertNoError(t, err, "Failed to unmarshal request body") + + events, ok := reqBody["events"].([]interface{}) + if !ok { + t.Error("Events field not found or not an array") + } + + for _, e := range events { + receivedEvents = append(receivedEvents, e.(string)) + } + + w.Header().Add(extensionIdentiferHeader, "test-sumo-id") + w.WriteHeader(200) + respBytes, _ := json.Marshal(RegisterResponse{}) + _, _ = w.Write(respBytes) + })) + + defer srv.Close() + client := NewClient(srv.URL[7:], extensionName) + + _, err := client.RegisterExtension(context.Background(), true) + assertNoError(t, err, "Failed to register extension in ManagedInstance mode") + + // Validate that INVOKE event is NOT present in managed 
instance mode + for _, event := range receivedEvents { + if event == "INVOKE" { + t.Error("INVOKE event should not be registered in managed instance mode") + } + } + + // Validate that SHUTDOWN event IS present + foundShutdown := false + for _, event := range receivedEvents { + if event == "SHUTDOWN" { + foundShutdown = true + } + } + if !foundShutdown { + t.Error("SHUTDOWN event should be registered in managed instance mode") + } +} diff --git a/lambda-extensions/lambdaapi/telemetryapiclient.go b/lambda-extensions/lambdaapi/telemetryapiclient.go index 65c4336..462f1a4 100644 --- a/lambda-extensions/lambdaapi/telemetryapiclient.go +++ b/lambda-extensions/lambdaapi/telemetryapiclient.go @@ -14,15 +14,18 @@ const ( //telemetry_receiverPort = 4243 ) -// SubscribeToLogsAPI is - Subscribe to Logs API to receive the Lambda Logs. -func (client *Client) SubscribeToTelemetryAPI(ctx context.Context, logEvents []string, telemetryTimeoutMs int, telemetryMaxBytes int64, telemetryMaxItems int) ([]byte, error) { +// SubscribeToTelemetryAPI is - Subscribe to Telemetry API to receive the Lambda Telemetry. 
+func (client *Client) SubscribeToTelemetryAPI(ctx context.Context, logEvents []string, telemetryTimeoutMs int, telemetryMaxBytes int64, telemetryMaxItems int, isManagedInstance bool) ([]byte, error) { URL := client.baseURL + telemetryURL - + schemaVersion := "2022-07-01" + if isManagedInstance { + schemaVersion = "2025-01-29" + } reqBody, err := json.Marshal(map[string]interface{}{ "destination": map[string]interface{}{"protocol": "HTTP", "URI": fmt.Sprintf("http://sandbox:%v", receiverPort)}, "types": logEvents, "buffering": map[string]interface{}{"timeoutMs": telemetryTimeoutMs, "maxBytes": telemetryMaxBytes, "maxItems": telemetryMaxItems}, - "schemaVersion": "2022-07-01", + "schemaVersion": schemaVersion, }) if err != nil { return nil, err diff --git a/lambda-extensions/lambdaapi/telemetryapiclient_test.go b/lambda-extensions/lambdaapi/telemetryapiclient_test.go index 58a7ce4..356f3d0 100644 --- a/lambda-extensions/lambdaapi/telemetryapiclient_test.go +++ b/lambda-extensions/lambdaapi/telemetryapiclient_test.go @@ -2,6 +2,7 @@ package lambdaapi import ( "context" + "encoding/json" ioutil "io" "log" "net/http" @@ -31,10 +32,69 @@ func TestSubscribeToTelemetryAPI(t *testing.T) { client := NewClient(srv.URL[7:], extensionName) // Without Context - response, err := client.SubscribeToTelemetryAPI(context.TODO(), []string{"platform", "function", "extension"}, 1000, 262144, 10000) + response, err := client.SubscribeToTelemetryAPI(context.TODO(), []string{"platform", "function", "extension"}, 1000, 262144, 10000, false) commonAsserts(t, client, response, err) // With Context - response, err = client.SubscribeToTelemetryAPI(context.Background(), []string{"platform", "function", "extension"}, 1000, 262144, 10000) + response, err = client.SubscribeToTelemetryAPI(context.Background(), []string{"platform", "function", "extension"}, 1000, 262144, 10000, false) + commonAsserts(t, client, response, err) +} + +// TestSubscribeToTelemetryAPI_ManagedInstanceMode tests telemetry 
API subscription in managed instance mode +// In managed instance mode, schema version should be "2025-01-29" instead of "2022-07-01" +func TestSubscribeToTelemetryAPI_ManagedInstanceMode(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assertEqual(t, r.Method, http.MethodPut, "Method is not PUT") + assertNotEmpty(t, r.Header.Get(extensionNameHeader), "Extension Name Header not present") + + reqBytes, err := ioutil.ReadAll(r.Body) + assertNoError(t, err, "Received error") + defer func() { + if err := r.Body.Close(); err != nil { + log.Printf("failed to close body: %v", err) + } + }() + assertNotEmpty(t, reqBytes, "Received error in request") + + // Verify the request body contains managed instance mode schema version + var reqBody map[string]interface{} + err = json.Unmarshal(reqBytes, &reqBody) + assertNoError(t, err, "Failed to unmarshal request body") + + schemaVersion, ok := reqBody["schemaVersion"].(string) + if !ok { + t.Error("schemaVersion field not found or not a string") + } + assertEqual(t, schemaVersion, "2025-01-29", "Expected managed instance mode schema version '2025-01-29'") + + // Verify other required fields are present + _, destinationExists := reqBody["destination"] + if !destinationExists { + t.Error("destination field not found") + } + + _, typesExists := reqBody["types"] + if !typesExists { + t.Error("types field not found") + } + + _, bufferingExists := reqBody["buffering"] + if !bufferingExists { + t.Error("buffering field not found") + } + + w.Header().Add(extensionIdentiferHeader, "test-sumo-id") + w.WriteHeader(200) + })) + + defer srv.Close() + client := NewClient(srv.URL[7:], extensionName) + + // Test with isManagedInstance = true (context) + response, err := client.SubscribeToTelemetryAPI(context.Background(), []string{"platform", "function", "extension"}, 1000, 262144, 10000, true) + commonAsserts(t, client, response, err) + + // Test with isManagedInstance = true (without 
context) + response, err = client.SubscribeToTelemetryAPI(context.TODO(), []string{"platform", "function", "extension"}, 1000, 262144, 10000, true) commonAsserts(t, client, response, err) } diff --git a/lambda-extensions/sumologic-extension.go b/lambda-extensions/sumologic-extension.go index 84fa399..4365a96 100644 --- a/lambda-extensions/sumologic-extension.go +++ b/lambda-extensions/sumologic-extension.go @@ -26,8 +26,12 @@ var ( var producer workers.TaskProducer var consumer workers.TaskConsumer +var managedInstanceProducer workers.ManagedInstanceTaskProducer +var managedInstanceConsumer workers.ManagedInstanceTaskConsumer var config *cfg.LambdaExtensionConfig var dataQueue chan []byte +var flushSignal chan string +var isManagedInstance bool func init() { Formatter := new(logrus.TextFormatter) @@ -47,22 +51,52 @@ func init() { logger.Logger.SetLevel(config.LogLevel) dataQueue = make(chan []byte, config.MaxDataQueueLength) - // Start HTTP Server before subscription in a goRoutine - producer = workers.NewTaskProducer(dataQueue, logger) - go func() { - if err := producer.Start(); err != nil { - logger.Errorf("producer Start failed: %v", err) - } - }() + // Check initialization type to determine if managed instance mode should be used + initializationType := os.Getenv("AWS_LAMBDA_INITIALIZATION_TYPE") + if initializationType == "lambda-managed-instances" { + isManagedInstance = true + logger.Debug("Initializing in Managed Instance mode") + + // Initialize flushSignal channel for managed instance mode communication + flushSignal = make(chan string, 10) // Buffered channel to prevent blocking + + // Initialize Managed Instance Producer and start it in a goroutine + managedInstanceProducer = workers.NewManagedInstanceTaskProducer(dataQueue, flushSignal, logger) + go func() { + if err := managedInstanceProducer.Start(); err != nil { + logger.Errorf("managedInstanceProducer Start failed: %v", err) + } + }() + + // Initialize Managed Instance Consumer and start it + 
managedInstanceConsumer = workers.NewManagedInstanceTaskConsumer(dataQueue, flushSignal, config, logger) + // Start the consumer's independent processing loop + ctx := context.Background() + managedInstanceConsumer.Start(ctx) + + logger.Debug("Managed Instance mode initialization complete") + } else { + logger.Debug("Initializing in standard mode") + // Start HTTP Server before subscription in a goRoutine + producer = workers.NewTaskProducer(dataQueue, logger) + go func() { + if err := producer.Start(); err != nil { + logger.Errorf("producer Start failed: %v", err) + } + }() - // Creating SumoTaskConsumer - consumer = workers.NewTaskConsumer(dataQueue, config, logger) + // Creating SumoTaskConsumer + consumer = workers.NewTaskConsumer(dataQueue, config, logger) + logger.Debug("Standard mode initialization complete") + } + + logger.Debug("Is Managed Instance value: ", isManagedInstance) } func runTimeAPIInit() (int64, error) { // Register early so Runtime could start in parallel logger.Debug("Registering Extension to Run Time API Client..........") - registerResponse, err := extensionClient.RegisterExtension(context.TODO()) + registerResponse, err := extensionClient.RegisterExtension(context.TODO(), isManagedInstance) if err != nil { return 0, err } @@ -70,7 +104,7 @@ func runTimeAPIInit() (int64, error) { // Subscribe to Telemetry API logger.Debug("Subscribing Extension to Telemetry API........") - subscribeResponse, err := extensionClient.SubscribeToTelemetryAPI(context.TODO(), config.LogTypes, config.TelemetryTimeoutMs, config.TelemetryMaxBytes, config.TelemetryMaxItems) + subscribeResponse, err := extensionClient.SubscribeToTelemetryAPI(context.TODO(), config.LogTypes, config.TelemetryTimeoutMs, config.TelemetryMaxBytes, config.TelemetryMaxItems, isManagedInstance) if err != nil { return 0, err } @@ -78,11 +112,14 @@ func runTimeAPIInit() (int64, error) { logger.Debug("Successfully subscribed to Telemetry API: ", utils.PrettyPrint(string(subscribeResponse))) // 
Call next to say registration is successful and get the deadtimems - nextResponse, err := nextEvent(context.TODO()) - if err != nil { - return 0, err + if !isManagedInstance { + nextResponse, err := nextEvent(context.TODO()) + if err != nil { + return 0, err + } + return nextResponse.DeadlineMs, nil } - return nextResponse.DeadlineMs, nil + return 0, nil } func nextEvent(ctx context.Context) (*lambdaapi.NextEventResponse, error) { @@ -109,15 +146,17 @@ func processEvents(ctx context.Context) { consumer.FlushDataQueue(ctx) return default: - logger.Debugf("switching to other go routine") - runtime.Gosched() - logger.Infof("Calling DrainQueue from processEvents") - // for { - runtime_done := consumer.DrainQueue(ctx) - - if runtime_done == 1 { - logger.Infof("Exiting DrainQueueLoop: Runtime is Done") + if !isManagedInstance { + logger.Debugf("switching to other go routine") + runtime.Gosched() + logger.Infof("Calling DrainQueue from processEvents") + // for { + runtime_done := consumer.DrainQueue(ctx) + if runtime_done == 1 { + logger.Infof("Exiting DrainQueueLoop: Runtime is Done") + } } + // } // This statement will freeze lambda diff --git a/lambda-extensions/workers/managed_instance_consumer.go b/lambda-extensions/workers/managed_instance_consumer.go new file mode 100644 index 0000000..d61ceb2 --- /dev/null +++ b/lambda-extensions/workers/managed_instance_consumer.go @@ -0,0 +1,164 @@ +package workers + +import ( + "context" + + cfg "github.com/SumoLogic/sumologic-lambda-extensions/lambda-extensions/config" + sumocli "github.com/SumoLogic/sumologic-lambda-extensions/lambda-extensions/sumoclient" + + "github.com/sirupsen/logrus" +) + +// ManagedInstanceTaskConsumer exposes methods for consuming tasks in managed instance mode +type ManagedInstanceTaskConsumer interface { + Start(context.Context) + FlushDataQueue(context.Context) + DrainQueue(context.Context) int +} + +// managedInstanceSumoConsumer drains log from dataQueue in managed instance mode +type 
managedInstanceSumoConsumer struct { + dataQueue chan []byte + flushSignal chan string + logger *logrus.Entry + config *cfg.LambdaExtensionConfig + sumoclient sumocli.LogSender +} + +// NewManagedInstanceTaskConsumer returns a new managed instance consumer +// flushSignal channel is used to receive signals from producer to trigger flushing +func NewManagedInstanceTaskConsumer(consumerQueue chan []byte, flushSignal chan string, config *cfg.LambdaExtensionConfig, logger *logrus.Entry) ManagedInstanceTaskConsumer { + return &managedInstanceSumoConsumer{ + dataQueue: consumerQueue, + flushSignal: flushSignal, + logger: logger, + sumoclient: sumocli.NewLogSenderClient(logger, config), + config: config, + } +} + +// Start starts the managed instance consumer in a goroutine to listen for flush signals independently +func (esc *managedInstanceSumoConsumer) Start(ctx context.Context) { + esc.logger.Info("Starting Managed Instance Consumer") + go esc.processFlushSignals(ctx) +} + +// processFlushSignals continuously listens for flush signals and triggers queue draining +// This runs independently without needing callbacks from main thread +func (esc *managedInstanceSumoConsumer) processFlushSignals(ctx context.Context) { + esc.logger.Info("Managed Instance Consumer: Started listening for flush signals") + + for { + select { + case <-ctx.Done(): + esc.logger.Info("Managed Instance Consumer: Context cancelled, flushing remaining data") + esc.FlushDataQueue(ctx) + return + + case signal := <-esc.flushSignal: + esc.logger.Infof("Managed Instance Consumer: Received flush signal: %s", signal) + + switch signal { + case "queue_threshold": + esc.logger.Info("Managed Instance Consumer: Draining queue due to 80% threshold") + esc.DrainQueue(ctx) + + case "platform.report": + esc.logger.Info("Managed Instance Consumer: Draining queue due to platform.report event") + esc.DrainQueue(ctx) + + default: + esc.logger.Warnf("Managed Instance Consumer: Unknown flush signal received: %s", 
signal) + } + } + } +} + +// FlushDataQueue drains the dataqueue completely (called during shutdown) +func (esc *managedInstanceSumoConsumer) FlushDataQueue(ctx context.Context) { + esc.logger.Info("Managed Instance Consumer: Flushing DataQueue") + + if esc.config.EnableFailover { + var rawMsgArr [][]byte + Loop: + for { + select { + case rawmsg := <-esc.dataQueue: + rawMsgArr = append(rawMsgArr, rawmsg) + default: + if len(rawMsgArr) > 0 { + err := esc.sumoclient.FlushAll(rawMsgArr) + if err != nil { + esc.logger.Errorln("Managed Instance Consumer: Unable to flush DataQueue", err.Error()) + // putting back all the msg to the queue in case of failure + for _, msg := range rawMsgArr { + select { + case esc.dataQueue <- msg: + default: + esc.logger.Warnf("Managed Instance Consumer: Failed to requeue message, queue full") + } + } + } else { + esc.logger.Infof("Managed Instance Consumer: Successfully flushed %d messages", len(rawMsgArr)) + } + } + close(esc.dataQueue) + esc.logger.Debugf("Managed Instance Consumer: DataQueue completely drained and closed") + break Loop + } + } + } else { + // calling drainqueue (during shutdown) if failover is not enabled + maxCallsNeededForCompleteDraining := (len(esc.dataQueue) / esc.config.MaxConcurrentRequests) + 1 + for i := 0; i < maxCallsNeededForCompleteDraining; i++ { + esc.DrainQueue(ctx) + } + esc.logger.Info("Managed Instance Consumer: DataQueue drained without failover") + } +} + +// DrainQueue drains the current contents of the queue +func (esc *managedInstanceSumoConsumer) DrainQueue(ctx context.Context) int { + esc.logger.Debug("Managed Instance Consumer: Draining data from dataQueue") + + var rawMsgArr [][]byte + var logsStr string + var runtime_done = 0 + + // Collect all available messages from the queue +Loop: + for { + select { + case rawmsg := <-esc.dataQueue: + rawMsgArr = append(rawMsgArr, rawmsg) + logsStr = string(rawmsg) + esc.logger.Debugf("Managed Instance Consumer: DrainQueue: logsStr length: %d", 
len(logsStr)) + + default: + // No more messages in queue, send what we have + if len(rawMsgArr) > 0 { + esc.logger.Infof("Managed Instance Consumer: Sending %d messages to Sumo Logic", len(rawMsgArr)) + err := esc.sumoclient.SendAllLogs(ctx, rawMsgArr) + if err != nil { + esc.logger.Errorln("Managed Instance Consumer: Unable to send logs to Sumo Logic", err.Error()) + // putting back all the msg to the queue in case of failure + for _, msg := range rawMsgArr { + select { + case esc.dataQueue <- msg: + default: + esc.logger.Warn("Managed Instance Consumer: Failed to requeue message, queue full") + } + } + } else { + esc.logger.Infof("Managed Instance Consumer: Successfully sent %d messages", len(rawMsgArr)) + } + } else { + esc.logger.Debug("Managed Instance Consumer: No messages to drain") + } + break Loop + } + } + + esc.logger.Debugf("Managed Instance Consumer: DrainQueue complete. Runtime done: %d", runtime_done) + return runtime_done +} diff --git a/lambda-extensions/workers/managed_instance_producer.go b/lambda-extensions/workers/managed_instance_producer.go new file mode 100644 index 0000000..ea8ba04 --- /dev/null +++ b/lambda-extensions/workers/managed_instance_producer.go @@ -0,0 +1,143 @@ +package workers + +import ( + "encoding/json" + "fmt" + ioutil "io" + "net/http" + + "github.com/sirupsen/logrus" +) + +const ( + // managedReceiverIP is Web Server Constants for managed instance mode + managedReceiverIP = "0.0.0.0" + // managedReceiverPort is Web Server Constants for managed instance mode + managedReceiverPort = 4243 + // queueThresholdPercent is the threshold percentage for triggering flush + queueThresholdPercent = 0.8 +) + +// ManagedInstanceTaskProducer exposes methods for producing tasks in managed instance mode +type ManagedInstanceTaskProducer interface { + Start() error +} + +type managedInstanceHttpServer struct { + dataQueue chan []byte + logger *logrus.Entry + flushSignal chan string // Signal channel to notify consumer to flush +} + +type 
Event struct { + Time string `json:"time"` + Type string `json:"type"` + Record json.RawMessage `json:"record"` +} + +// NewManagedInstanceTaskProducer returns a new managed instance producer object +// flushSignal channel is used to signal consumer when queue is 80% full or platform.report is received +func NewManagedInstanceTaskProducer(consumerQueue chan []byte, flushSignal chan string, logger *logrus.Entry) ManagedInstanceTaskProducer { + return &managedInstanceHttpServer{ + dataQueue: consumerQueue, + logger: logger, + flushSignal: flushSignal, + } +} + +// Start starts the HTTP Server for managed instance mode +func (mhs *managedInstanceHttpServer) Start() error { + http.HandleFunc("/", mhs.logsHandler) + mhs.logger.Info("Starting Managed Instance HTTP Server on port ", managedReceiverPort) + err := http.ListenAndServe(fmt.Sprintf("%s:%d", managedReceiverIP, managedReceiverPort), nil) + if err != nil { + mhs.logger.Errorf("Managed Instance HTTP server failed to start: %v", err) + panic(err) + } + return err +} + +// checkQueueThreshold checks if dataQueue has reached 80% capacity and signals consumer +func (mhs *managedInstanceHttpServer) checkQueueThreshold() { + queueLen := len(mhs.dataQueue) + queueCap := cap(mhs.dataQueue) + threshold := int(float64(queueCap) * queueThresholdPercent) + + mhs.logger.Debugf("Managed Instance Producer: Queue status - Length: %d, Capacity: %d, Threshold: %d", queueLen, queueCap, threshold) + + if queueLen >= threshold { + mhs.logger.Infof("Managed Instance Producer: Queue reached %d%% capacity (%d/%d), signaling consumer to flush", + int(queueThresholdPercent*100), queueLen, queueCap) + // Send flush signal to consumer (non-blocking) + select { + case mhs.flushSignal <- "queue_threshold": + mhs.logger.Debugf("Managed Instance Producer: Sent queue_threshold signal to consumer") + default: + mhs.logger.Warnf("Managed Instance Producer: Flush signal channel full, signal dropped") + } + } +} + +// logsHandler is Server 
Implementation to get Logs from logs API for managed instance mode +func (mhs *managedInstanceHttpServer) logsHandler(writer http.ResponseWriter, request *http.Request) { + if request.URL.Path != "/" { + http.NotFound(writer, request) + return + } + switch request.Method { + case "POST": + defer func() { + if err := request.Body.Close(); err != nil { + mhs.logger.Errorf("failed to close body: %v", err) + } + }() + + reqBody, err := ioutil.ReadAll(request.Body) + if err != nil { + mhs.logger.Error("Read from Logs API failed: ", err.Error()) + writer.WriteHeader(http.StatusInternalServerError) + return + } + + mhs.logger.Debugf("Managed Instance Producer: Producing data into dataQueue - %d bytes\n", len(reqBody)) + payload := []byte(reqBody) + + // Send payload to dataQueue (non-blocking to prevent deadlock) + select { + case mhs.dataQueue <- payload: + mhs.logger.Debugf("Managed Instance Producer: Successfully queued data") + default: + mhs.logger.Warnf("Managed Instance Producer: dataQueue is full, dropping message") + } + + // Check if queue has reached 80% capacity after adding data + mhs.checkQueueThreshold() + + // Parse events and check for platform.report + var events []Event + err = json.Unmarshal(reqBody, &events) + if err != nil { + mhs.logger.Errorf("Managed Instance Producer: Error parsing JSON: %v", err) + } else { + mhs.logger.Debugf("Managed Instance Producer: Parsed %d events from telemetry payload\n", len(events)) + + // Check for platform.report type + for _, event := range events { + if event.Type == "platform.report" { + mhs.logger.Infof("Managed Instance Producer: Found platform.report event at time: %s\n", event.Time) + // Send platform.report signal to consumer (non-blocking) + select { + case mhs.flushSignal <- "platform.report": + mhs.logger.Debugf("Managed Instance Producer: Sent platform.report signal to consumer") + default: + mhs.logger.Warnf("Managed Instance Producer: Flush signal channel full, signal dropped") + } + } + } + } + + 
writer.WriteHeader(http.StatusOK) + default: + http.Error(writer, "Method not allowed", http.StatusMethodNotAllowed) + } +} diff --git a/scripts/verify_layer_versions.sh b/scripts/verify_layer_versions.sh index 7dc58dc..dc50d67 100755 --- a/scripts/verify_layer_versions.sh +++ b/scripts/verify_layer_versions.sh @@ -22,6 +22,7 @@ AWS_REGIONS=( eu-central-1 us-west-1 us-west-2 + ca-west-1 ) echo "Fetching latest version of layer: $LAYER_NAME" diff --git a/scripts/zip.sh b/scripts/zip.sh index 4d2d0ab..9a2e4d5 100755 --- a/scripts/zip.sh +++ b/scripts/zip.sh @@ -40,7 +40,7 @@ for arch in "${ARCHITECTURES[@]}"; do echo "Creating the Zip file binary in extension folder." cd "${TARGET_DIR}/${arch}" - zip -j "zip/${binary_name}.zip" "extensions/${binary_name}" + zip -r "zip/${binary_name}.zip" "extensions/${binary_name}" tar -czvf "zip/${binary_name}-${arch}.tar.gz" -C extensions "${binary_name}" status=$? if [ $status -ne 0 ]; then @@ -74,6 +74,7 @@ for arch in "${ARCHITECTURES[@]}"; do eu-central-1 us-west-1 us-west-2 + ca-west-1 ) echo "Using AWS_PROFILE: ${AWS_PROFILE}"