From 8347d72afd4a2b091ba09bafa4b3c41b2ed8cf6c Mon Sep 17 00:00:00 2001 From: yux0 Date: Tue, 3 Mar 2026 19:00:35 -0800 Subject: [PATCH 1/4] exclude ExecuteMultiOperation in health check --- common/rpc/interceptor/health_check.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/rpc/interceptor/health_check.go b/common/rpc/interceptor/health_check.go index 24f234fc2b..7680107aa5 100644 --- a/common/rpc/interceptor/health_check.go +++ b/common/rpc/interceptor/health_check.go @@ -51,7 +51,8 @@ var excludedAPIsForHealthSignal = map[string]struct{}{ // not history node health. With no workers polling, queries block ~30s until // context deadline, which can push AverageLatency() past the threshold // and cause healthy nodes to report HEALTH_STATE_NOT_SERVING. - "QueryWorkflow": {}, + "QueryWorkflow": {}, + "ExecuteMultiOperation": {}, } var getWorkflowExecutionHistoryAPI = "GetWorkflowExecutionHistory" From d8c8a3e3467b1e448bda3d4970c51a3dc0ecd677 Mon Sep 17 00:00:00 2001 From: yux0 Date: Wed, 4 Mar 2026 19:34:12 -0800 Subject: [PATCH 2/4] Move to use method options in proto --- api/common/v1/api_category.go-helpers.pb.go | 65 +++++ api/common/v1/api_category.pb.go | 230 ++++++++++++++++++ api/historyservice/v1/service.pb.go | 31 +-- common/rpc/interceptor/health_check.go | 77 +++--- .../server/api/common/v1/api_category.proto | 35 +++ .../api/historyservice/v1/service.proto | 9 + 6 files changed, 404 insertions(+), 43 deletions(-) create mode 100644 api/common/v1/api_category.go-helpers.pb.go create mode 100644 api/common/v1/api_category.pb.go create mode 100644 proto/internal/temporal/server/api/common/v1/api_category.proto diff --git a/api/common/v1/api_category.go-helpers.pb.go b/api/common/v1/api_category.go-helpers.pb.go new file mode 100644 index 0000000000..d105bd7080 --- /dev/null +++ b/api/common/v1/api_category.go-helpers.pb.go @@ -0,0 +1,65 @@ +// Code generated by protoc-gen-go-helpers. DO NOT EDIT. +package commonspb + +import ( + "fmt" + + "google.golang.org/protobuf/proto" +) + +// Marshal an object of type ApiCategoryOptions to the protobuf v3 wire format +func (val *ApiCategoryOptions) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type ApiCategoryOptions from the protobuf v3 wire format +func (val *ApiCategoryOptions) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *ApiCategoryOptions) Size() int { + return proto.Size(val) +} + +// Equal returns whether two ApiCategoryOptions values are equivalent by recursively +// comparing the message's fields. +// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *ApiCategoryOptions) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *ApiCategoryOptions + switch t := that.(type) { + case *ApiCategoryOptions: + that1 = t + case ApiCategoryOptions: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + +var ( + ApiCategory_shorthandValue = map[string]int32{ + "Unspecified": 0, + "Standard": 1, + "LongPoll": 2, + "System": 3, + } +) + +// ApiCategoryFromString parses a ApiCategory value from either the protojson +// canonical SCREAMING_CASE enum or the traditional temporal PascalCase enum to ApiCategory +func ApiCategoryFromString(s string) (ApiCategory, error) { + if v, ok := ApiCategory_value[s]; ok { + return ApiCategory(v), nil + } else if v, ok := ApiCategory_shorthandValue[s]; ok { + return ApiCategory(v), nil + } + return ApiCategory(0), fmt.Errorf("%s is not a valid ApiCategory", s) +} diff --git a/api/common/v1/api_category.pb.go b/api/common/v1/api_category.pb.go new file mode 100644 index 0000000000..248dd81cee --- /dev/null +++ b/api/common/v1/api_category.pb.go @@ -0,0 +1,230 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// plugins: +// protoc-gen-go +// protoc +// source: temporal/server/api/common/v1/api_category.proto + +package commonspb + +import ( + reflect "reflect" + "strconv" + sync "sync" + unsafe "unsafe" + + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + descriptorpb "google.golang.org/protobuf/types/descriptorpb" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type ApiCategory int32 + +const ( + // Unspecified API category. Treated as standard API. + API_CATEGORY_UNSPECIFIED ApiCategory = 0 + // Standard API with typical request/response patterns. + API_CATEGORY_STANDARD ApiCategory = 1 + // Long-polling API that intentionally waits for state changes or external events. + // These APIs should be excluded from health signal tracking because their latency + // reflects client wait times and event availability rather than server health. + // Including them in health metrics would skew the data and could cause healthy + // nodes to appear unhealthy. + // + // Examples: PollMutableState, PollWorkflowExecutionUpdate, QueryWorkflow + API_CATEGORY_LONG_POLL ApiCategory = 2 + API_CATEGORY_SYSTEM ApiCategory = 3 +) + +// Enum value maps for ApiCategory. +var ( + ApiCategory_name = map[int32]string{ + 0: "API_CATEGORY_UNSPECIFIED", + 1: "API_CATEGORY_STANDARD", + 2: "API_CATEGORY_LONG_POLL", + 3: "API_CATEGORY_SYSTEM", + } + ApiCategory_value = map[string]int32{ + "API_CATEGORY_UNSPECIFIED": 0, + "API_CATEGORY_STANDARD": 1, + "API_CATEGORY_LONG_POLL": 2, + "API_CATEGORY_SYSTEM": 3, + } +) + +func (x ApiCategory) Enum() *ApiCategory { + p := new(ApiCategory) + *p = x + return p +} + +func (x ApiCategory) String() string { + switch x { + case API_CATEGORY_UNSPECIFIED: + return "Unspecified" + case API_CATEGORY_STANDARD: + return "Standard" + case API_CATEGORY_LONG_POLL: + return "LongPoll" + case API_CATEGORY_SYSTEM: + return "System" + default: + return strconv.Itoa(int(x)) + } + +} + +func (ApiCategory) Descriptor() protoreflect.EnumDescriptor { + return file_temporal_server_api_common_v1_api_category_proto_enumTypes[0].Descriptor() +} + +func (ApiCategory) Type() protoreflect.EnumType { + return &file_temporal_server_api_common_v1_api_category_proto_enumTypes[0] +} + +func (x ApiCategory) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use ApiCategory.Descriptor instead. +func (ApiCategory) EnumDescriptor() ([]byte, []int) { + return file_temporal_server_api_common_v1_api_category_proto_rawDescGZIP(), []int{0} +} + +type ApiCategoryOptions struct { + state protoimpl.MessageState `protogen:"open.v1"` + // The category of this API for health and observability purposes. + Category ApiCategory `protobuf:"varint,1,opt,name=category,proto3,enum=temporal.server.api.common.v1.ApiCategory" json:"category,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ApiCategoryOptions) Reset() { + *x = ApiCategoryOptions{} + mi := &file_temporal_server_api_common_v1_api_category_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ApiCategoryOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ApiCategoryOptions) ProtoMessage() {} + +func (x *ApiCategoryOptions) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_api_common_v1_api_category_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ApiCategoryOptions.ProtoReflect.Descriptor instead. +func (*ApiCategoryOptions) Descriptor() ([]byte, []int) { + return file_temporal_server_api_common_v1_api_category_proto_rawDescGZIP(), []int{0} +} + +func (x *ApiCategoryOptions) GetCategory() ApiCategory { + if x != nil { + return x.Category + } + return API_CATEGORY_UNSPECIFIED +} + +var file_temporal_server_api_common_v1_api_category_proto_extTypes = []protoimpl.ExtensionInfo{ + { + ExtendedType: (*descriptorpb.MethodOptions)(nil), + ExtensionType: (*ApiCategoryOptions)(nil), + Field: 50001, + Name: "temporal.server.api.common.v1.api_category", + Tag: "bytes,50001,opt,name=api_category", + Filename: "temporal/server/api/common/v1/api_category.proto", + }, +} + +// Extension fields to descriptorpb.MethodOptions. +var ( + // optional temporal.server.api.common.v1.ApiCategoryOptions api_category = 50001; + E_ApiCategory = &file_temporal_server_api_common_v1_api_category_proto_extTypes[0] +) + +var File_temporal_server_api_common_v1_api_category_proto protoreflect.FileDescriptor + +const file_temporal_server_api_common_v1_api_category_proto_rawDesc = "" + + "\n" + + "0temporal/server/api/common/v1/api_category.proto\x12\x1dtemporal.server.api.common.v1\x1a google/protobuf/descriptor.proto\"\\\n" + + "\x12ApiCategoryOptions\x12F\n" + + "\bcategory\x18\x01 \x01(\x0e2*.temporal.server.api.common.v1.ApiCategoryR\bcategory*{\n" + + "\vApiCategory\x12\x1c\n" + + "\x18API_CATEGORY_UNSPECIFIED\x10\x00\x12\x19\n" + + "\x15API_CATEGORY_STANDARD\x10\x01\x12\x1a\n" + + "\x16API_CATEGORY_LONG_POLL\x10\x02\x12\x17\n" + + "\x13API_CATEGORY_SYSTEM\x10\x03:y\n" + + "\fapi_category\x12\x1e.google.protobuf.MethodOptions\x18ц\x03 \x01(\v21.temporal.server.api.common.v1.ApiCategoryOptionsR\vapiCategory\x88\x01\x01B/Z-go.temporal.io/server/api/common/v1;commonspbb\x06proto3" + +var ( + file_temporal_server_api_common_v1_api_category_proto_rawDescOnce sync.Once + file_temporal_server_api_common_v1_api_category_proto_rawDescData []byte +) + +func file_temporal_server_api_common_v1_api_category_proto_rawDescGZIP() []byte { + file_temporal_server_api_common_v1_api_category_proto_rawDescOnce.Do(func() { + file_temporal_server_api_common_v1_api_category_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_temporal_server_api_common_v1_api_category_proto_rawDesc), len(file_temporal_server_api_common_v1_api_category_proto_rawDesc))) + }) + return file_temporal_server_api_common_v1_api_category_proto_rawDescData +} + +var file_temporal_server_api_common_v1_api_category_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_temporal_server_api_common_v1_api_category_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_temporal_server_api_common_v1_api_category_proto_goTypes = []any{ + (ApiCategory)(0), // 0: temporal.server.api.common.v1.ApiCategory + (*ApiCategoryOptions)(nil), // 1: temporal.server.api.common.v1.ApiCategoryOptions + (*descriptorpb.MethodOptions)(nil), // 2: google.protobuf.MethodOptions +} +var file_temporal_server_api_common_v1_api_category_proto_depIdxs = []int32{ + 0, // 0: temporal.server.api.common.v1.ApiCategoryOptions.category:type_name -> temporal.server.api.common.v1.ApiCategory + 2, // 1: temporal.server.api.common.v1.api_category:extendee -> google.protobuf.MethodOptions + 1, // 2: temporal.server.api.common.v1.api_category:type_name -> temporal.server.api.common.v1.ApiCategoryOptions + 3, // [3:3] is the sub-list for method output_type + 3, // [3:3] is the sub-list for method input_type + 2, // [2:3] is the sub-list for extension type_name + 1, // [1:2] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_temporal_server_api_common_v1_api_category_proto_init() } +func file_temporal_server_api_common_v1_api_category_proto_init() { + if File_temporal_server_api_common_v1_api_category_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_temporal_server_api_common_v1_api_category_proto_rawDesc), len(file_temporal_server_api_common_v1_api_category_proto_rawDesc)), + NumEnums: 1, + NumMessages: 1, + NumExtensions: 1, + NumServices: 0, + }, + GoTypes: file_temporal_server_api_common_v1_api_category_proto_goTypes, + DependencyIndexes: file_temporal_server_api_common_v1_api_category_proto_depIdxs, + EnumInfos: file_temporal_server_api_common_v1_api_category_proto_enumTypes, + MessageInfos: file_temporal_server_api_common_v1_api_category_proto_msgTypes, + ExtensionInfos: file_temporal_server_api_common_v1_api_category_proto_extTypes, + }.Build() + File_temporal_server_api_common_v1_api_category_proto = out.File + file_temporal_server_api_common_v1_api_category_proto_goTypes = nil + file_temporal_server_api_common_v1_api_category_proto_depIdxs = nil +} diff --git a/api/historyservice/v1/service.pb.go b/api/historyservice/v1/service.pb.go index 1e62533311..2b9c1af4e1 100644 --- a/api/historyservice/v1/service.pb.go +++ b/api/historyservice/v1/service.pb.go @@ -10,6 +10,7 @@ import ( reflect "reflect" unsafe "unsafe" + _ "go.temporal.io/server/api/common/v1" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" ) @@ -25,11 +26,11 @@ var File_temporal_server_api_historyservice_v1_service_proto protoreflect.FileDe const file_temporal_server_api_historyservice_v1_service_proto_rawDesc = "" + "\n" + - "3temporal/server/api/historyservice/v1/service.proto\x12%temporal.server.api.historyservice.v1\x1a.temporal.server.api.historyservice.v1.GetMutableStateResponse\"\x00\x12\x95\x01\n" + - "\x10PollMutableState\x12>.temporal.server.api.historyservice.v1.PollMutableStateRequest\x1a?.temporal.server.api.historyservice.v1.PollMutableStateResponse\"\x00\x12\xa1\x01\n" + + "\x0fGetMutableState\x12=.temporal.server.api.historyservice.v1.GetMutableStateRequest\x1a>.temporal.server.api.historyservice.v1.GetMutableStateResponse\"\x00\x12\x9b\x01\n" + + "\x10PollMutableState\x12>.temporal.server.api.historyservice.v1.PollMutableStateRequest\x1a?.temporal.server.api.historyservice.v1.PollMutableStateResponse\"\x06\x8a\xb5\x18\x02\b\x02\x12\xa1\x01\n" + "\x14ResetStickyTaskQueue\x12B.temporal.server.api.historyservice.v1.ResetStickyTaskQueueRequest\x1aC.temporal.server.api.historyservice.v1.ResetStickyTaskQueueResponse\"\x00\x12\xb0\x01\n" + "\x19RecordWorkflowTaskStarted\x12G.temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest\x1aH.temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse\"\x00\x12\xb0\x01\n" + "\x19RecordActivityTaskStarted\x12G.temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest\x1aH.temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse\"\x00\x12\xb9\x01\n" + @@ -42,8 +43,8 @@ const file_temporal_server_api_historyservice_v1_service_proto_rawDesc = "" + "\x1bRespondActivityTaskCanceled\x12I.temporal.server.api.historyservice.v1.RespondActivityTaskCanceledRequest\x1aJ.temporal.server.api.historyservice.v1.RespondActivityTaskCanceledResponse\"\x00\x12\x9e\x01\n" + "\x13IsActivityTaskValid\x12A.temporal.server.api.historyservice.v1.IsActivityTaskValidRequest\x1aB.temporal.server.api.historyservice.v1.IsActivityTaskValidResponse\"\x00\x12\xaa\x01\n" + "\x17SignalWorkflowExecution\x12E.temporal.server.api.historyservice.v1.SignalWorkflowExecutionRequest\x1aF.temporal.server.api.historyservice.v1.SignalWorkflowExecutionResponse\"\x00\x12\xc5\x01\n" + - " SignalWithStartWorkflowExecution\x12N.temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionRequest\x1aO.temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionResponse\"\x00\x12\xa4\x01\n" + - "\x15ExecuteMultiOperation\x12C.temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest\x1aD.temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse\"\x00\x12\xad\x01\n" + + " SignalWithStartWorkflowExecution\x12N.temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionRequest\x1aO.temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionResponse\"\x00\x12\xaa\x01\n" + + "\x15ExecuteMultiOperation\x12C.temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest\x1aD.temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse\"\x06\x8a\xb5\x18\x02\b\x02\x12\xad\x01\n" + "\x18RemoveSignalMutableState\x12F.temporal.server.api.historyservice.v1.RemoveSignalMutableStateRequest\x1aG.temporal.server.api.historyservice.v1.RemoveSignalMutableStateResponse\"\x00\x12\xb3\x01\n" + "\x1aTerminateWorkflowExecution\x12H.temporal.server.api.historyservice.v1.TerminateWorkflowExecutionRequest\x1aI.temporal.server.api.historyservice.v1.TerminateWorkflowExecutionResponse\"\x00\x12\xaa\x01\n" + "\x17DeleteWorkflowExecution\x12E.temporal.server.api.historyservice.v1.DeleteWorkflowExecutionRequest\x1aF.temporal.server.api.historyservice.v1.DeleteWorkflowExecutionResponse\"\x00\x12\xa7\x01\n" + @@ -67,8 +68,8 @@ const file_temporal_server_api_historyservice_v1_service_proto_rawDesc = "" + "\n" + "RemoveTask\x128.temporal.server.api.historyservice.v1.RemoveTaskRequest\x1a9.temporal.server.api.historyservice.v1.RemoveTaskResponse\"\x00\x12\xa7\x01\n" + "\x16GetReplicationMessages\x12D.temporal.server.api.historyservice.v1.GetReplicationMessagesRequest\x1aE.temporal.server.api.historyservice.v1.GetReplicationMessagesResponse\"\x00\x12\xb0\x01\n" + - "\x19GetDLQReplicationMessages\x12G.temporal.server.api.historyservice.v1.GetDLQReplicationMessagesRequest\x1aH.temporal.server.api.historyservice.v1.GetDLQReplicationMessagesResponse\"\x00\x12\x8c\x01\n" + - "\rQueryWorkflow\x12;.temporal.server.api.historyservice.v1.QueryWorkflowRequest\x1a<.temporal.server.api.historyservice.v1.QueryWorkflowResponse\"\x00\x12\x8c\x01\n" + + "\x19GetDLQReplicationMessages\x12G.temporal.server.api.historyservice.v1.GetDLQReplicationMessagesRequest\x1aH.temporal.server.api.historyservice.v1.GetDLQReplicationMessagesResponse\"\x00\x12\x92\x01\n" + + "\rQueryWorkflow\x12;.temporal.server.api.historyservice.v1.QueryWorkflowRequest\x1a<.temporal.server.api.historyservice.v1.QueryWorkflowResponse\"\x06\x8a\xb5\x18\x02\b\x02\x12\x8c\x01\n" + "\rReapplyEvents\x12;.temporal.server.api.historyservice.v1.ReapplyEventsRequest\x1a<.temporal.server.api.historyservice.v1.ReapplyEventsResponse\"\x00\x12\x8f\x01\n" + "\x0eGetDLQMessages\x12<.temporal.server.api.historyservice.v1.GetDLQMessagesRequest\x1a=.temporal.server.api.historyservice.v1.GetDLQMessagesResponse\"\x00\x12\x95\x01\n" + "\x10PurgeDLQMessages\x12>.temporal.server.api.historyservice.v1.PurgeDLQMessagesRequest\x1a?.temporal.server.api.historyservice.v1.PurgeDLQMessagesResponse\"\x00\x12\x95\x01\n" + @@ -78,25 +79,25 @@ const file_temporal_server_api_historyservice_v1_service_proto_rawDesc = "" + "\x14GetReplicationStatus\x12B.temporal.server.api.historyservice.v1.GetReplicationStatusRequest\x1aC.temporal.server.api.historyservice.v1.GetReplicationStatusResponse\"\x00\x12\x9e\x01\n" + "\x13RebuildMutableState\x12A.temporal.server.api.historyservice.v1.RebuildMutableStateRequest\x1aB.temporal.server.api.historyservice.v1.RebuildMutableStateResponse\"\x00\x12\xaa\x01\n" + "\x17ImportWorkflowExecution\x12E.temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest\x1aF.temporal.server.api.historyservice.v1.ImportWorkflowExecutionResponse\"\x00\x12\xbf\x01\n" + - "\x1eDeleteWorkflowVisibilityRecord\x12L.temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest\x1aM.temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordResponse\"\x00\x12\xaa\x01\n" + - "\x17UpdateWorkflowExecution\x12E.temporal.server.api.historyservice.v1.UpdateWorkflowExecutionRequest\x1aF.temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse\"\x00\x12\xb6\x01\n" + - "\x1bPollWorkflowExecutionUpdate\x12I.temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateRequest\x1aJ.temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateResponse\"\x00\x12\xcc\x01\n" + + "\x1eDeleteWorkflowVisibilityRecord\x12L.temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest\x1aM.temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordResponse\"\x00\x12\xb0\x01\n" + + "\x17UpdateWorkflowExecution\x12E.temporal.server.api.historyservice.v1.UpdateWorkflowExecutionRequest\x1aF.temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse\"\x06\x8a\xb5\x18\x02\b\x02\x12\xbc\x01\n" + + "\x1bPollWorkflowExecutionUpdate\x12I.temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateRequest\x1aJ.temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateResponse\"\x06\x8a\xb5\x18\x02\b\x02\x12\xcc\x01\n" + "!StreamWorkflowReplicationMessages\x12O.temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesRequest\x1aP.temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesResponse\"\x00(\x010\x01\x12\xb6\x01\n" + "\x1bGetWorkflowExecutionHistory\x12I.temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryRequest\x1aJ.temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse\"\x00\x12\xcb\x01\n" + "\"GetWorkflowExecutionHistoryReverse\x12P.temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseRequest\x1aQ.temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseResponse\"\x00\x12\xc5\x01\n" + " GetWorkflowExecutionRawHistoryV2\x12N.temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Request\x1aO.temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Response\"\x00\x12\xbf\x01\n" + "\x1eGetWorkflowExecutionRawHistory\x12L.temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryRequest\x1aM.temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse\"\x00\x12\xb9\x01\n" + - "\x1cForceDeleteWorkflowExecution\x12J.temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest\x1aK.temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse\"\x00\x12\x86\x01\n" + - "\vGetDLQTasks\x129.temporal.server.api.historyservice.v1.GetDLQTasksRequest\x1a:.temporal.server.api.historyservice.v1.GetDLQTasksResponse\"\x00\x12\x8f\x01\n" + - "\x0eDeleteDLQTasks\x12<.temporal.server.api.historyservice.v1.DeleteDLQTasksRequest\x1a=.temporal.server.api.historyservice.v1.DeleteDLQTasksResponse\"\x00\x12\x83\x01\n" + + "\x1cForceDeleteWorkflowExecution\x12J.temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest\x1aK.temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse\"\x00\x12\x8c\x01\n" + + "\vGetDLQTasks\x129.temporal.server.api.historyservice.v1.GetDLQTasksRequest\x1a:.temporal.server.api.historyservice.v1.GetDLQTasksResponse\"\x06\x8a\xb5\x18\x02\b\x03\x12\x95\x01\n" + + "\x0eDeleteDLQTasks\x12<.temporal.server.api.historyservice.v1.DeleteDLQTasksRequest\x1a=.temporal.server.api.historyservice.v1.DeleteDLQTasksResponse\"\x06\x8a\xb5\x18\x02\b\x03\x12\x83\x01\n" + "\n" + "ListQueues\x128.temporal.server.api.historyservice.v1.ListQueuesRequest\x1a9.temporal.server.api.historyservice.v1.ListQueuesResponse\"\x00\x12}\n" + "\bAddTasks\x126.temporal.server.api.historyservice.v1.AddTasksRequest\x1a7.temporal.server.api.historyservice.v1.AddTasksResponse\"\x00\x12\x80\x01\n" + "\tListTasks\x127.temporal.server.api.historyservice.v1.ListTasksRequest\x1a8.temporal.server.api.historyservice.v1.ListTasksResponse\"\x00\x12\xa7\x01\n" + "\x16CompleteNexusOperation\x12D.temporal.server.api.historyservice.v1.CompleteNexusOperationRequest\x1aE.temporal.server.api.historyservice.v1.CompleteNexusOperationResponse\"\x00\x12\xb6\x01\n" + "\x1bCompleteNexusOperationChasm\x12I.temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest\x1aJ.temporal.server.api.historyservice.v1.CompleteNexusOperationChasmResponse\"\x00\x12\xad\x01\n" + - "\x18InvokeStateMachineMethod\x12F.temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest\x1aG.temporal.server.api.historyservice.v1.InvokeStateMachineMethodResponse\"\x00\x12\x92\x01\n" + - "\x0fDeepHealthCheck\x12=.temporal.server.api.historyservice.v1.DeepHealthCheckRequest\x1a>.temporal.server.api.historyservice.v1.DeepHealthCheckResponse\"\x00\x12\x98\x01\n" + + "\x18InvokeStateMachineMethod\x12F.temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest\x1aG.temporal.server.api.historyservice.v1.InvokeStateMachineMethodResponse\"\x00\x12\x98\x01\n" + + "\x0fDeepHealthCheck\x12=.temporal.server.api.historyservice.v1.DeepHealthCheckRequest\x1a>.temporal.server.api.historyservice.v1.DeepHealthCheckResponse\"\x06\x8a\xb5\x18\x02\b\x03\x12\x98\x01\n" + "\x11SyncWorkflowState\x12?.temporal.server.api.historyservice.v1.SyncWorkflowStateRequest\x1a@.temporal.server.api.historyservice.v1.SyncWorkflowStateResponse\"\x00\x12\xa4\x01\n" + "\x15UpdateActivityOptions\x12C.temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest\x1aD.temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse\"\x00\x12\x8c\x01\n" + "\rPauseActivity\x12;.temporal.server.api.historyservice.v1.PauseActivityRequest\x1a<.temporal.server.api.historyservice.v1.PauseActivityResponse\"\x00\x12\x92\x01\n" + diff --git a/common/rpc/interceptor/health_check.go b/common/rpc/interceptor/health_check.go index 7680107aa5..b58bd0bba2 100644 --- a/common/rpc/interceptor/health_check.go +++ b/common/rpc/interceptor/health_check.go @@ -2,16 +2,20 @@ package interceptor import ( "context" + "fmt" "time" "go.temporal.io/api/serviceerror" - "go.temporal.io/server/api/historyservice/v1" + commonspb "go.temporal.io/server/api/common/v1" + historyservicepb "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/common" "go.temporal.io/server/common/aggregate" - "go.temporal.io/server/common/api" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/log" "google.golang.org/grpc" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/types/descriptorpb" ) type ( @@ -41,21 +45,46 @@ type ( } ) -var excludedAPIsForHealthSignal = map[string]struct{}{ - "DeepHealthCheck": {}, - "PollMutableState": {}, - "PollWorkflowExecutionUpdate": {}, - "PollWorkflowExecutionHistory": {}, - "UpdateWorkflowExecution": {}, - // QueryWorkflow is excluded because its latency reflects worker availability, - // not history node health. With no workers polling, queries block ~30s until - // context deadline, which can push AverageLatency() past the threshold - // and cause healthy nodes to report HEALTH_STATE_NOT_SERVING. - "QueryWorkflow": {}, - "ExecuteMultiOperation": {}, +// longPollAPIs maps full method names to true if they should be excluded from health signals. +// This includes both long-polling APIs and admin APIs. +// Built at init time from proto method options. +var excludedAPIs = make(map[string]bool) + +func init() { + // Categories to exclude from health signal tracking + excludedCategories := map[commonspb.ApiCategory]bool{ + commonspb.API_CATEGORY_LONG_POLL: true, + commonspb.API_CATEGORY_SYSTEM: true, + } + + // Process HistoryService + processServiceFile(historyservicepb.File_temporal_server_api_historyservice_v1_service_proto, excludedCategories) } -var getWorkflowExecutionHistoryAPI = "GetWorkflowExecutionHistory" +// processServiceFile enumerates all methods in a service file and adds excluded categories to longPollAPIs +func processServiceFile(file protoreflect.FileDescriptor, excludedCategories map[commonspb.ApiCategory]bool) { + services := file.Services() + for i := 0; i < services.Len(); i++ { + service := services.Get(i) + methods := service.Methods() + for j := 0; j < methods.Len(); j++ { + method := methods.Get(j) + opts := method.Options().(*descriptorpb.MethodOptions) + if proto.HasExtension(opts, commonspb.E_ApiCategory) { + categoryOpts := proto.GetExtension(opts, commonspb.E_ApiCategory).(*commonspb.ApiCategoryOptions) + if categoryOpts != nil && excludedCategories[categoryOpts.GetCategory()] { + fullMethod := fmt.Sprintf("/%s/%s", service.FullName(), method.Name()) + excludedAPIs[fullMethod] = true + } + } + } + } +} + +// isExcludedAPI checks if an API is marked as a non-standard API via proto options +func isExcludedAPI(fullMethod string) bool { + return excludedAPIs[fullMethod] +} // NewHealthCheckInterceptor creates a new health check interceptor func NewHealthCheckInterceptor(healthSignalAggregator HealthSignalAggregator) *HealthCheckInterceptor { @@ -75,21 +104,13 @@ func (h *HealthCheckInterceptor) UnaryIntercept( resp, err := handler(ctx, req) elapsed := time.Since(startTime) - // Skip health check recording for specific methods - methodName := api.MethodName(info.FullMethod) - - // Skip GetWorkflowExecutionHistory polling request - if methodName == getWorkflowExecutionHistoryAPI { - if request, ok := req.(*historyservice.GetWorkflowExecutionHistoryRequest); ok { - if r := request.GetRequest(); r != nil && r.GetWaitNewEvent() { - return resp, err - } - } + // Skip health signal recording for non-standard APIs + if isExcludedAPI(info.FullMethod) { + return resp, err } - if _, ok := excludedAPIsForHealthSignal[methodName]; !ok { - h.healthSignalAggregator.Record(elapsed, err) - } + // Record health signal for standard APIs + h.healthSignalAggregator.Record(elapsed, err) return resp, err } diff --git a/proto/internal/temporal/server/api/common/v1/api_category.proto b/proto/internal/temporal/server/api/common/v1/api_category.proto new file mode 100644 index 0000000000..b6d40e6068 --- /dev/null +++ b/proto/internal/temporal/server/api/common/v1/api_category.proto @@ -0,0 +1,35 @@ +syntax = "proto3"; + +package temporal.server.api.common.v1; + +option go_package = "go.temporal.io/server/api/common/v1;commonspb"; + +import "google/protobuf/descriptor.proto"; + +extend google.protobuf.MethodOptions { + optional ApiCategoryOptions api_category = 50001; +} + +message ApiCategoryOptions { + // The category of this API for health and observability purposes. + ApiCategory category = 1; +} + +enum ApiCategory { + // Unspecified API category. Treated as standard API. + API_CATEGORY_UNSPECIFIED = 0; + + // Standard API with typical request/response patterns. + API_CATEGORY_STANDARD = 1; + + // Long-polling API that intentionally waits for state changes or external events. + // These APIs should be excluded from health signal tracking because their latency + // reflects client wait times and event availability rather than server health. + // Including them in health metrics would skew the data and could cause healthy + // nodes to appear unhealthy. + // + // Examples: PollMutableState, PollWorkflowExecutionUpdate, QueryWorkflow + API_CATEGORY_LONG_POLL = 2; + + API_CATEGORY_SYSTEM = 3; +} diff --git a/proto/internal/temporal/server/api/historyservice/v1/service.proto b/proto/internal/temporal/server/api/historyservice/v1/service.proto index 994b609ca8..2b75d59d22 100644 --- a/proto/internal/temporal/server/api/historyservice/v1/service.proto +++ b/proto/internal/temporal/server/api/historyservice/v1/service.proto @@ -4,6 +4,7 @@ package temporal.server.api.historyservice.v1; option go_package = "go.temporal.io/server/api/historyservice/v1;historyservice"; import "temporal/server/api/historyservice/v1/request_response.proto"; +import "temporal/server/api/common/v1/api_category.proto"; // HistoryService provides API to start a new long running workflow instance, as well as query and update the history // of workflow instances already created. @@ -25,6 +26,7 @@ service HistoryService { // It fails with 'EntityNotExistError' if specified workflow execution in unknown to the service. // It returns CurrentBranchChangedError if the workflow version branch has changed. rpc PollMutableState (PollMutableStateRequest) returns (PollMutableStateResponse) { + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_LONG_POLL; } // Reset the sticky task queue related information in mutable state of a given workflow. @@ -118,6 +120,7 @@ service HistoryService { // ExecuteMultiOperation executes multiple operations within a single workflow. rpc ExecuteMultiOperation (ExecuteMultiOperationRequest) returns (ExecuteMultiOperationResponse) { + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_LONG_POLL; } // RemoveSignalMutableState is used to remove a signal request Id that was previously recorded. This is currently @@ -236,6 +239,7 @@ service HistoryService { // QueryWorkflow returns query result for a specified workflow execution. rpc QueryWorkflow (QueryWorkflowRequest) returns (QueryWorkflowResponse) { + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_LONG_POLL; } // ReapplyEvents applies stale events to the current workflow and current run. @@ -286,11 +290,13 @@ service HistoryService { // (-- api-linter: core::0134=disabled // aip.dev/not-precedent: This service does not follow the update method API --) rpc UpdateWorkflowExecution(UpdateWorkflowExecutionRequest) returns (UpdateWorkflowExecutionResponse) { + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_LONG_POLL; } // (-- api-linter: core::0134=disabled // aip.dev/not-precedent: This service does not follow the update method API --) rpc PollWorkflowExecutionUpdate(PollWorkflowExecutionUpdateRequest) returns (PollWorkflowExecutionUpdateResponse){ + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_LONG_POLL; } rpc StreamWorkflowReplicationMessages(stream StreamWorkflowReplicationMessagesRequest) returns (stream StreamWorkflowReplicationMessagesResponse) { @@ -312,9 +318,11 @@ service HistoryService { } rpc GetDLQTasks (GetDLQTasksRequest) returns (GetDLQTasksResponse) { + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_SYSTEM; } rpc DeleteDLQTasks (DeleteDLQTasksRequest) returns (DeleteDLQTasksResponse) { + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_SYSTEM; } rpc ListQueues (ListQueuesRequest) returns (ListQueuesResponse) { @@ -351,6 +359,7 @@ service HistoryService { // Deep health check history service dependencies health status rpc DeepHealthCheck (DeepHealthCheckRequest) returns (DeepHealthCheckResponse) { + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_SYSTEM; } rpc SyncWorkflowState(SyncWorkflowStateRequest) returns (SyncWorkflowStateResponse) { From a99766ed31c5fec0f4992306eb2404b84b432505 Mon Sep 17 00:00:00 2001 From: yux0 Date: Wed, 4 Mar 2026 19:42:49 -0800 Subject: [PATCH 3/4] fix type assertion --- common/rpc/interceptor/health_check.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/common/rpc/interceptor/health_check.go b/common/rpc/interceptor/health_check.go index b58bd0bba2..87a590d7a9 100644 --- a/common/rpc/interceptor/health_check.go +++ b/common/rpc/interceptor/health_check.go @@ -69,10 +69,10 @@ func processServiceFile(file protoreflect.FileDescriptor, excludedCategories map methods := service.Methods() for j := 0; j < methods.Len(); j++ { method := methods.Get(j) - opts := method.Options().(*descriptorpb.MethodOptions) - if proto.HasExtension(opts, commonspb.E_ApiCategory) { - categoryOpts := proto.GetExtension(opts, commonspb.E_ApiCategory).(*commonspb.ApiCategoryOptions) - if categoryOpts != nil && excludedCategories[categoryOpts.GetCategory()] { + opts, ok := method.Options().(*descriptorpb.MethodOptions) + if ok && proto.HasExtension(opts, commonspb.E_ApiCategory) { + categoryOpts, ok := proto.GetExtension(opts, commonspb.E_ApiCategory).(*commonspb.ApiCategoryOptions) + if ok && categoryOpts != nil && excludedCategories[categoryOpts.GetCategory()] { fullMethod := fmt.Sprintf("/%s/%s", service.FullName(), method.Name()) excludedAPIs[fullMethod] = true } From 9e7fd0ae2b97e132c34d6e7c19a4d64d14fafb87 Mon Sep 17 00:00:00 2001 From: yux0 Date: Wed, 4 Mar 2026 20:56:43 -0800 Subject: [PATCH 4/4] lint --- common/rpc/interceptor/health_check.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/rpc/interceptor/health_check.go b/common/rpc/interceptor/health_check.go index 87a590d7a9..a02a16a5cd 100644 --- a/common/rpc/interceptor/health_check.go +++ b/common/rpc/interceptor/health_check.go @@ -7,7 +7,7 @@ import ( "go.temporal.io/api/serviceerror" commonspb "go.temporal.io/server/api/common/v1" - historyservicepb "go.temporal.io/server/api/historyservice/v1" + "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/common" "go.temporal.io/server/common/aggregate" "go.temporal.io/server/common/dynamicconfig" @@ -58,7 +58,7 @@ func init() { } // Process HistoryService - processServiceFile(historyservicepb.File_temporal_server_api_historyservice_v1_service_proto, excludedCategories) + processServiceFile(historyservice.File_temporal_server_api_historyservice_v1_service_proto, excludedCategories) } // processServiceFile enumerates all methods in a service file and adds excluded categories to longPollAPIs