diff --git a/libs/domains/observability/data-access/src/lib/domains-observability-data-access.ts b/libs/domains/observability/data-access/src/lib/domains-observability-data-access.ts index ef80bafcc73..6f6cf6de864 100644 --- a/libs/domains/observability/data-access/src/lib/domains-observability-data-access.ts +++ b/libs/domains/observability/data-access/src/lib/domains-observability-data-access.ts @@ -91,6 +91,41 @@ export const observability = createQueryKeys('observability', { return response.data.metrics && (JSON.parse(response.data.metrics).data[0] as string) }, }), + httpRouteName: ({ + clusterId, + serviceId, + startDate, + endDate, + }: { + clusterId: string + serviceId: string + startDate: string + endDate: string + }) => ({ + queryKey: ['httpRouteName', clusterId, serviceId], + async queryFn() { + const endpoint = `api/v1/label/httproute_name/values?match[]=kube_httproute_labels{qovery_com_associated_service_id="${serviceId}"}` + const response = await clusterApi.getClusterMetrics( + clusterId, + endpoint, + endpoint, + '', + startDate, + endDate, + undefined, + undefined, + undefined, + 'True', + 'True', + undefined, + 'prometheus', + 'false', + 'service_overview', + 'httpRouteName' + ) + return response.data.metrics && (JSON.parse(response.data.metrics).data[0] as string) + }, + }), hpaName: ({ clusterId, serviceId, diff --git a/libs/domains/observability/feature/src/lib/hooks/use-http-route-name/use-http-route-name.ts b/libs/domains/observability/feature/src/lib/hooks/use-http-route-name/use-http-route-name.ts new file mode 100644 index 00000000000..1ae80987137 --- /dev/null +++ b/libs/domains/observability/feature/src/lib/hooks/use-http-route-name/use-http-route-name.ts @@ -0,0 +1,20 @@ +import { useQuery } from '@tanstack/react-query' +import { observability } from '@qovery/domains/observability/data-access' + +export interface UseHttpRouteNameProps { + clusterId: string + serviceId: string + startDate: string + endDate: string + enabled?: boolean +} + 
+// Retrieves the http route name associated with a specific service (http managed by envoy) +export function useHttpRouteName({ clusterId, serviceId, enabled = true, startDate, endDate }: UseHttpRouteNameProps) { + return useQuery({ + ...observability.httpRouteName({ clusterId, serviceId, startDate, endDate }), + enabled: enabled && Boolean(clusterId && serviceId), + }) +} + +export default useHttpRouteName diff --git a/libs/domains/observability/feature/src/lib/hooks/use-metrics/use-metrics.ts b/libs/domains/observability/feature/src/lib/hooks/use-metrics/use-metrics.ts index e7071545ac9..5e4dc46af6f 100644 --- a/libs/domains/observability/feature/src/lib/hooks/use-metrics/use-metrics.ts +++ b/libs/domains/observability/feature/src/lib/hooks/use-metrics/use-metrics.ts @@ -23,6 +23,7 @@ interface UseMetricsProps { overriddenMaxPoints?: number boardShortName: 'service_overview' | 'rds_overview' metricShortName: string + enabled?: boolean } function useLiveUpdateSetting(): boolean { @@ -42,6 +43,7 @@ export function useMetrics({ overriddenMaxPoints, boardShortName, metricShortName, + enabled = true, }: UseMetricsProps) { // Get context and live update setting, but allow override const context = useDashboardContext() @@ -87,6 +89,7 @@ export function useMetrics({ traceId: context.traceId, alignedRange, }), + enabled, keepPreviousData: true, refetchInterval: finalLiveUpdateEnabled ? 
30_000 : false, // Refetch every 30 seconds only if live update is enabled refetchIntervalInBackground: finalLiveUpdateEnabled, diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/card-http-errors/card-http-errors.spec.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/card-http-errors/card-http-errors.spec.tsx index 37046d86a1b..0d142f1efa0 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/card-http-errors/card-http-errors.spec.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/card-http-errors/card-http-errors.spec.tsx @@ -32,6 +32,7 @@ describe('CardHTTPErrors', () => { clusterId: 'test-cluster-id', containerName: 'test-container-name', ingressName: 'test-ingress-name', + httpRouteName: 'test-httproute-name', } beforeEach(() => { @@ -39,6 +40,7 @@ describe('CardHTTPErrors', () => { }) it('should render successfully with loading state', () => { + // Mock all 4 calls (nginx error, nginx total, envoy error, envoy total) as loading useInstantMetrics.mockReturnValue(createMockUseInstantMetricsReturn(undefined, true)) const { baseElement } = renderWithProviders( @@ -52,6 +54,7 @@ describe('CardHTTPErrors', () => { it('should render with no errors (GREEN status)', () => { useInstantMetrics + // NGINX error requests .mockReturnValueOnce( createMockUseInstantMetricsReturn({ data: { @@ -63,6 +66,31 @@ describe('CardHTTPErrors', () => { }, }) ) + // NGINX total requests + .mockReturnValueOnce( + createMockUseInstantMetricsReturn({ + data: { + result: [ + { + value: [1234567890, '100'], + }, + ], + }, + }) + ) + // ENVOY error requests + .mockReturnValueOnce( + createMockUseInstantMetricsReturn({ + data: { + result: [ + { + value: [1234567890, '0'], + }, + ], + }, + }) + ) + // ENVOY total requests .mockReturnValueOnce( createMockUseInstantMetricsReturn({ data: { @@ -87,6 +115,7 @@ describe('CardHTTPErrors', () => { it('should render with errors (RED status) and 
show modal link', () => { useInstantMetrics + // NGINX error requests .mockReturnValueOnce( createMockUseInstantMetricsReturn({ data: { @@ -98,6 +127,7 @@ describe('CardHTTPErrors', () => { }, }) ) + // NGINX total requests .mockReturnValueOnce( createMockUseInstantMetricsReturn({ data: { @@ -109,6 +139,30 @@ describe('CardHTTPErrors', () => { }, }) ) + // ENVOY error requests + .mockReturnValueOnce( + createMockUseInstantMetricsReturn({ + data: { + result: [ + { + value: [1234567890, '0'], + }, + ], + }, + }) + ) + // ENVOY total requests + .mockReturnValueOnce( + createMockUseInstantMetricsReturn({ + data: { + result: [ + { + value: [1234567890, '0'], + }, + ], + }, + }) + ) renderWithProviders( @@ -125,6 +179,23 @@ describe('CardHTTPErrors', () => { it('should handle empty metrics data', () => { useInstantMetrics + // NGINX error requests + .mockReturnValueOnce( + createMockUseInstantMetricsReturn({ + data: { + result: [], + }, + }) + ) + // NGINX total requests + .mockReturnValueOnce( + createMockUseInstantMetricsReturn({ + data: { + result: [], + }, + }) + ) + // ENVOY error requests .mockReturnValueOnce( createMockUseInstantMetricsReturn({ data: { @@ -132,6 +203,7 @@ describe('CardHTTPErrors', () => { }, }) ) + // ENVOY total requests .mockReturnValueOnce( createMockUseInstantMetricsReturn({ data: { @@ -151,7 +223,13 @@ describe('CardHTTPErrors', () => { it('should handle undefined metrics data', () => { useInstantMetrics + // NGINX error requests + .mockReturnValueOnce(createMockUseInstantMetricsReturn()) + // NGINX total requests + .mockReturnValueOnce(createMockUseInstantMetricsReturn()) + // ENVOY error requests .mockReturnValueOnce(createMockUseInstantMetricsReturn()) + // ENVOY total requests .mockReturnValueOnce(createMockUseInstantMetricsReturn()) renderWithProviders( @@ -167,7 +245,9 @@ describe('CardHTTPErrors', () => { let callCount = 0 useInstantMetrics.mockImplementation(() => { callCount++ + // Calls: 1=nginx errors, 2=nginx total, 3=envoy 
errors, 4=envoy total if (callCount === 1) { + // NGINX error requests return createMockUseInstantMetricsReturn({ data: { result: [ @@ -177,7 +257,8 @@ describe('CardHTTPErrors', () => { ], }, }) - } else { + } else if (callCount === 2) { + // NGINX total requests return createMockUseInstantMetricsReturn({ data: { result: [ @@ -187,6 +268,28 @@ describe('CardHTTPErrors', () => { ], }, }) + } else if (callCount === 3) { + // ENVOY error requests + return createMockUseInstantMetricsReturn({ + data: { + result: [ + { + value: [1234567890, '0'], + }, + ], + }, + }) + } else { + // ENVOY total requests + return createMockUseInstantMetricsReturn({ + data: { + result: [ + { + value: [1234567890, '0'], + }, + ], + }, + }) } }) @@ -209,7 +312,13 @@ describe('CardHTTPErrors', () => { it('should call useInstantMetrics with correct parameters', () => { useInstantMetrics + // NGINX error requests .mockReturnValueOnce(createMockUseInstantMetricsReturn()) + // NGINX total requests + .mockReturnValueOnce(createMockUseInstantMetricsReturn()) + // ENVOY error requests + .mockReturnValueOnce(createMockUseInstantMetricsReturn()) + // ENVOY total requests .mockReturnValueOnce(createMockUseInstantMetricsReturn()) renderWithProviders( @@ -218,7 +327,10 @@ describe('CardHTTPErrors', () => { ) - expect(useInstantMetrics).toHaveBeenCalledTimes(2) + // Now called 4 times: nginx errors, nginx total, envoy errors, envoy total + expect(useInstantMetrics).toHaveBeenCalledTimes(4) + + // Check nginx calls expect(useInstantMetrics).toHaveBeenCalledWith({ clusterId: 'test-cluster-id', query: expect.stringContaining('nginx:req_inc:5m'), @@ -230,10 +342,15 @@ describe('CardHTTPErrors', () => { const calledQuery = useInstantMetrics.mock.calls[0][0].query expect(calledQuery).toContain('test-ingress-name') + + // Check envoy calls + const envoyQuery = useInstantMetrics.mock.calls[2][0].query + expect(envoyQuery).toContain('test-httproute-name') }) it('should not show modal link when there are no 
errors', () => { useInstantMetrics + // NGINX error requests .mockReturnValueOnce( createMockUseInstantMetricsReturn({ data: { @@ -245,6 +362,7 @@ describe('CardHTTPErrors', () => { }, }) ) + // NGINX total requests .mockReturnValueOnce( createMockUseInstantMetricsReturn({ data: { @@ -256,6 +374,30 @@ describe('CardHTTPErrors', () => { }, }) ) + // ENVOY error requests + .mockReturnValueOnce( + createMockUseInstantMetricsReturn({ + data: { + result: [ + { + value: [1234567890, '0'], + }, + ], + }, + }) + ) + // ENVOY total requests + .mockReturnValueOnce( + createMockUseInstantMetricsReturn({ + data: { + result: [ + { + value: [1234567890, '0'], + }, + ], + }, + }) + ) renderWithProviders( diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/card-http-errors/card-http-errors.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/card-http-errors/card-http-errors.tsx index 7403e573511..c715e02a369 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/card-http-errors/card-http-errors.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/card-http-errors/card-http-errors.tsx @@ -6,6 +6,7 @@ import { useDashboardContext } from '../../../util-filter/dashboard-context' import { CardMetric } from '../card-metric/card-metric' import { InstanceHTTPErrorsChart } from '../instance-http-errors-chart/instance-http-errors-chart' +// NGINX: Queries for nginx metrics (to remove when migrating to envoy) const queryErrorRequest = (timeRange: string, ingressName: string) => ` sum(sum_over_time( (nginx:req_inc:5m_by_status{ingress="${ingressName}", status=~"5.."})[${timeRange}:5m] @@ -18,20 +19,36 @@ const queryTotalRequest = (timeRange: string, ingressName: string) => ` ) ` +// ENVOY: Queries for envoy metrics +const queryEnvoyErrorRequest = (timeRange: string, httpRouteName: string) => ` + sum(sum_over_time( + (envoy_proxy:req_inc:5m_by_status{httproute_name="${httpRouteName}", 
envoy_response_code=~"5.."})[${timeRange}:5m] + )) +` + +const queryEnvoyTotalRequest = (timeRange: string, httpRouteName: string) => ` + sum_over_time( + (envoy_proxy:req_inc:5m{httproute_name="${httpRouteName}"})[${timeRange}:5m] + ) +` + export function CardHTTPErrors({ serviceId, clusterId, containerName, ingressName, + httpRouteName, }: { serviceId: string clusterId: string containerName: string ingressName: string + httpRouteName: string }) { const { queryTimeRange, startTimestamp, endTimestamp } = useDashboardContext() const [isModalOpen, setIsModalOpen] = useState(false) + // NGINX: Fetch nginx metrics (to remove when migrating to envoy) const { data: metricsErrorRequest, isLoading: isLoadingMetrics } = useInstantMetrics({ clusterId, query: queryErrorRequest(queryTimeRange, ingressName), @@ -50,8 +67,35 @@ export function CardHTTPErrors({ metricShortName: 'card_req_all_number', }) - const errorRaw = Math.round(metricsErrorRequest?.data?.result[0]?.value[1]) - const totalRequest = Math.round(metricsTotalRequest?.data?.result[0]?.value[1]) || 0 + // ENVOY: Fetch envoy metrics (only if httpRouteName is configured) + const { data: metricsEnvoyErrorRequest, isLoading: isLoadingMetricsEnvoyError } = useInstantMetrics({ + clusterId, + query: queryEnvoyErrorRequest(queryTimeRange, httpRouteName), + startTimestamp, + endTimestamp, + boardShortName: 'service_overview', + metricShortName: 'card_envoy_req_errors_number', + enabled: !!httpRouteName, + }) + + const { data: metricsEnvoyTotalRequest, isLoading: isLoadingMetricsEnvoyTotal } = useInstantMetrics({ + clusterId, + query: queryEnvoyTotalRequest(queryTimeRange, httpRouteName), + startTimestamp, + endTimestamp, + boardShortName: 'service_overview', + metricShortName: 'card_envoy_req_all_number', + enabled: !!httpRouteName, + }) + + // Aggregate nginx + envoy metrics + const nginxErrors = Math.round(metricsErrorRequest?.data?.result[0]?.value[1]) || 0 + const nginxTotal = 
Math.round(metricsTotalRequest?.data?.result[0]?.value[1]) || 0 + const envoyErrors = Math.round(metricsEnvoyErrorRequest?.data?.result[0]?.value[1]) || 0 + const envoyTotal = Math.round(metricsEnvoyTotalRequest?.data?.result[0]?.value[1]) || 0 + + const errorRaw = nginxErrors + envoyErrors + const totalRequest = nginxTotal + envoyTotal const errorRate = Math.ceil(totalRequest > 0 ? 100 * (errorRaw / totalRequest) : 0) || 0 const isError = errorRate > 0 @@ -63,7 +107,9 @@ export function CardHTTPErrors({ setIsModalOpen(true) : undefined} hasModalLink={isError} /> @@ -80,6 +126,7 @@ export function CardHTTPErrors({ serviceId={serviceId} containerName={containerName} ingressName={ingressName} + httpRouteName={httpRouteName} /> diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/card-percentile-99/card-percentile-99.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/card-percentile-99/card-percentile-99.tsx index 66d80b97346..f6125e357a9 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/card-percentile-99/card-percentile-99.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/card-percentile-99/card-percentile-99.tsx @@ -5,23 +5,32 @@ import { useDashboardContext } from '../../../util-filter/dashboard-context' import { CardMetric } from '../card-metric/card-metric' import NetworkRequestDurationChart from '../network-request-duration-chart/network-request-duration-chart' +// NGINX: Query for nginx metrics (to remove when migrating to envoy) const query = (timeRange: string, ingressName: string) => ` max_over_time(nginx:request_p99:5m{ingress="${ingressName}"}[${timeRange}]) ` +// ENVOY: Query for envoy metrics +const queryEnvoy = (timeRange: string, httpRouteName: string) => ` + max_over_time(envoy_proxy:request_p99:5m{httproute_name="${httpRouteName}"}[${timeRange}]) +` + export function CardPercentile99({ serviceId, clusterId, ingressName, + httpRouteName, 
}: { serviceId: string clusterId: string ingressName: string + httpRouteName: string }) { const { queryTimeRange, startTimestamp, endTimestamp } = useDashboardContext() const [isModalOpen, setIsModalOpen] = useState(false) - const { data: metrics, isLoading: isLoadingMetrics } = useInstantMetrics({ + // NGINX: Fetch nginx metrics (to remove when migrating to envoy) + const { data: metricsInSeconds, isLoading: isLoadingMetrics } = useInstantMetrics({ clusterId, query: query(queryTimeRange, ingressName), startTimestamp, @@ -30,7 +39,21 @@ export function CardPercentile99({ metricShortName: 'card_p99_count', }) - const value = Math.round(Number(metrics?.data?.result[0]?.value[1]) * 1000) || 0 + // ENVOY: Fetch envoy metrics (only if httpRouteName is configured) + const { data: metricsEnvoyInMs, isLoading: isLoadingMetricsEnvoy } = useInstantMetrics({ + clusterId, + query: queryEnvoy(queryTimeRange, httpRouteName), + startTimestamp, + endTimestamp, + boardShortName: 'service_overview', + metricShortName: 'card_envoy_p99_count', + enabled: !!httpRouteName, + }) + + // Use max of both sources (convert nginx seconds to ms, envoy already in ms) + const nginxValue = Math.round(Number(metricsInSeconds?.data?.result[0]?.value[1]) * 1000) || 0 + const envoyValue = Math.round(Number(metricsEnvoyInMs?.data?.result[0]?.value[1])) || 0 + const value = Math.max(nginxValue, envoyValue) const defaultThreshold = 250 const isError = value > defaultThreshold @@ -43,7 +66,7 @@ export function CardPercentile99({ title={title} description={description} status={isError ? 
'RED' : 'GREEN'} - isLoading={isLoadingMetrics} + isLoading={isLoadingMetrics || isLoadingMetricsEnvoy} onClick={() => setIsModalOpen(true)} hasModalLink /> @@ -55,6 +78,7 @@ export function CardPercentile99({ serviceId={serviceId} isFullscreen ingressName={ingressName} + httpRouteName={httpRouteName} /> diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/instance-http-errors-chart/instance-http-errors-chart.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/instance-http-errors-chart/instance-http-errors-chart.tsx index 847216b8621..9e481faa09d 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/instance-http-errors-chart/instance-http-errors-chart.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/instance-http-errors-chart/instance-http-errors-chart.tsx @@ -8,6 +8,7 @@ import { addTimeRangePadding } from '../../../util-chart/add-time-range-padding' import { processMetricsData } from '../../../util-chart/process-metrics-data' import { useDashboardContext } from '../../../util-filter/dashboard-context' +// NGINX: Query for nginx metrics (to remove when migrating to envoy) const query = (ingressName: string) => ` 100 * sum by (status) ( @@ -23,15 +24,33 @@ clamp_min( ) > 0 ` +// ENVOY: Query for envoy metrics +const queryEnvoy = (httpRouteName: string) => ` +100 * +sum by (envoy_response_code) ( + envoy_proxy:req_rate:5m_by_status{httproute_name="${httpRouteName}", envoy_response_code=~"5.."} +) +/ +ignoring(envoy_response_code) group_left +clamp_min( + sum( + envoy_proxy:req_rate:5m{httproute_name="${httpRouteName}"} + ), + 1 +) > 0 +` + export function InstanceHTTPErrorsChart({ clusterId, serviceId, ingressName, + httpRouteName, }: { clusterId: string serviceId: string containerName: string ingressName: string + httpRouteName: string }) { const { startTimestamp, endTimestamp, useLocalTime, timeRange } = useDashboardContext() @@ -53,6 +72,7 @@ export 
function InstanceHTTPErrorsChart({ setLegendSelectedKeys(new Set()) } + // NGINX: Fetch nginx metrics (to remove when migrating to envoy) const { data: metricsHttpStatusErrorRatio, isLoading: isLoadingHttpStatusErrorRatio } = useMetrics({ clusterId, startTimestamp, @@ -63,19 +83,47 @@ export function InstanceHTTPErrorsChart({ metricShortName: 'http_errors', }) + // ENVOY: Fetch envoy metrics + const { data: metricsEnvoyHttpStatusErrorRatio, isLoading: isLoadingEnvoyHttpStatusErrorRatio } = useMetrics({ + clusterId, + startTimestamp, + endTimestamp, + query: queryEnvoy(httpRouteName), + timeRange, + boardShortName: 'service_overview', + metricShortName: 'envoy_http_errors', + }) + const chartData = useMemo(() => { - // Merge healthy and unhealthy metrics into a single timeSeriesMap + // Check if we have data from either source + if (!metricsHttpStatusErrorRatio?.data?.result && !metricsEnvoyHttpStatusErrorRatio?.data?.result) { + return [] + } + + // Merge nginx and envoy metrics into a single timeSeriesMap const timeSeriesMap = new Map< number, { timestamp: number; time: string; fullTime: string; [key: string]: string | number | null } >() - // Process ratio of HTTP status error + // NGINX: Process nginx HTTP status error ratio (to remove when migrating to envoy) if (metricsHttpStatusErrorRatio?.data?.result) { processMetricsData( metricsHttpStatusErrorRatio, timeSeriesMap, - (_, index) => JSON.stringify(metricsHttpStatusErrorRatio.data.result[index].metric), + (_, index) => JSON.stringify({ ...metricsHttpStatusErrorRatio.data.result[index].metric, source: 'nginx' }), + (value) => parseFloat(value), + useLocalTime + ) + } + + // ENVOY: Process envoy HTTP status error ratio + if (metricsEnvoyHttpStatusErrorRatio?.data?.result) { + processMetricsData( + metricsEnvoyHttpStatusErrorRatio, + timeSeriesMap, + (_, index) => + JSON.stringify({ ...metricsEnvoyHttpStatusErrorRatio.data.result[index].metric, source: 'envoy' }), (value) => parseFloat(value), useLocalTime ) @@ 
-84,19 +132,36 @@ export function InstanceHTTPErrorsChart({ // Convert map to sorted array and add time range padding const baseChartData = Array.from(timeSeriesMap.values()).sort((a, b) => a.timestamp - b.timestamp) return addTimeRangePadding(baseChartData, startTimestamp, endTimestamp, useLocalTime) - }, [metricsHttpStatusErrorRatio, useLocalTime, startTimestamp, endTimestamp]) + }, [metricsHttpStatusErrorRatio, metricsEnvoyHttpStatusErrorRatio, useLocalTime, startTimestamp, endTimestamp]) const seriesNames = useMemo(() => { - if (!metricsHttpStatusErrorRatio?.data?.result) return [] - return metricsHttpStatusErrorRatio.data.result.map((_: unknown, index: number) => - JSON.stringify(metricsHttpStatusErrorRatio.data.result[index].metric) - ) as string[] - }, [metricsHttpStatusErrorRatio]) + const names: string[] = [] + + // NGINX: Extract nginx series names (to remove when migrating to envoy) + if (metricsHttpStatusErrorRatio?.data?.result) { + names.push( + ...metricsHttpStatusErrorRatio.data.result.map((_: unknown, index: number) => + JSON.stringify({ ...metricsHttpStatusErrorRatio.data.result[index].metric, source: 'nginx' }) + ) + ) + } + + // ENVOY: Extract envoy series names + if (metricsEnvoyHttpStatusErrorRatio?.data?.result) { + names.push( + ...metricsEnvoyHttpStatusErrorRatio.data.result.map((_: unknown, index: number) => + JSON.stringify({ ...metricsEnvoyHttpStatusErrorRatio.data.result[index].metric, source: 'envoy' }) + ) + ) + } + + return names + }, [metricsHttpStatusErrorRatio, metricsEnvoyHttpStatusErrorRatio]) return ( ` nginx:request_p50:5m{ingress="${ingressName}"} ` @@ -19,16 +20,31 @@ const queryDuration99 = (ingressName: string) => ` nginx:request_p99:5m{ingress="${ingressName}"} ` +// ENVOY: Queries for envoy metrics +const queryEnvoyDuration50 = (httpRouteName: string) => ` + envoy_proxy:request_p50:5m{httproute_name="${httpRouteName}"} +` + +const queryEnvoyDuration95 = (httpRouteName: string) => ` + 
envoy_proxy:request_p95:5m{httproute_name="${httpRouteName}"} +` + +const queryEnvoyDuration99 = (httpRouteName: string) => ` + envoy_proxy:request_p99:5m{httproute_name="${httpRouteName}"} +` + export function NetworkRequestDurationChart({ clusterId, serviceId, isFullscreen, ingressName, + httpRouteName, }: { clusterId: string serviceId: string isFullscreen?: boolean ingressName: string + httpRouteName: string }) { const { startTimestamp, endTimestamp, useLocalTime, timeRange } = useDashboardContext() @@ -50,7 +66,8 @@ export function NetworkRequestDurationChart({ setLegendSelectedKeys(new Set()) } - const { data: metrics50, isLoading: isLoadingMetrics50 } = useMetrics({ + // NGINX: Fetch nginx metrics (to remove when migrating to envoy) + const { data: metricsP50InSeconds, isLoading: isLoadingMetrics50 } = useMetrics({ clusterId, startTimestamp, endTimestamp, @@ -60,7 +77,7 @@ export function NetworkRequestDurationChart({ metricShortName: 'network_p50', }) - const { data: metrics99, isLoading: isLoadingMetrics99 } = useMetrics({ + const { data: metricsP99InSeconds, isLoading: isLoadingMetrics99 } = useMetrics({ clusterId, startTimestamp, endTimestamp, @@ -70,7 +87,7 @@ export function NetworkRequestDurationChart({ metricShortName: 'network_p99', }) - const { data: metrics95, isLoading: isLoadingMetrics95 } = useMetrics({ + const { data: metricsP95InSeconds, isLoading: isLoadingMetrics95 } = useMetrics({ clusterId, startTimestamp, endTimestamp, @@ -80,8 +97,43 @@ export function NetworkRequestDurationChart({ metricShortName: 'network_p95', }) + // ENVOY: Fetch envoy metrics (only if httpRouteName is configured) + const { data: metricsEnvoyP50InMs, isLoading: isLoadingMetricsEnvoy50 } = useMetrics({ + clusterId, + startTimestamp, + endTimestamp, + timeRange, + query: queryEnvoyDuration50(httpRouteName), + boardShortName: 'service_overview', + metricShortName: 'envoy_p50', + enabled: !!httpRouteName, + }) + + const { data: metricsEnvoyP99InMs, isLoading: 
isLoadingMetricsEnvoy99 } = useMetrics({ + clusterId, + startTimestamp, + endTimestamp, + timeRange, + query: queryEnvoyDuration99(httpRouteName), + boardShortName: 'service_overview', + metricShortName: 'envoy_p99', + enabled: !!httpRouteName, + }) + + const { data: metricsEnvoyP95InMs, isLoading: isLoadingMetricsEnvoy95 } = useMetrics({ + clusterId, + startTimestamp, + endTimestamp, + timeRange, + query: queryEnvoyDuration95(httpRouteName), + boardShortName: 'service_overview', + metricShortName: 'envoy_p95', + enabled: !!httpRouteName, + }) + const chartData = useMemo(() => { - if (!metrics99?.data?.result) { + // Check if we have data from either source + if (!metricsP99InSeconds?.data?.result && !metricsEnvoyP99InMs?.data?.result) { return [] } @@ -90,39 +142,90 @@ export function NetworkRequestDurationChart({ { timestamp: number; time: string; fullTime: string; [key: string]: string | number | null } >() - // Process network duration 99th percentile metrics - processMetricsData( - metrics99, - timeSeriesMap, - () => '99th percentile', - (value) => parseFloat(value) * 1000, // Convert to ms - useLocalTime - ) - - // Process network duration 99th percentile metrics - processMetricsData( - metrics95, - timeSeriesMap, - () => '95th percentile', - (value) => parseFloat(value) * 1000, // Convert to ms - useLocalTime - ) - - // Process network duration 0.5th percentile metrics - processMetricsData( - metrics50, - timeSeriesMap, - () => '50th percentile', - (value) => parseFloat(value) * 1000, // Convert to ms - useLocalTime - ) + // NGINX: Process nginx duration metrics (convert seconds to ms) + if (metricsP99InSeconds?.data?.result) { + processMetricsData( + metricsP99InSeconds, + timeSeriesMap, + () => '99th percentile (nginx)', + (value) => parseFloat(value) * 1000, // Convert seconds to ms + useLocalTime + ) + } + + if (metricsP95InSeconds?.data?.result) { + processMetricsData( + metricsP95InSeconds, + timeSeriesMap, + () => '95th percentile (nginx)', + (value) 
=> parseFloat(value) * 1000, // Convert seconds to ms + useLocalTime + ) + } + + if (metricsP50InSeconds?.data?.result) { + processMetricsData( + metricsP50InSeconds, + timeSeriesMap, + () => '50th percentile (nginx)', + (value) => parseFloat(value) * 1000, // Convert seconds to ms + useLocalTime + ) + } + + // ENVOY: Process envoy duration metrics (already in ms) + if (metricsEnvoyP99InMs?.data?.result) { + processMetricsData( + metricsEnvoyP99InMs, + timeSeriesMap, + () => '99th percentile (envoy)', + (value) => parseFloat(value), // Already in ms + useLocalTime + ) + } + + if (metricsEnvoyP95InMs?.data?.result) { + processMetricsData( + metricsEnvoyP95InMs, + timeSeriesMap, + () => '95th percentile (envoy)', + (value) => parseFloat(value), // Already in ms + useLocalTime + ) + } + + if (metricsEnvoyP50InMs?.data?.result) { + processMetricsData( + metricsEnvoyP50InMs, + timeSeriesMap, + () => '50th percentile (envoy)', + (value) => parseFloat(value), // Already in ms + useLocalTime + ) + } const baseChartData = Array.from(timeSeriesMap.values()).sort((a, b) => a.timestamp - b.timestamp) return addTimeRangePadding(baseChartData, startTimestamp, endTimestamp, useLocalTime) - }, [metrics99, metrics95, metrics50, useLocalTime, startTimestamp, endTimestamp]) + }, [ + metricsP99InSeconds, + metricsP95InSeconds, + metricsP50InSeconds, + metricsEnvoyP99InMs, + metricsEnvoyP95InMs, + metricsEnvoyP50InMs, + useLocalTime, + startTimestamp, + endTimestamp, + ]) - const isLoadingMetrics = isLoadingMetrics99 || isLoadingMetrics50 || isLoadingMetrics95 + const isLoadingMetrics = + isLoadingMetrics99 || + isLoadingMetrics50 || + isLoadingMetrics95 || + isLoadingMetricsEnvoy99 || + isLoadingMetricsEnvoy50 || + isLoadingMetricsEnvoy95 return ( 0 ? handleResetLegend : undefined} > + {/* NGINX: Lines for nginx metrics (to remove when migrating to envoy) */} 0 && !legendSelectedKeys.has('50th percentile') ? 
true : false} + hide={legendSelectedKeys.size > 0 && !legendSelectedKeys.has('50th percentile (nginx)')} /> 0 && !legendSelectedKeys.has('95th percentile') ? true : false} + hide={legendSelectedKeys.size > 0 && !legendSelectedKeys.has('95th percentile (nginx)')} /> 0 && !legendSelectedKeys.has('99th percentile') ? true : false} + hide={legendSelectedKeys.size > 0 && !legendSelectedKeys.has('99th percentile (nginx)')} /> + {/* ENVOY: Lines for envoy metrics (only shown if httpRouteName is configured) */} + {httpRouteName && ( + <> + 0 && !legendSelectedKeys.has('50th percentile (envoy)')} + /> + 0 && !legendSelectedKeys.has('95th percentile (envoy)')} + /> + 0 && !legendSelectedKeys.has('99th percentile (envoy)')} + /> + + )} {!isLoadingMetrics && chartData.length > 0 && ( ` sum(nginx:resp_bytes_rate:5m{ingress="${ingressName}"}) ` @@ -15,14 +16,25 @@ const queryRequestSize = (ingressName: string) => ` sum(nginx:req_bytes_rate:5m{ingress="${ingressName}"}) ` +// ENVOY: Queries for envoy metrics +const queryEnvoyResponseSize = (httpRouteName: string) => ` + sum(envoy_proxy:resp_bytes_rate:5m{httproute_name="${httpRouteName}"}) +` + +const queryEnvoyRequestSize = (httpRouteName: string) => ` + sum(envoy_proxy:req_bytes_rate:5m{httproute_name="${httpRouteName}"}) +` + export function NetworkRequestSizeChart({ clusterId, serviceId, ingressName, + httpRouteName, }: { clusterId: string serviceId: string ingressName: string + httpRouteName: string }) { const { startTimestamp, endTimestamp, useLocalTime, timeRange } = useDashboardContext() @@ -44,6 +56,7 @@ export function NetworkRequestSizeChart({ setLegendSelectedKeys(new Set()) } + // NGINX: Fetch nginx metrics (to remove when migrating to envoy) const { data: metricsResponseSize, isLoading: isLoadingMetricsResponseSize } = useMetrics({ clusterId, startTimestamp, @@ -64,8 +77,32 @@ export function NetworkRequestSizeChart({ metricShortName: 'network_req_size', }) + // ENVOY: Fetch envoy metrics (only if httpRouteName is 
configured) + const { data: metricsEnvoyResponseSize, isLoading: isLoadingMetricsEnvoyResponseSize } = useMetrics({ + clusterId, + startTimestamp, + endTimestamp, + timeRange, + query: queryEnvoyResponseSize(httpRouteName), + boardShortName: 'service_overview', + metricShortName: 'envoy_resp_size', + enabled: !!httpRouteName, + }) + + const { data: metricsEnvoyRequestSize, isLoading: isLoadingMetricsEnvoyRequestSize } = useMetrics({ + clusterId, + startTimestamp, + endTimestamp, + timeRange, + query: queryEnvoyRequestSize(httpRouteName), + boardShortName: 'service_overview', + metricShortName: 'envoy_req_size', + enabled: !!httpRouteName, + }) + const chartData = useMemo(() => { - if (!metricsResponseSize?.data?.result) { + // Check if we have data from either source + if (!metricsResponseSize?.data?.result && !metricsEnvoyResponseSize?.data?.result) { return [] } @@ -74,30 +111,66 @@ export function NetworkRequestSizeChart({ { timestamp: number; time: string; fullTime: string; [key: string]: string | number | null } >() - // Process network response size metrics - processMetricsData( - metricsResponseSize, - timeSeriesMap, - () => 'Response size', - (value) => parseFloat(value), // Convert to bytes - useLocalTime - ) - - // Process network request size metrics - processMetricsData( - metricsRequestSize, - timeSeriesMap, - () => 'Request size', - (value) => parseFloat(value), // Convert to bytes - useLocalTime - ) + // NGINX: Process nginx size metrics (to remove when migrating to envoy) + if (metricsResponseSize?.data?.result) { + processMetricsData( + metricsResponseSize, + timeSeriesMap, + () => 'Response size (nginx)', + (value) => parseFloat(value), + useLocalTime + ) + } + + if (metricsRequestSize?.data?.result) { + processMetricsData( + metricsRequestSize, + timeSeriesMap, + () => 'Request size (nginx)', + (value) => parseFloat(value), + useLocalTime + ) + } + + // ENVOY: Process envoy size metrics + if (metricsEnvoyResponseSize?.data?.result) { + 
processMetricsData( + metricsEnvoyResponseSize, + timeSeriesMap, + () => 'Response size (envoy)', + (value) => parseFloat(value), + useLocalTime + ) + } + + if (metricsEnvoyRequestSize?.data?.result) { + processMetricsData( + metricsEnvoyRequestSize, + timeSeriesMap, + () => 'Request size (envoy)', + (value) => parseFloat(value), + useLocalTime + ) + } const baseChartData = Array.from(timeSeriesMap.values()).sort((a, b) => a.timestamp - b.timestamp) return addTimeRangePadding(baseChartData, startTimestamp, endTimestamp, useLocalTime) - }, [metricsResponseSize, metricsRequestSize, useLocalTime, startTimestamp, endTimestamp]) + }, [ + metricsResponseSize, + metricsRequestSize, + metricsEnvoyResponseSize, + metricsEnvoyRequestSize, + useLocalTime, + startTimestamp, + endTimestamp, + ]) - const isLoadingMetrics = isLoadingMetricsResponseSize || isLoadingMetricsRequestSize + const isLoadingMetrics = + isLoadingMetricsResponseSize || + isLoadingMetricsRequestSize || + isLoadingMetricsEnvoyResponseSize || + isLoadingMetricsEnvoyRequestSize return ( 0 ? handleResetLegend : undefined} > + {/* NGINX: Lines for nginx metrics (to remove when migrating to envoy) */} 0 && !legendSelectedKeys.has('Response size') ? true : false} + hide={legendSelectedKeys.size > 0 && !legendSelectedKeys.has('Response size (nginx)')} /> 0 && !legendSelectedKeys.has('Request size') ? 
true : false} + hide={legendSelectedKeys.size > 0 && !legendSelectedKeys.has('Request size (nginx)')} /> + {/* ENVOY: Lines for envoy metrics (only shown if httpRouteName is configured) */} + {httpRouteName && ( + <> + 0 && !legendSelectedKeys.has('Response size (envoy)')} + /> + 0 && !legendSelectedKeys.has('Request size (envoy)')} + /> + + )} {!isLoadingMetrics && chartData.length > 0 && ( ` sum by(path,status)(nginx:req_rate:5m_by_path_status{ingress="${ingressName}"}) > 0 ` +// ENVOY: Query for envoy metrics +const queryEnvoy = (httpRouteName: string) => ` + sum by(envoy_response_code)(envoy_proxy:req_rate:5m_by_status{httproute_name="${httpRouteName}"}) > 0 +` + export function NetworkRequestStatusChart({ clusterId, serviceId, ingressName, + httpRouteName, }: { clusterId: string serviceId: string ingressName: string + httpRouteName: string }) { const { startTimestamp, endTimestamp, useLocalTime, timeRange } = useDashboardContext() @@ -41,6 +49,7 @@ export function NetworkRequestStatusChart({ setLegendSelectedKeys(new Set()) } + // NGINX: Fetch nginx metrics (to remove when migrating to envoy) const { data: metrics, isLoading: isLoadingMetrics } = useMetrics({ clusterId, startTimestamp, @@ -51,8 +60,21 @@ export function NetworkRequestStatusChart({ metricShortName: 'network_req_status', }) + // ENVOY: Fetch envoy metrics (only if httpRouteName is configured) + const { data: metricsEnvoy, isLoading: isLoadingMetricsEnvoy } = useMetrics({ + clusterId, + startTimestamp, + endTimestamp, + timeRange, + query: queryEnvoy(httpRouteName), + boardShortName: 'service_overview', + metricShortName: 'envoy_req_status', + enabled: !!httpRouteName, + }) + const chartData = useMemo(() => { - if (!metrics?.data?.result) { + // Check if we have data from either source + if (!metrics?.data?.result && !metricsEnvoy?.data?.result) { return [] } @@ -61,31 +83,66 @@ export function NetworkRequestStatusChart({ { timestamp: number; time: string; fullTime: string; [key: string]: string 
| number | null } >() - // Process network request metrics - processMetricsData( - metrics, - timeSeriesMap, - (_, index) => JSON.stringify(metrics.data.result[index].metric), - (value) => parseFloat(value), - useLocalTime - ) + // NGINX: Process nginx metrics (to remove when migrating to envoy) + if (metrics?.data?.result) { + processMetricsData( + metrics, + timeSeriesMap, + (_, index) => JSON.stringify({ ...metrics.data.result[index].metric, source: 'nginx' }), + (value) => parseFloat(value), + useLocalTime + ) + } + + // ENVOY: Process envoy metrics + if (metricsEnvoy?.data?.result) { + processMetricsData( + metricsEnvoy, + timeSeriesMap, + (_, index) => JSON.stringify({ ...metricsEnvoy.data.result[index].metric, source: 'envoy' }), + (value) => parseFloat(value), + useLocalTime + ) + } const baseChartData = Array.from(timeSeriesMap.values()).sort((a, b) => a.timestamp - b.timestamp) return addTimeRangePadding(baseChartData, startTimestamp, endTimestamp, useLocalTime) - }, [metrics, useLocalTime, startTimestamp, endTimestamp]) + }, [metrics, metricsEnvoy, useLocalTime, startTimestamp, endTimestamp]) const seriesNames = useMemo(() => { - if (!metrics?.data?.result) return [] - return metrics.data.result.map((_: unknown, index: number) => - JSON.stringify(metrics.data.result[index].metric) - ) as string[] - }, [metrics]) + const names: string[] = [] + + // NGINX: Extract nginx series names (to remove when migrating to envoy) + if (metrics?.data?.result) { + names.push( + ...metrics.data.result.map((_: unknown, index: number) => + JSON.stringify({ ...metrics.data.result[index].metric, source: 'nginx' }) + ) + ) + } + + // ENVOY: Extract envoy series names + if (metricsEnvoy?.data?.result) { + names.push( + ...metricsEnvoy.data.result + .filter((result: any) => { + const code = result.metric?.envoy_response_code + return code !== 'undefined' && code !== undefined && code !== '' + }) + .map((result: any) => JSON.stringify({ ...result.metric, source: 'envoy' })) + ) 
+ } + + return names + }, [metrics, metricsEnvoy]) + + const isLoading = isLoadingMetrics || isLoadingMetricsEnvoy return ( 0 && !legendSelectedKeys.has(name) ? true : false} + hide={legendSelectedKeys.size > 0 && !legendSelectedKeys.has(name)} /> ))} - {!isLoadingMetrics && chartData.length > 0 && ( + {!isLoading && chartData.length > 0 && ( ( - { - const { path, status } = JSON.parse(value) - return `path: "${path}" status: "${status}"` - }} - {...props} - /> - )} + content={(props) => { + // Group series by source + const formatter = (value: string) => { + const metric = JSON.parse(value) + const { source } = metric + + if (source === 'nginx') { + const { path, status } = metric + return `path: "${path}" status: "${status}" (nginx)` + } else { + const { envoy_response_code } = metric + return `status: "${envoy_response_code}" (envoy)` + } + } + + return + }} /> )} diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/service-dashboard.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/service-dashboard.tsx index 93ab304933a..55479504807 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/service-dashboard.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/service-dashboard.tsx @@ -7,6 +7,7 @@ import { useService } from '@qovery/domains/services/feature' import { Button, Callout, Chart, Heading, Icon, InputSelectSmall, Section, Tooltip } from '@qovery/shared/ui' import { useContainerName } from '../../hooks/use-container-name/use-container-name' import { useEnvironment } from '../../hooks/use-environment/use-environment' +import useHttpRouteName from '../../hooks/use-http-route-name/use-http-route-name' import { useIngressName } from '../../hooks/use-ingress-name/use-ingress-name' import { useNamespace } from '../../hooks/use-namespace/use-namespace' import { usePodNames } from '../../hooks/use-pod-names/use-pod-names' @@ -105,6 +106,14 @@ function 
ServiceDashboardContent() { endDate: now.toISOString(), }) + const { data: httpRouteName = '' } = useHttpRouteName({ + clusterId: environment?.cluster_id ?? '', + serviceId: serviceId, + enabled: hasPublicPort, + startDate: oneHourAgo.toISOString(), + endDate: now.toISOString(), + }) + if ((!containerName && isFetchedContainerName) || (!namespace && isFetchedNamespace)) { return (
@@ -231,6 +240,7 @@ function ServiceDashboardContent() { serviceId={serviceId} containerName={containerName} ingressName={ingressName} + httpRouteName={httpRouteName} /> )} {hasOnlyPrivatePorts && ( @@ -242,7 +252,12 @@ function ServiceDashboardContent() { )} {hasStorage && } {hasPublicPort && ( - + )} {hasOnlyPrivatePorts && (
@@ -301,6 +317,7 @@ function ServiceDashboardContent() { clusterId={environment.cluster_id} serviceId={serviceId} ingressName={ingressName} + httpRouteName={httpRouteName} />
@@ -308,6 +325,7 @@ function ServiceDashboardContent() { clusterId={environment.cluster_id} serviceId={serviceId} ingressName={ingressName} + httpRouteName={httpRouteName} />
diff --git a/libs/domains/observability/feature/src/lib/util-filter/dashboard-context.tsx b/libs/domains/observability/feature/src/lib/util-filter/dashboard-context.tsx index 70c58c4f837..7742d69cd25 100644 --- a/libs/domains/observability/feature/src/lib/util-filter/dashboard-context.tsx +++ b/libs/domains/observability/feature/src/lib/util-filter/dashboard-context.tsx @@ -163,11 +163,14 @@ export function DashboardProvider({ children }: PropsWithChildren) { const startTimestamp = startDate && convertDatetoTimestamp(startDate).toString() const endTimestamp = endDate && convertDatetoTimestamp(endDate).toString() - // Calculate the effective duration for Prometheus queries (accounts for zoom) - const queryTimeRange = - isAnyChartZoomed && startTimestamp && endTimestamp - ? `${Math.floor((parseInt(endTimestamp) - parseInt(startTimestamp)) / 60)}m` - : timeRange + // Calculate the effective duration for Prometheus queries (accounts for zoom and custom ranges) + const queryTimeRange = useMemo(() => { + // For custom time range or zoomed charts, calculate duration from timestamps + if ((timeRange === 'custom' || isAnyChartZoomed) && startTimestamp && endTimestamp) { + return `${Math.floor((parseInt(endTimestamp) - parseInt(startTimestamp)) / 60)}m` + } + return timeRange + }, [timeRange, isAnyChartZoomed, startTimestamp, endTimestamp]) // Calculate the average over queryTimeRange with a sub-sampling every 5m or 1m const THREE_DAYS_IN_SECONDS = 3 * 24 * 60 * 60 diff --git a/libs/shared/ui/src/lib/components/chart/chart.tsx b/libs/shared/ui/src/lib/components/chart/chart.tsx index 3f8bff0191a..6926331da4f 100644 --- a/libs/shared/ui/src/lib/components/chart/chart.tsx +++ b/libs/shared/ui/src/lib/components/chart/chart.tsx @@ -218,12 +218,14 @@ export const ChartLegendContent = ({ style.id = styleId if (key) { + // Escape special characters in the key for CSS selector + const escapedKey = CSS.escape(key) // When highlighting, make non-highlighted paths semi-transparent 
style.textContent = ` - path[name]:not([name="${key}"]) { + path[name]:not([name="${escapedKey}"]) { opacity: 0.15 !important; } - path[name="${key}"] { + path[name="${escapedKey}"] { opacity: 1 !important; } `