Skip to content

Commit 5a5985a

Browse files
authored
fix(cells) Don't record proxied request failures towards circuit breaker (#111639)
We don't want to trip the breaker when the proxied requests fail with 500s. We're mostly interested in preventing resource exhaustion from slow requests/connection errors here. Refs INFRENG-275
1 parent d9cf05d commit 5a5985a

File tree

2 files changed

+66
-11
lines changed

2 files changed

+66
-11
lines changed

src/sentry/hybridcloud/apigateway/proxy.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from django.http.response import HttpResponseBase
1515
from requests import Response as ExternalResponse
1616
from requests import request as external_request
17-
from requests.exceptions import Timeout
17+
from requests.exceptions import ConnectionError, Timeout
1818

1919
from sentry import options
2020
from sentry.api.exceptions import RequestTimeout
@@ -193,18 +193,22 @@ def proxy_cell_request(request: HttpRequest, cell: Cell, url_name: str) -> HttpR
193193
)
194194
except Timeout:
195195
metrics.incr("apigateway.proxy.request_timeout", tags=metric_tags)
196-
try:
197-
if circuit_breaker is not None:
198-
circuit_breaker.record_error()
199-
except Exception:
200-
logger.exception("Failed to record circuitbreaker failure")
196+
if circuit_breaker is not None:
197+
circuit_breaker.record_error()
201198

202199
# remote silo timeout. Use DRF timeout instead
203200
raise RequestTimeout()
201+
except ConnectionError:
202+
metrics.incr("apigateway.proxy.connection_error", tags=metric_tags)
203+
if circuit_breaker is not None:
204+
circuit_breaker.record_error()
205+
206+
raise
204207

205-
if resp.status_code >= 500 and circuit_breaker is not None:
208+
if resp.status_code >= 502:
206209
metrics.incr("apigateway.proxy.request_failed", tags=metric_tags)
207-
circuit_breaker.record_error()
210+
if circuit_breaker is not None:
211+
circuit_breaker.record_error()
208212

209213
new_headers = clean_outbound_headers(resp.headers)
210214
resp.headers.clear()

tests/sentry/hybridcloud/apigateway/test_proxy.py

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from django.core.files.uploadedfile import SimpleUploadedFile
88
from django.http import HttpResponse
99
from django.test.client import RequestFactory
10-
from requests.exceptions import Timeout
10+
from requests.exceptions import ConnectionError, Timeout
1111

1212
from sentry.api.exceptions import RequestTimeout
1313
from sentry.hybridcloud.apigateway.proxy import proxy_request
@@ -352,7 +352,58 @@ def test_timeout_records_error(self) -> None:
352352

353353
@responses.activate
354354
@override_options(CB_ENABLED)
355-
def test_5xx_response_records_error(self) -> None:
355+
def test_connection_error_records_error(self) -> None:
356+
responses.add(
357+
responses.GET,
358+
f"{self.CELL.address}/connect-error",
359+
body=ConnectionError(),
360+
)
361+
with patch("sentry.hybridcloud.apigateway.proxy.CircuitBreaker") as mock_breaker_class:
362+
mock_breaker = self._make_breaker_mock(allow_request=True)
363+
mock_breaker_class.return_value = mock_breaker
364+
request = RequestFactory().get("http://sentry.io/connect-error")
365+
with pytest.raises(ConnectionError):
366+
proxy_request(request, self.organization.slug, url_name)
367+
mock_breaker.record_error.assert_called_once()
368+
369+
@responses.activate
370+
@override_options(CB_ENABLED)
371+
def test_connection_error_records_metric(self) -> None:
372+
responses.add(
373+
responses.GET,
374+
f"{self.CELL.address}/connect-error",
375+
body=ConnectionError(),
376+
)
377+
with patch("sentry.hybridcloud.apigateway.proxy.metrics") as mock_metrics:
378+
request = RequestFactory().get("http://sentry.io/connect-error")
379+
with pytest.raises(ConnectionError):
380+
proxy_request(request, self.organization.slug, url_name)
381+
mock_metrics.incr.assert_any_call(
382+
"apigateway.proxy.connection_error",
383+
tags={"region": self.CELL.name, "url_name": url_name},
384+
)
385+
386+
@responses.activate
387+
@override_options(CB_ENABLED)
388+
def test_504_response_does_record_error(self) -> None:
389+
responses.add(
390+
responses.GET,
391+
f"{self.CELL.address}/server-error",
392+
status=504,
393+
body=json.dumps({"detail": "gateway timeout"}),
394+
content_type="application/json",
395+
)
396+
with patch("sentry.hybridcloud.apigateway.proxy.CircuitBreaker") as mock_breaker_class:
397+
mock_breaker = self._make_breaker_mock(allow_request=True)
398+
mock_breaker_class.return_value = mock_breaker
399+
request = RequestFactory().get("http://sentry.io/server-error")
400+
resp = proxy_request(request, self.organization.slug, url_name)
401+
assert resp.status_code == 504
402+
mock_breaker.record_error.assert_called_once()
403+
404+
@responses.activate
405+
@override_options(CB_ENABLED)
406+
def test_500_response_does_not_record_error(self) -> None:
356407
responses.add(
357408
responses.GET,
358409
f"{self.CELL.address}/server-error",
@@ -366,7 +417,7 @@ def test_5xx_response_records_error(self) -> None:
366417
request = RequestFactory().get("http://sentry.io/server-error")
367418
resp = proxy_request(request, self.organization.slug, url_name)
368419
assert resp.status_code == 500
369-
mock_breaker.record_error.assert_called_once()
420+
mock_breaker.record_error.assert_not_called()
370421

371422
@responses.activate
372423
@override_options(CB_ENABLED)

0 commit comments

Comments
 (0)