From ebebedcd8932efea0010ac292ee39e8e0873ba87 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Thu, 30 Jan 2025 23:19:12 +0000 Subject: [PATCH 1/7] feat: add rate limit statistics --- src/apify_client/_http_client.py | 15 +++++++++++ src/apify_client/_statistics.py | 32 +++++++++++++++++++++++ src/apify_client/client.py | 3 +++ tests/unit/test_statistics.py | 44 ++++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+) create mode 100644 src/apify_client/_statistics.py create mode 100644 tests/unit/test_statistics.py diff --git a/src/apify_client/_http_client.py b/src/apify_client/_http_client.py index ca4e39c1..235e49b4 100644 --- a/src/apify_client/_http_client.py +++ b/src/apify_client/_http_client.py @@ -14,6 +14,7 @@ from apify_client._errors import ApifyApiError, InvalidResponseBodyError, is_retryable_error from apify_client._logging import log_context, logger_name +from apify_client._statistics import Statistics from apify_client._utils import retry_with_exp_backoff, retry_with_exp_backoff_async if TYPE_CHECKING: @@ -35,6 +36,7 @@ def __init__( max_retries: int = 8, min_delay_between_retries_millis: int = 500, timeout_secs: int = 360, + stats: Statistics | None = None, ) -> None: self.max_retries = max_retries self.min_delay_between_retries_millis = min_delay_between_retries_millis @@ -59,6 +61,8 @@ def __init__( self.httpx_client = httpx.Client(headers=headers, follow_redirects=True, timeout=timeout_secs) self.httpx_async_client = httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=timeout_secs) + self.stats = stats or Statistics() + @staticmethod def _maybe_parse_response(response: httpx.Response) -> Any: if response.status_code == HTTPStatus.NO_CONTENT: @@ -143,6 +147,8 @@ def call( log_context.method.set(method) log_context.url.set(url) + self.stats.calls += 1 + if stream and parse_response: raise ValueError('Cannot stream response and parse it at the same time!') @@ -153,6 +159,9 @@ def call( def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response: log_context.attempt.set(attempt) logger.debug('Sending request') + + self.stats.requests += 1 + try: request = httpx_client.build_request( method=method, @@ -177,6 +186,9 @@ def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response: return response + if response.status_code == HTTPStatus.TOO_MANY_REQUESTS: + self.stats.add_rate_limit_error(attempt) + except Exception as e: logger.debug('Request threw exception', exc_info=e) if not is_retryable_error(e): @@ -251,6 +263,9 @@ async def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response return response + if response.status_code == HTTPStatus.TOO_MANY_REQUESTS: + self.stats.add_rate_limit_error(attempt) + except Exception as e: logger.debug('Request threw exception', exc_info=e) if not is_retryable_error(e): diff --git a/src/apify_client/_statistics.py b/src/apify_client/_statistics.py new file mode 100644 index 00000000..0678946c --- /dev/null +++ b/src/apify_client/_statistics.py @@ -0,0 +1,32 @@ +from dataclasses import dataclass, field + + +@dataclass +class Statistics: + """Statistics about API client usage and rate limit errors.""" + + calls: int = 0 + requests: int = 0 + rate_limit_errors: list[int] = field(default_factory=list) + + def add_rate_limit_error(self, attempt: int) -> None: + """Add rate limit error for specific attempt. + + Args: + attempt: The attempt number (1-based indexing) + """ + if attempt < 1: + raise ValueError('Attempt must be greater than 0') + + index = attempt - 1 + self._ensure_list_capacity(index) + self.rate_limit_errors[index] += 1 + + def _ensure_list_capacity(self, index: int) -> None: + """Ensure rate_limit_errors list has enough capacity. + + Args: + index: Required index to access + """ + if len(self.rate_limit_errors) <= index: + self.rate_limit_errors.extend([0] * (index - len(self.rate_limit_errors) + 1)) diff --git a/src/apify_client/client.py b/src/apify_client/client.py index 09a04a0d..e26bb736 100644 --- a/src/apify_client/client.py +++ b/src/apify_client/client.py @@ -3,6 +3,7 @@ from apify_shared.utils import ignore_docs from apify_client._http_client import HTTPClient, HTTPClientAsync +from apify_client._statistics import Statistics from apify_client.clients import ( ActorClient, ActorClientAsync, @@ -126,11 +127,13 @@ def __init__( timeout_secs=timeout_secs, ) + self.stats = Statistics() self.http_client = HTTPClient( token=token, max_retries=self.max_retries, min_delay_between_retries_millis=self.min_delay_between_retries_millis, timeout_secs=self.timeout_secs, + stats=self.stats, ) def actor(self, actor_id: str) -> ActorClient: diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py new file mode 100644 index 00000000..72473b84 --- /dev/null +++ b/tests/unit/test_statistics.py @@ -0,0 +1,44 @@ +import pytest + +from apify_client._statistics import Statistics + + +@pytest.mark.parametrize( + ('attempts', 'expected_errors'), + [ + ([1], [1]), + ([1, 5], [1, 0, 0, 0, 1]), + ([5, 1], [1, 0, 0, 0, 1]), + ([3, 5, 1], [1, 0, 1, 0, 1]), + ([1, 5, 3], [1, 0, 1, 0, 1]), + ([2, 1, 2, 1, 5, 2, 1], [3, 3, 0, 0, 1]), + ], +) +def test_add_rate_limit_error(attempts: list[int], expected_errors: list[int]) -> None: + """Test that add_rate_limit_error correctly tracks errors for different attempt sequences.""" + stats = Statistics() + for attempt in attempts: + stats.add_rate_limit_error(attempt) + assert stats.rate_limit_errors == expected_errors + + +def test_add_rate_limit_error_invalid_attempt() -> None: + """Test that add_rate_limit_error raises ValueError for invalid attempt.""" + stats = Statistics() + with pytest.raises(ValueError, match='Attempt must be greater than 0'): + stats.add_rate_limit_error(0) + + +def test_statistics_initial_state() -> None: + """Test initial state of Statistics instance.""" + stats = Statistics() + assert stats.calls == 0 + assert stats.requests == 0 + assert stats.rate_limit_errors == [] + + +def test_add_rate_limit_error_type_validation() -> None: + """Test type validation in add_rate_limit_error.""" + stats = Statistics() + with pytest.raises(TypeError): + stats.add_rate_limit_error('1') # type: ignore[arg-type] From d95b7c8665a0df7100c189e68b70d3855ef22034 Mon Sep 17 00:00:00 2001 From: Max Bohomolov <34358312+Mantisus@users.noreply.github.com> Date: Mon, 3 Feb 2025 15:36:00 +0200 Subject: [PATCH 2/7] Update src/apify_client/_statistics.py Co-authored-by: Vlada Dusek --- src/apify_client/_statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/apify_client/_statistics.py b/src/apify_client/_statistics.py index 0678946c..d6d603d5 100644 --- a/src/apify_client/_statistics.py +++ b/src/apify_client/_statistics.py @@ -13,7 +13,7 @@ def add_rate_limit_error(self, attempt: int) -> None: """Add rate limit error for specific attempt. Args: - attempt: The attempt number (1-based indexing) + attempt: The attempt number (1-based indexing). """ if attempt < 1: raise ValueError('Attempt must be greater than 0') From b5cc3789402add1e8a52ee3937ea9a04a6d54245 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Mon, 3 Feb 2025 15:55:34 +0000 Subject: [PATCH 3/7] fix increment stats calls for `HTTPClientAsync` --- src/apify_client/_http_client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/apify_client/_http_client.py b/src/apify_client/_http_client.py index 235e49b4..2a23516f 100644 --- a/src/apify_client/_http_client.py +++ b/src/apify_client/_http_client.py @@ -229,6 +229,8 @@ async def call( log_context.method.set(method) log_context.url.set(url) + self.stats.calls += 1 + if stream and parse_response: raise ValueError('Cannot stream response and parse it at the same time!') From d7bdbda38c1cafd57cea512ac8066522481571c9 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Mon, 3 Feb 2025 15:56:25 +0000 Subject: [PATCH 4/7] add describe fields --- src/apify_client/_statistics.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/apify_client/_statistics.py b/src/apify_client/_statistics.py index d6d603d5..c99fccb1 100644 --- a/src/apify_client/_statistics.py +++ b/src/apify_client/_statistics.py @@ -6,8 +6,13 @@ class Statistics: """Statistics about API client usage and rate limit errors.""" calls: int = 0 + """Total number of API method calls made by the client.""" + requests: int = 0 + """Total number of HTTP requests sent, including retries.""" + rate_limit_errors: list[int] = field(default_factory=list) + """List tracking which retry attempts encountered rate limit (429) errors.""" def add_rate_limit_error(self, attempt: int) -> None: """Add rate limit error for specific attempt. From 04ca439d0217299085ab309d4b0804cf5e7e2503 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Mon, 3 Feb 2025 16:08:35 +0000 Subject: [PATCH 5/7] add ids for test --- tests/unit/test_statistics.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py index 72473b84..dc7eb1f1 100644 --- a/tests/unit/test_statistics.py +++ b/tests/unit/test_statistics.py @@ -13,6 +13,14 @@ ([1, 5, 3], [1, 0, 1, 0, 1]), ([2, 1, 2, 1, 5, 2, 1], [3, 3, 0, 0, 1]), ], + ids=[ + 'single_error', + 'two_single_errors', + 'two_single_errors_reversed', + 'three_single_errors', + 'three_single_errors_reordered', + 'multiple_errors_per_attempt', + ], ) def test_add_rate_limit_error(attempts: list[int], expected_errors: list[int]) -> None: """Test that add_rate_limit_error correctly tracks errors for different attempt sequences.""" From 43e53a6553c1dac9645b6a02dd2133b4b6a34d92 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Tue, 4 Feb 2025 09:38:44 +0000 Subject: [PATCH 6/7] list to dict --- src/apify_client/_statistics.py | 16 +++------------- tests/unit/test_statistics.py | 14 +++++++------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/apify_client/_statistics.py b/src/apify_client/_statistics.py index c99fccb1..d06d8d82 100644 --- a/src/apify_client/_statistics.py +++ b/src/apify_client/_statistics.py @@ -1,3 +1,4 @@ +from collections import defaultdict from dataclasses import dataclass, field @@ -11,7 +12,7 @@ class Statistics: requests: int = 0 """Total number of HTTP requests sent, including retries.""" - rate_limit_errors: list[int] = field(default_factory=list) + rate_limit_errors: defaultdict[int, int] = field(default_factory=lambda: defaultdict(int)) """List tracking which retry attempts encountered rate limit (429) errors.""" def add_rate_limit_error(self, attempt: int) -> None: @@ -23,15 +24,4 @@ def add_rate_limit_error(self, attempt: int) -> None: if attempt < 1: raise ValueError('Attempt must be greater than 0') - index = attempt - 1 - self._ensure_list_capacity(index) - self.rate_limit_errors[index] += 1 - - def _ensure_list_capacity(self, index: int) -> None: - """Ensure rate_limit_errors list has enough capacity. - - Args: - index: Required index to access - """ - if len(self.rate_limit_errors) <= index: - self.rate_limit_errors.extend([0] * (index - len(self.rate_limit_errors) + 1)) + self.rate_limit_errors[attempt - 1] += 1 diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py index dc7eb1f1..032410ef 100644 --- a/tests/unit/test_statistics.py +++ b/tests/unit/test_statistics.py @@ -6,12 +6,12 @@ @pytest.mark.parametrize( ('attempts', 'expected_errors'), [ - ([1], [1]), - ([1, 5], [1, 0, 0, 0, 1]), - ([5, 1], [1, 0, 0, 0, 1]), - ([3, 5, 1], [1, 0, 1, 0, 1]), - ([1, 5, 3], [1, 0, 1, 0, 1]), - ([2, 1, 2, 1, 5, 2, 1], [3, 3, 0, 0, 1]), + ([1], {0: 1}), + ([1, 5], {0: 1, 4: 1}), + ([5, 1], {0: 1, 4: 1}), + ([3, 5, 1], {0: 1, 2: 1, 4: 1}), + ([1, 5, 3], {0: 1, 2: 1, 4: 1}), + ([2, 1, 2, 1, 5, 2, 1], {0: 3, 1: 3, 4: 1}), ], ids=[ 'single_error', @@ -42,7 +42,7 @@ def test_statistics_initial_state() -> None: stats = Statistics() assert stats.calls == 0 assert stats.requests == 0 - assert stats.rate_limit_errors == [] + assert stats.rate_limit_errors == {} def test_add_rate_limit_error_type_validation() -> None: From a89255dacffde1b4d62b1fddb5b7b6602800edb5 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Tue, 4 Feb 2025 10:43:38 +0000 Subject: [PATCH 7/7] add pytest.param in test --- tests/unit/test_statistics.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py index 032410ef..9775aa01 100644 --- a/tests/unit/test_statistics.py +++ b/tests/unit/test_statistics.py @@ -6,20 +6,12 @@ @pytest.mark.parametrize( ('attempts', 'expected_errors'), [ - ([1], {0: 1}), - ([1, 5], {0: 1, 4: 1}), - ([5, 1], {0: 1, 4: 1}), - ([3, 5, 1], {0: 1, 2: 1, 4: 1}), - ([1, 5, 3], {0: 1, 2: 1, 4: 1}), - ([2, 1, 2, 1, 5, 2, 1], {0: 3, 1: 3, 4: 1}), - ], - ids=[ - 'single_error', - 'two_single_errors', - 'two_single_errors_reversed', - 'three_single_errors', - 'three_single_errors_reordered', - 'multiple_errors_per_attempt', + pytest.param([1], {0: 1}, id='single error'), + pytest.param([1, 5], {0: 1, 4: 1}, id='two single errors'), + pytest.param([5, 1], {0: 1, 4: 1}, id='two single errors reversed'), + pytest.param([3, 5, 1], {0: 1, 2: 1, 4: 1}, id='three single errors'), + pytest.param([1, 5, 3], {0: 1, 2: 1, 4: 1}, id='three single errors reordered'), + pytest.param([2, 1, 2, 1, 5, 2, 1], {0: 3, 1: 3, 4: 1}, id='multiple errors per attempt'), ], ) def test_add_rate_limit_error(attempts: list[int], expected_errors: list[int]) -> None: