From 6966bb925cea82ea6688f38f06e57d3f891e52ac Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati Date: Fri, 8 Mar 2024 09:17:59 -0500 Subject: [PATCH 1/2] Add capability to block user agents --- webservices/rest.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/webservices/rest.py b/webservices/rest.py index 2ba181c1b..f8cfe5236 100644 --- a/webservices/rest.py +++ b/webservices/rest.py @@ -145,9 +145,13 @@ def handle_error(error): RESTRICT_MESSAGE = "We apologize for the inconvenience, but we are temporarily " \ "blocking API traffic. Please contact apiinfo@fec.gov if this is an urgent issue." +# list of blocked user agent strings (e.g. Googlebot, Bingbot, etc.) that will result in the request +# being blocked if the user-agent header contains any of the specified strings +BLOCKED_USER_AGENTS = utils.split_env_var(env.get_credential('FEC_API_BLOCKED_USER_AGENTS', '')) + @app.before_request -def limit_remote_addr(): +def limit_access_based_on_request(): """ If `FEC_API_USE_PROXY` is set: - Reject all requests that are not routed through the API Umbrella @@ -176,6 +180,10 @@ def limit_remote_addr(): if request_api_key_id not in BYPASS_RESTRICTION_API_KEY_IDS: # Service unavailable abort(503, RESTRICT_MESSAGE) + user_agent = request.headers.get('User-Agent') + for blocked_agent in BLOCKED_USER_AGENTS: + if len(blocked_agent) > 0 and blocked_agent in user_agent: + abort(429) # Too many requests def get_cache_header(url): From 3ab6b494b1cf2b7b25087044d819f6ebaff9338b Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati Date: Fri, 8 Mar 2024 09:58:10 -0500 Subject: [PATCH 2/2] update to 403 and add check for user-agent first --- webservices/rest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/webservices/rest.py b/webservices/rest.py index f8cfe5236..a7b1f09c0 100644 --- a/webservices/rest.py +++ b/webservices/rest.py @@ -181,9 +181,10 @@ def limit_access_based_on_request(): # Service unavailable abort(503, 
RESTRICT_MESSAGE) user_agent = request.headers.get('User-Agent') - for blocked_agent in BLOCKED_USER_AGENTS: - if len(blocked_agent) > 0 and blocked_agent in user_agent: - abort(429) # Too many requests + if user_agent and BLOCKED_USER_AGENTS: + for blocked_agent in BLOCKED_USER_AGENTS: + if len(blocked_agent) > 0 and blocked_agent in user_agent: + abort(403) # Forbidden def get_cache_header(url):