From b76577e1bd4ba3956774251036b915cdc2917f98 Mon Sep 17 00:00:00 2001 From: Irakli Mchedlishvili Date: Mon, 22 Jun 2020 16:31:57 +0400 Subject: [PATCH 1/2] Allow Blacklisting URLs from config --- datapusher/jobs.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/datapusher/jobs.py b/datapusher/jobs.py index 3d06307..2e61511 100644 --- a/datapusher/jobs.py +++ b/datapusher/jobs.py @@ -61,6 +61,7 @@ 'resource_update': '{ckan_url}/api/action/resource_update' } +BLACKLIST = web.app.config.get('BLACKLIST_URLS', '').split(' ') class HTTPError(util.JobError): """Exception that's raised if a job fails due to an HTTP problem.""" @@ -351,6 +352,12 @@ def push_to_datastore(task_id, input, dry_run=False): raise util.JobError( 'Only http, https, and ftp resources may be fetched.' ) + + for black_url in BLACKLIST: + if black_url in url: + raise util.JobError( + 'Resouce can not be fetched - Given URL is blacklisted: %s' % url + ) # fetch the resource data logger.info('Fetching from: {0}'.format(url)) From 2dd12ad8558a322cd8ca09db99b9e76c8253b287 Mon Sep 17 00:00:00 2001 From: Irakli Mchedlishvili Date: Fri, 26 Jun 2020 14:43:31 +0400 Subject: [PATCH 2/2] Consider empty string for blacklist url --- datapusher/jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datapusher/jobs.py b/datapusher/jobs.py index 2e61511..5c2dfb3 100644 --- a/datapusher/jobs.py +++ b/datapusher/jobs.py @@ -354,7 +354,7 @@ def push_to_datastore(task_id, input, dry_run=False): ) for black_url in BLACKLIST: - if black_url in url: + if black_url and black_url in url: raise util.JobError( 'Resouce can not be fetched - Given URL is blacklisted: %s' % url )