# Seconds to wait for the remote server to connect / send data before giving
# up, so that a stalled download cannot block a Celery worker forever.
_ARCHIVE_DOWNLOAD_TIMEOUT = 60


def _resolves_inside(root, relative_name):
    """
    Return True when `relative_name`, joined onto `root`, stays inside `root`.

    `root` must already be an absolute, symlink-resolved directory path.
    """
    candidate = os.path.realpath(os.path.join(root, relative_name))
    return candidate == root or candidate.startswith(root.rstrip(os.sep) + os.sep)


def _extract_zip(payload, destination):
    """
    Extract the zip archive held in the byte string `payload` into `destination`.
    """
    import io

    archive = zipfile.ZipFile(io.BytesIO(payload))
    try:
        # zipfile itself strips absolute paths and `..` components from member
        # names (Python 2.7.4+), so no extra member filtering is done here.
        archive.extractall(destination)
    finally:
        archive.close()


def _extract_tar(payload, destination):
    """
    Extract the (optionally compressed) tar archive held in the byte string
    `payload` into `destination`, skipping members that would land outside it.
    """
    import io

    root = os.path.realpath(destination)
    # Default mode 'r' lets tarfile detect gzip/bz2 compression transparently.
    archive = tarfile.open(fileobj=io.BytesIO(payload))
    try:
        trusted = []
        for member in archive.getmembers():
            escapes = not _resolves_inside(root, member.name)
            if not escapes and (member.issym() or member.islnk()):
                # Symlink targets are relative to the link's own directory,
                # hard link targets are relative to the archive root.
                link_base = os.path.dirname(member.name) if member.issym() else ''
                escapes = not _resolves_inside(
                    root, os.path.join(link_base, member.linkname))
            if escapes:
                logging.getLogger(__name__).warning(
                    'skipping unsafe archive member: %s', member.name)
            else:
                trusted.append(member)
        archive.extractall(root, members=trusted)
    finally:
        archive.close()


@app.task
def handle_archive_url(url, scan_id, path, file_name):
    """
    Download the archive at `url`, unpack it below the directory `path` and
    apply the scan identified by `scan_id` to `path`.

    A URL whose path ends with 'zip' is unpacked as a zip archive directly
    into `path`. Any other URL is opened with `tarfile` and unpacked into
    `path/<archive base name without its last extension>`. Nothing is unpacked
    when the download does not answer with HTTP 200, but the scan is still
    queued so that the scan record gets processed.

    `file_name` is accepted for signature compatibility with the other scan
    tasks and is not used.

    Errors raised by `requests`, `zipfile` or `tarfile` (network failure,
    unreadable or unsupported archive) are not caught and fail the task.
    """
    # NOTE: the whole archive is held in memory; the working directory of the
    # worker process is deliberately left untouched (no os.chdir), since it is
    # shared by every task the worker runs.
    response = requests.get(url, timeout=_ARCHIVE_DOWNLOAD_TIMEOUT)

    if response.status_code == 200:
        # Use only the path component so query strings and fragments neither
        # defeat the extension check nor leak into the directory name.
        archive_path = urlparse(url).path

        if archive_path.endswith('zip'):
            _extract_zip(response.content, path)
        else:
            stem = os.path.splitext(os.path.basename(archive_path))[0]
            _extract_tar(response.content, os.path.join(path, stem))

    apply_scan_async.delay(path, scan_id)
current_scan = Scan.objects.get(pk=scan_id) + path = '/'.join([path, '{}'.format(current_scan.pk)]) + + os.makedirs(path) + + file_name = '{}'.format(current_scan.pk) + handle_archive_url.delay(url, scan_id, path, file_name) + else: scan_directory = None scan_id = create_scan_id(user, url, scan_directory, scan_start_time) @@ -174,10 +202,9 @@ def post(self, request, *args, **kwargs): file_name = '{}'.format(current_scan.pk) scan_code_async.delay(url, scan_id, path, file_name) - return HttpResponseRedirect('/resultscan/' + '{}'.format(current_scan.pk)) + return HttpResponseRedirect('/resultscan/' + '{}'.format(current_scan.pk)) # API views -# API views class ScanApiView(APIView): def get(self, request, format=None, **kwargs): scan_id = kwargs['pk']