diff --git a/scanapp/migrations/0001_initial.py b/scanapp/migrations/0001_initial.py index e9b75390..2df892eb 100644 --- a/scanapp/migrations/0001_initial.py +++ b/scanapp/migrations/0001_initial.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Generated by Django 1.11.1 on 2017-08-14 17:19 +# Generated by Django 1.11.1 on 2017-08-24 09:46 from __future__ import unicode_literals from django.conf import settings @@ -80,7 +80,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('url', models.URLField(blank=True, help_text='Url from where the code is fetched', max_length=2000, null=True)), - ('scan_directory', models.CharField(blank=True, help_text='Directory in which the code to be scanned is stored', max_length=200, null=True)), + ('scan_directory', models.CharField(blank=True, help_text='Directory in which the code to be scanned is stored', max_length=2000, null=True)), ('scancode_notice', models.CharField(blank=True, help_text='Notice by the scancode-toolkit', max_length=2000, null=True)), ('scancode_version', models.CharField(blank=True, help_text='Version of scancode being used', max_length=200, null=True)), ('files_count', models.IntegerField(blank=True, default=0, help_text='Number of files under scan', null=True)), @@ -101,6 +101,24 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('path', models.CharField(help_text='Path of file scanned', max_length=400)), + ('type', models.CharField(help_text='Type of the entity being scanned', max_length=400)), + ('name', models.CharField(help_text='Name of the entity being scanned', max_length=400)), + ('base_name', models.CharField(help_text='Base name of entity without extension', max_length=400)), + ('extension', models.CharField(blank=True, help_text='Extension of the entity being scanned', max_length=400, null=True)), + 
('date', models.DateTimeField(blank=True, help_text='Date of entity being created', null=True)), + ('size', models.IntegerField(blank=True, help_text='Size of the entity being scanned', null=True)), + ('sha1', models.CharField(blank=True, help_text='SHA1 Checksums of the file', max_length=400, null=True)), + ('md5', models.CharField(blank=True, help_text='MD5 checksums of the file', max_length=400, null=True)), + ('files_count', models.IntegerField(blank=True, help_text='number of files present if a directory', null=True)), + ('mime_type', models.CharField(blank=True, help_text='mime type of entity being scanned', max_length=400, null=True)), + ('file_type', models.CharField(blank=True, help_text='file type of entity being scanned. null if the entity is a directory', max_length=400, null=True)), + ('programming_language', models.CharField(blank=True, help_text='programming language used in the entity', max_length=400, null=True)), + ('is_binary', models.BooleanField(default=False, help_text='Whether the entity being scanned is binary or not')), + ('is_text', models.BooleanField(default=False, help_text='Whether the entity being scanned has text or not')), + ('is_archive', models.BooleanField(default=False, help_text='Whether the entity being scanned is archive or not')), + ('is_media', models.BooleanField(default=False, help_text='Whether the entity being scanned is media file or not')), + ('is_source', models.BooleanField(default=False, help_text='Whether the entity being scanned is source or not')), + ('is_script', models.BooleanField(default=False, help_text='Whether the entity being scanned is a script file or not')), ('scan', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='scanapp.Scan')), ], ), diff --git a/scanapp/migrations/0002_auto_20170815_2324.py b/scanapp/migrations/0002_auto_20170815_2324.py deleted file mode 100644 index 9ca3fdb8..00000000 --- a/scanapp/migrations/0002_auto_20170815_2324.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- 
coding: utf-8 -*- -# Generated by Django 1.11.1 on 2017-08-15 23:24 -from __future__ import unicode_literals - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('scanapp', '0001_initial'), - ] - - operations = [ - migrations.AlterField( - model_name='scan', - name='scan_directory', - field=models.CharField(blank=True, help_text='Directory in which the code to be scanned is stored', max_length=2000, null=True), - ), - ] diff --git a/scanapp/models.py b/scanapp/models.py index 03a877af..40de3514 100644 --- a/scanapp/models.py +++ b/scanapp/models.py @@ -104,6 +104,87 @@ def __str__(self): scan = models.ForeignKey(Scan) path = models.CharField(max_length=400, help_text='Path of file scanned') + type = models.CharField(max_length=400, help_text='Type of the entity being scanned') + name = models.CharField(max_length=400, help_text='Name of the entity being scanned') + base_name = models.CharField( + max_length=400, + help_text='Base name of entity without extension', + ) + extension = models.CharField( + max_length=400, + help_text='Extension of the entity being scanned', + null=True, + blank=True + ) + date = models.DateTimeField( + help_text='Date of entity being created', + null=True, + blank=True + ) + size = models.IntegerField( + help_text='Size of the entity being scanned', + null=True, + blank=True + ) + sha1 = models.CharField( + max_length=400, + help_text='SHA1 Checksums of the file', + null=True, + blank=True + ) + md5 = models.CharField( + max_length=400, + help_text='MD5 checksums of the file', + null=True, + blank=True + ) + files_count = models.IntegerField( + help_text='number of files present if a directory', + null=True, + blank=True + ) + mime_type = models.CharField( + max_length=400, + help_text='mime type of entity being scanned', + null=True, + blank=True + ) + file_type = models.CharField( + max_length=400, + help_text='file type of entity being scanned. 
null if the entity is a directory', + null=True, + blank=True + ) + programming_language = models.CharField( + max_length=400, + help_text='programming language used in the entity', + null=True, + blank=True + ) + is_binary = models.BooleanField( + help_text='Whether the entity being scanned is binary or not', + default=False + ) + is_text = models.BooleanField( + help_text='Whether the entity being scanned has text or not', + default=False + ) + is_archive = models.BooleanField( + help_text='Whether the entity being scanned is archive or not', + default=False + ) + is_media = models.BooleanField( + help_text='Whether the entity being scanned is media file or not', + default=False + ) + is_source = models.BooleanField( + help_text='Whether the entity being scanned is source or not', + default=False + ) + is_script = models.BooleanField( + help_text='Whether the entity being scanned is a script file or not', + default=False + ) class License(models.Model): diff --git a/scanapp/tasks.py b/scanapp/tasks.py index f2fd7d2e..a96b3371 100644 --- a/scanapp/tasks.py +++ b/scanapp/tasks.py @@ -47,8 +47,8 @@ @app.task def scan_code_async(url, scan_id, path, file_name): """ - Create and save a file at `path` present at `url` using `scan_id` and bare `path` and - `file_name` and apply the scan. + Create and save a file at `path` present at `url` using `scan_id`, bare `path`, `file_name` + and apply the scan. """ r = requests.get(url) path = path + file_name @@ -81,14 +81,14 @@ def apply_scan_async(path, scan_id): # FIXME improve error checking when calling scan in subprocess. scan_result = subprocess.check_output(['scancode', path]) json_data = json.loads(scan_result) - save_results_to_db.delay(scan_id, json_data) + save_results_to_db.delay(scan_id, json_data, path) @app.task -def save_results_to_db(scan_id, json_data): +def save_results_to_db(scan_id, json_data, path): """ Fill database using `json_data` for given `scan_id` - and add `end_scan_time` to true. 
+    and call `fill_rest_scanned_file_model` with the `path`
     """
     scan = Scan.objects.get(pk=scan_id)
     scan = fill_unfilled_scan_model(
@@ -167,6 +167,31 @@ def save_results_to_db(scan_id, json_data):
             )
             scan_error.save()
 
+    fill_rest_scanned_file_model.delay(path, scan_id)
+
+
+@app.task
+def fill_rest_scanned_file_model(path, scan_id):
+    """
+    Fill the remaining `ScannedFile` fields of the scan `scan_id` using the
+    output of a second `scancode --info` subprocess call on `path`, then set
+    the `scan_end_time` of that scan.
+    """
+    info_fields = (
+        'type', 'name', 'base_name', 'extension', 'date', 'size', 'sha1', 'md5',
+        'files_count', 'mime_type', 'file_type', 'programming_language',
+        'is_binary', 'is_text', 'is_archive', 'is_media', 'is_source', 'is_script',
+    )
+    scanned_file_results = subprocess.check_output(['scancode', '--info', '-f', 'json-pp', path])
+    json_scanned_file_results = json.loads(scanned_file_results)
+    for file_info in json_scanned_file_results['files']:
+        # `path` alone is not unique across scans, so restrict the lookup to this scan;
+        # otherwise `get` raises MultipleObjectsReturned once a path is stored twice.
+        scanned_file = ScannedFile.objects.get(scan_id=scan_id, path=file_info['path'])
+        for field in info_fields:
+            setattr(scanned_file, field, file_info[field])
+        scanned_file.save()
+
     scan = Scan.objects.get(pk=scan_id)
     scan.scan_end_time = timezone.now()
     scan.save()
diff --git a/scanapp/tests/test_models.py b/scanapp/tests/test_models.py
index 39f934b3..a13a156f 100644
--- a/scanapp/tests/test_models.py
+++ b/scanapp/tests/test_models.py
@@ -69,7 +69,27 @@ def test_scanned_file_added(self):
             scan_start_time=timezone.now(),
             scan_end_time=timezone.now()
         )
-        scanned_file = ScannedFile.objects.create(scan=scan, path='/home/nexb/server/')
+        scanned_file = ScannedFile.objects.create(
+            scan=scan,
+            
path='/home/nexb/server/', + type= 'file', + name= 'celery.py', + base_name= 'celery', + extension='.py', + date=timezone.now(), + size=1906, + sha1='2d3c6c804b356a3ef976295fe615e7892dd1e66c', + md5='1db2f0bc0920084fc0608fab696281ef', + mime_type='text/x-python', + file_type='Python script, ASCII text executable', + programming_language='Python', + is_binary=False, + is_text=True, + is_archive=False, + is_media=False, + is_source=True, + is_script=True + ) self.assertEqual('/home/nexb/server/', ScannedFile.objects.get(scan=scan).path) self.assertEqual(scanned_file.path, str(ScannedFile.objects.get(scan=scan).path)) @@ -89,7 +109,27 @@ def test_license_added(self): scan_start_time=timezone.now(), scan_end_time=timezone.now() ) - scanned_file = ScannedFile.objects.create(scan=scan, path='/home/nexb/server/') + scanned_file = ScannedFile.objects.create( + scan=scan, + path='/home/nexb/server/', + type= 'file', + name= 'celery.py', + base_name= 'celery', + extension='.py', + date=timezone.now(), + size=1906, + sha1='2d3c6c804b356a3ef976295fe615e7892dd1e66c', + md5='1db2f0bc0920084fc0608fab696281ef', + mime_type='text/x-python', + file_type='Python script, ASCII text executable', + programming_language='Python', + is_binary=False, + is_text=True, + is_archive=False, + is_media=False, + is_source=True, + is_script=True + ) license = License.objects.create( scanned_file=scanned_file, key='A', @@ -124,7 +164,27 @@ def test_copyright_added(self): scan_start_time=timezone.now(), scan_end_time=timezone.now() ) - scanned_file = ScannedFile.objects.create(scan=scan, path='/home/nexb/server/') + scanned_file = ScannedFile.objects.create( + scan=scan, + path='/home/nexb/server/', + type= 'file', + name= 'celery.py', + base_name= 'celery', + extension='.py', + date=timezone.now(), + size=1906, + sha1='2d3c6c804b356a3ef976295fe615e7892dd1e66c', + md5='1db2f0bc0920084fc0608fab696281ef', + mime_type='text/x-python', + file_type='Python script, ASCII text executable', + 
programming_language='Python', + is_binary=False, + is_text=True, + is_archive=False, + is_media=False, + is_source=True, + is_script=True + ) copyright = Copyright.objects.create( scanned_file=scanned_file, start_line=800, @@ -148,7 +208,27 @@ def test_copyright_holder_added(self): scan_start_time=timezone.now(), scan_end_time=timezone.now() ) - scanned_file = ScannedFile.objects.create(scan=scan, path='/home/nexb/server/') + scanned_file = ScannedFile.objects.create( + scan=scan, + path='/home/nexb/server/', + type= 'file', + name= 'celery.py', + base_name= 'celery', + extension='.py', + date=timezone.now(), + size=1906, + sha1='2d3c6c804b356a3ef976295fe615e7892dd1e66c', + md5='1db2f0bc0920084fc0608fab696281ef', + mime_type='text/x-python', + file_type='Python script, ASCII text executable', + programming_language='Python', + is_binary=False, + is_text=True, + is_archive=False, + is_media=False, + is_source=True, + is_script=True + ) copyright = Copyright.objects.create( scanned_file=scanned_file, start_line=800, @@ -174,7 +254,27 @@ def test_copyright_statement_added(self): scan_start_time=timezone.now(), scan_end_time=timezone.now() ) - scanned_file = ScannedFile.objects.create(scan=scan, path='/home/nexb/server/') + scanned_file = ScannedFile.objects.create( + scan=scan, + path='/home/nexb/server/', + type= 'file', + name= 'celery.py', + base_name= 'celery', + extension='.py', + date=timezone.now(), + size=1906, + sha1='2d3c6c804b356a3ef976295fe615e7892dd1e66c', + md5='1db2f0bc0920084fc0608fab696281ef', + mime_type='text/x-python', + file_type='Python script, ASCII text executable', + programming_language='Python', + is_binary=False, + is_text=True, + is_archive=False, + is_media=False, + is_source=True, + is_script=True + ) copyright = Copyright.objects.create( scanned_file=scanned_file, start_line=800, @@ -203,7 +303,27 @@ def test_copyright_author_added(self): scan_start_time=timezone.now(), scan_end_time=timezone.now() ) - scanned_file = 
ScannedFile.objects.create(scan=scan, path='/home/nexb/server/') + scanned_file = ScannedFile.objects.create( + scan=scan, + path='/home/nexb/server/', + type= 'file', + name= 'celery.py', + base_name= 'celery', + extension='.py', + date=timezone.now(), + size=1906, + sha1='2d3c6c804b356a3ef976295fe615e7892dd1e66c', + md5='1db2f0bc0920084fc0608fab696281ef', + mime_type='text/x-python', + file_type='Python script, ASCII text executable', + programming_language='Python', + is_binary=False, + is_text=True, + is_archive=False, + is_media=False, + is_source=True, + is_script=True + ) copyright = Copyright.objects.create( scanned_file=scanned_file, start_line=800, @@ -232,7 +352,27 @@ def test_package_added(self): scan_start_time=timezone.now(), scan_end_time=timezone.now() ) - scanned_file = ScannedFile.objects.create(scan=scan, path='/home/nexb/server/') + scanned_file = ScannedFile.objects.create( + scan=scan, + path='/home/nexb/server/', + type= 'file', + name= 'celery.py', + base_name= 'celery', + extension='.py', + date=timezone.now(), + size=1906, + sha1='2d3c6c804b356a3ef976295fe615e7892dd1e66c', + md5='1db2f0bc0920084fc0608fab696281ef', + mime_type='text/x-python', + file_type='Python script, ASCII text executable', + programming_language='Python', + is_binary=False, + is_text=True, + is_archive=False, + is_media=False, + is_source=True, + is_script=True + ) package = Package.objects.create(scanned_file=scanned_file, package='bootstrap') self.assertEqual('bootstrap', Package.objects.get(scanned_file=scanned_file).package) @@ -253,7 +393,27 @@ def test_scan_error_added(self): scan_start_time=timezone.now(), scan_end_time=timezone.now() ) - scanned_file = ScannedFile.objects.create(scan=scan, path='/home/nexb/server/') + scanned_file = ScannedFile.objects.create( + scan=scan, + path='/home/nexb/server/', + type= 'file', + name= 'celery.py', + base_name= 'celery', + extension='.py', + date=timezone.now(), + size=1906, + 
sha1='2d3c6c804b356a3ef976295fe615e7892dd1e66c', + md5='1db2f0bc0920084fc0608fab696281ef', + mime_type='text/x-python', + file_type='Python script, ASCII text executable', + programming_language='Python', + is_binary=False, + is_text=True, + is_archive=False, + is_media=False, + is_source=True, + is_script=True + ) scan_error = ScanError.objects.create( scanned_file=scanned_file, scan_error='Integration Issue' diff --git a/scanapp/tests/test_serializer.py b/scanapp/tests/test_serializer.py index 249fc55e..4b2e1a44 100644 --- a/scanapp/tests/test_serializer.py +++ b/scanapp/tests/test_serializer.py @@ -55,7 +55,27 @@ def test_all_model_serializer_helper(self): scan_start_time=timezone.now(), scan_end_time=timezone.now() ) - scanned_file = ScannedFile.objects.create(scan=scan, path='/home/nexb/server/') + scanned_file = ScannedFile.objects.create( + scan=scan, + path='/home/nexb/server/', + type= 'file', + name= 'celery.py', + base_name= 'celery', + extension='.py', + date=timezone.now(), + size=1906, + sha1='2d3c6c804b356a3ef976295fe615e7892dd1e66c', + md5='1db2f0bc0920084fc0608fab696281ef', + mime_type='text/x-python', + file_type='Python script, ASCII text executable', + programming_language='Python', + is_binary=False, + is_text=True, + is_archive=False, + is_media=False, + is_source=True, + is_script=True + ) license = License.objects.create( scanned_file=scanned_file, key='A', @@ -178,7 +198,27 @@ def test_all_model_serializer(self): scan_start_time=timezone.now(), scan_end_time=timezone.now() ) - scanned_file = ScannedFile.objects.create(scan=scan, path='/home/nexb/server/') + scanned_file = ScannedFile.objects.create( + scan=scan, + path='/home/nexb/server/', + type='file', + name='celery.py', + base_name='celery', + extension='.py', + size=1906, + sha1='2d3c6c804b356a3ef976295fe615e7892dd1e66c', + md5='1db2f0bc0920084fc0608fab696281ef', + files_count=0, + mime_type='text/x-python', + file_type='Python script, ASCII text executable', + 
programming_language='Python', + is_binary=False, + is_text=True, + is_archive=False, + is_media=False, + is_source=True, + is_script=True + ) license = License.objects.create( scanned_file=scanned_file, key='A', @@ -229,7 +269,24 @@ def test_all_model_serializer(self): "files_count": 200 }, "scanned_file": [{ - "path": "/home/nexb/server/" + "path": "/home/nexb/server/", + "type": "file", + "name": "celery.py", + "base_name": "celery", + "extension": ".py", + "size": 1906, + "sha1": "2d3c6c804b356a3ef976295fe615e7892dd1e66c", + "md5": "1db2f0bc0920084fc0608fab696281ef", + "files_count": 0, + "mime_type": "text/x-python", + "file_type": "Python script, ASCII text executable", + "programming_language": "Python", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": true }], "license": [{ "key": "A", @@ -242,7 +299,8 @@ def test_all_model_serializer(self): "dejacode_url": "https://github.com", "spdx_license_key": "mit", "spdx_url": "https://github.com/", - "start_line": 21, "end_line": 567, + "start_line": 21, + "end_line": 567, "matched_rule": {"url": ["https://github.com", "https://google.com"]} }], "copyright": [{"start_line": 800, "end_line": 1000}], @@ -272,3 +330,4 @@ def test_all_model_serializer(self): del scan_serializer.data['scan'][key_three] for all_model_serializer_json_items, scan_serializer_items in zip(sorted(json.loads(all_model_serializer_json)['scan'].items()), sorted(scan_serializer.data['scan'].items())): self.assertEqual(all_model_serializer_json_items, scan_serializer_items) +