Skip to content
This repository was archived by the owner on Feb 12, 2021. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions scanapp/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.1 on 2017-08-14 17:19
# Generated by Django 1.11.1 on 2017-08-24 09:46
from __future__ import unicode_literals

from django.conf import settings
Expand Down Expand Up @@ -80,7 +80,7 @@ class Migration(migrations.Migration):
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('url', models.URLField(blank=True, help_text='Url from where the code is fetched', max_length=2000, null=True)),
('scan_directory', models.CharField(blank=True, help_text='Directory in which the code to be scanned is stored', max_length=200, null=True)),
('scan_directory', models.CharField(blank=True, help_text='Directory in which the code to be scanned is stored', max_length=2000, null=True)),
('scancode_notice', models.CharField(blank=True, help_text='Notice by the scancode-toolkit', max_length=2000, null=True)),
('scancode_version', models.CharField(blank=True, help_text='Version of scancode being used', max_length=200, null=True)),
('files_count', models.IntegerField(blank=True, default=0, help_text='Number of files under scan', null=True)),
Expand All @@ -101,6 +101,24 @@ class Migration(migrations.Migration):
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('path', models.CharField(help_text='Path of file scanned', max_length=400)),
('type', models.CharField(help_text='Type of the entity being scanned', max_length=400)),
('name', models.CharField(help_text='Name of the entity being scanned', max_length=400)),
('base_name', models.CharField(help_text='Base name of entity without extension', max_length=400)),
('extension', models.CharField(blank=True, help_text='Extension of the entity being scanned', max_length=400, null=True)),
('date', models.DateTimeField(blank=True, help_text='Date of entity being created', null=True)),
('size', models.IntegerField(blank=True, help_text='Size of the entity being scanned', null=True)),
('sha1', models.CharField(blank=True, help_text='SHA1 Checksums of the file', max_length=400, null=True)),
('md5', models.CharField(blank=True, help_text='MD5 checksums of the file', max_length=400, null=True)),
('files_count', models.IntegerField(blank=True, help_text='number of files present if a directory', null=True)),
('mime_type', models.CharField(blank=True, help_text='mime type of entity being scanned', max_length=400, null=True)),
('file_type', models.CharField(blank=True, help_text='file type of entity being scanned. null if the entity is a directory', max_length=400, null=True)),
('programming_language', models.CharField(blank=True, help_text='programming language used in the entity', max_length=400, null=True)),
('is_binary', models.BooleanField(default=False, help_text='Whether the entity being scanned is binary or not')),
('is_text', models.BooleanField(default=False, help_text='Whether the entity being scanned has text or not')),
('is_archive', models.BooleanField(default=False, help_text='Whether the entity being scanned is archive or not')),
('is_media', models.BooleanField(default=False, help_text='Whether the entity being scanned is media file or not')),
('is_source', models.BooleanField(default=False, help_text='Whether the entity being scanned is source or not')),
('is_script', models.BooleanField(default=False, help_text='Whether the entity being scanned is a script file or not')),
('scan', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='scanapp.Scan')),
],
),
Expand Down
20 changes: 0 additions & 20 deletions scanapp/migrations/0002_auto_20170815_2324.py

This file was deleted.

81 changes: 81 additions & 0 deletions scanapp/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,87 @@ def __str__(self):

scan = models.ForeignKey(Scan)
path = models.CharField(max_length=400, help_text='Path of file scanned')
type = models.CharField(max_length=400, help_text='Type of the entity being scanned')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment, there is no proper documentation of what the fields are in ScanCode, so we may use this help text as a definition for the fields. Thanks for getting it started for us!

That said, there can be some improvements made to the text to make it more accurate. For example, in the case of type, it should just say "Type of resource: file or directory". Do not use "entity" as anything we scan is either a file or a directory. Also remove "being scanned" from the text, it is not necessary.

Some other suggestions:

name: "Name of the file or directory"
date: "Creation date of file or directory"

etc.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, that seems fair...

name = models.CharField(max_length=400, help_text='Name of the entity being scanned')
base_name = models.CharField(
max_length=400,
help_text='Base name of entity without extension',
)
extension = models.CharField(
max_length=400,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

400 seems to be big for a field that just stores the file extension. This can be changed to something smaller. You should also take a look at other field sizes and make them smaller.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK.

help_text='Extension of the entity being scanned',
null=True,
blank=True
)
date = models.DateTimeField(
help_text='Date of entity being created',
null=True,
blank=True
)
size = models.IntegerField(
help_text='Size of the entity being scanned',
null=True,
blank=True
)
sha1 = models.CharField(
max_length=400,
help_text='SHA1 Checksums of the file',
null=True,
blank=True
)
md5 = models.CharField(
max_length=400,
help_text='MD5 checksums of the file',
null=True,
blank=True
)
files_count = models.IntegerField(
help_text='number of files present if a directory',
null=True,
blank=True
)
mime_type = models.CharField(
max_length=400,
help_text='mime type of entity being scanned',
null=True,
blank=True
)
file_type = models.CharField(
max_length=400,
help_text='file type of entity being scanned. null if the entity is a directory',
null=True,
blank=True
)
programming_language = models.CharField(
max_length=400,
help_text='programming language used in the entity',
null=True,
blank=True
)
is_binary = models.BooleanField(
help_text='Whether the entity being scanned is binary or not',
default=False
)
is_text = models.BooleanField(
help_text='Whether the entity being scanned has text or not',
default=False
)
is_archive = models.BooleanField(
help_text='Whether the entity being scanned is archive or not',
default=False
)
is_media = models.BooleanField(
help_text='Whether the entity being scanned is media file or not',
default=False
)
is_source = models.BooleanField(
help_text='Whether the entity being scanned is source or not',
default=False
)
is_script = models.BooleanField(
help_text='Whether the entity being scanned is a script file or not',
default=False
)


class License(models.Model):
Expand Down
42 changes: 37 additions & 5 deletions scanapp/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@
@app.task
def scan_code_async(url, scan_id, path, file_name):
"""
Create and save a file at `path` present at `url` using `scan_id` and bare `path` and
`file_name` and apply the scan.
Fetch the content at `url`, save it as `file_name` under the bare `path`,
and apply the scan for `scan_id`.
"""
r = requests.get(url)
path = path + file_name
Expand Down Expand Up @@ -81,14 +81,14 @@ def apply_scan_async(path, scan_id):
# FIXME improve error checking when calling scan in subprocess.
scan_result = subprocess.check_output(['scancode', path])
json_data = json.loads(scan_result)
save_results_to_db.delay(scan_id, json_data)
save_results_to_db.delay(scan_id, json_data, path)


@app.task
def save_results_to_db(scan_id, json_data):
def save_results_to_db(scan_id, json_data, path):
"""
Fill database using `json_data` for given `scan_id`
and add `end_scan_time` to true.
and call `fill_rest_scanned_file_model` with the `path`
"""
scan = Scan.objects.get(pk=scan_id)
scan = fill_unfilled_scan_model(
Expand Down Expand Up @@ -167,6 +167,38 @@ def save_results_to_db(scan_id, json_data):
)
scan_error.save()

fill_rest_scanned_file_model.delay(path, scan_id)


@app.task
def fill_rest_scanned_file_model(path, scan_id):
    """
    Fill the remaining `ScannedFile` fields for the scan `scan_id` by running
    `scancode --info` on `path` in a subprocess, then set the `scan_end_time`
    of the scan.

    Each entry of the `files` list in the scancode JSON output is matched to
    the `ScannedFile` of this scan that has the same `path`.

    Raises `subprocess.CalledProcessError` if scancode exits with a non-zero
    status, `KeyError` if an expected key is missing from the output and
    `ScannedFile.DoesNotExist` if a reported path has no `ScannedFile` row
    for this scan.
    """
    # Keys of a scancode `--info` file entry; each one is copied to the
    # ScannedFile attribute of the same name.
    info_fields = (
        'type',
        'name',
        'base_name',
        'extension',
        'date',
        'size',
        'sha1',
        'md5',
        'files_count',
        'mime_type',
        'file_type',
        'programming_language',
        'is_binary',
        'is_text',
        'is_archive',
        'is_media',
        'is_source',
        'is_script',
    )

    scanned_file_results = subprocess.check_output(
        ['scancode', '--info', '-f', 'json-pp', path]
    )
    json_scanned_file_results = json.loads(scanned_file_results)

    for file_info in json_scanned_file_results['files']:
        # Restrict the lookup to this scan: the same path can exist in
        # several scans, and a lookup on `path` alone would then raise
        # MultipleObjectsReturned or update a row of another scan.
        scanned_file = ScannedFile.objects.get(
            scan_id=scan_id,
            path=file_info['path'],
        )
        for field in info_fields:
            setattr(scanned_file, field, file_info[field])
        scanned_file.save()

    # Every file has been updated: mark the scan as finished.
    scan = Scan.objects.get(pk=scan_id)
    scan.scan_end_time = timezone.now()
    scan.save()

Expand Down
Loading