aboutcode-org · singh1114 · Aug 22, 2017 · Aug 23, 2017 · Aug 24, 2017 · Dec 3, 2017
diff --git a/scanapp/migrations/0001_initial.py b/scanapp/migrations/0001_initial.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Generated by Django 1.11.1 on 2017-08-14 17:19
+# Generated by Django 1.11.1 on 2017-08-24 09:46
 from __future__ import unicode_literals
 
 from django.conf import settings
@@ -80,7 +80,7 @@ class Migration(migrations.Migration):
             fields=[
                 ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                 ('url', models.URLField(blank=True, help_text='Url from where the code is fetched', max_length=2000, null=True)),
-                ('scan_directory', models.CharField(blank=True, help_text='Directory in which the code to be scanned is stored', max_length=200, null=True)),
+                ('scan_directory', models.CharField(blank=True, help_text='Directory in which the code to be scanned is stored', max_length=2000, null=True)),
                 ('scancode_notice', models.CharField(blank=True, help_text='Notice by the scancode-toolkit', max_length=2000, null=True)),
                 ('scancode_version', models.CharField(blank=True, help_text='Version of scancode being used', max_length=200, null=True)),
                 ('files_count', models.IntegerField(blank=True, default=0, help_text='Number of files under scan', null=True)),
@@ -101,6 +101,24 @@ class Migration(migrations.Migration):
             fields=[
                 ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                 ('path', models.CharField(help_text='Path of file scanned', max_length=400)),
+                ('type', models.CharField(help_text='Type of the entity being scanned', max_length=400)),
+                ('name', models.CharField(help_text='Name of the entity being scanned', max_length=400)),
+                ('base_name', models.CharField(help_text='Base name of entity without extension', max_length=400)),
+                ('extension', models.CharField(blank=True, help_text='Extension of the entity being scanned', max_length=400, null=True)),
+                ('date', models.DateTimeField(blank=True, help_text='Date of entity being created', null=True)),
+                ('size', models.IntegerField(blank=True, help_text='Size of the entity being scanned', null=True)),
+                ('sha1', models.CharField(blank=True, help_text='SHA1 Checksums of the file', max_length=400, null=True)),
+                ('md5', models.CharField(blank=True, help_text='MD5 checksums of the file', max_length=400, null=True)),
+                ('files_count', models.IntegerField(blank=True, help_text='number of files present if a directory', null=True)),
+                ('mime_type', models.CharField(blank=True, help_text='mime type of entity being scanned', max_length=400, null=True)),
+                ('file_type', models.CharField(blank=True, help_text='file type of entity being scanned. null if the entity is a directory', max_length=400, null=True)),
+                ('programming_language', models.CharField(blank=True, help_text='programming language used in the entity', max_length=400, null=True)),
+                ('is_binary', models.BooleanField(default=False, help_text='Whether the entity being scanned is binary or not')),
+                ('is_text', models.BooleanField(default=False, help_text='Whether the entity being scanned has text or not')),
+                ('is_archive', models.BooleanField(default=False, help_text='Whether the entity being scanned is archive or not')),
+                ('is_media', models.BooleanField(default=False, help_text='Whether the entity being scanned is media file or not')),
+                ('is_source', models.BooleanField(default=False, help_text='Whether the entity being scanned is source or not')),
+                ('is_script', models.BooleanField(default=False, help_text='Whether the entity being scanned is a script file or not')),
                 ('scan', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='scanapp.Scan')),
             ],
         ),

diff --git a/scanapp/migrations/0002_auto_20170815_2324.py b/scanapp/migrations/0002_auto_20170815_2324.py
diff --git a/scanapp/models.py b/scanapp/models.py
@@ -104,6 +104,87 @@ def __str__(self):
 
     scan = models.ForeignKey(Scan)
     path = models.CharField(max_length=400, help_text='Path of file scanned')
+    type = models.CharField(max_length=400, help_text='Type of the entity being scanned')
+    name = models.CharField(max_length=400, help_text='Name of the entity being scanned')
+    base_name = models.CharField(
+        max_length=400,
+        help_text='Base name of entity without extension',
+    )
+    extension = models.CharField(
+        max_length=400,
+        help_text='Extension of the entity being scanned',
+        null=True,
+        blank=True
+    )
+    date = models.DateTimeField(
+        help_text='Date of entity being created',
+        null=True,
+        blank=True
+    )
+    size = models.IntegerField(
+        help_text='Size of the entity being scanned',
+        null=True,
+        blank=True
+    )
+    sha1 = models.CharField(
+        max_length=400,
+        help_text='SHA1 Checksums of the file',
+        null=True,
+        blank=True
+    )
+    md5 = models.CharField(
+        max_length=400,
+        help_text='MD5 checksums of the file',
+        null=True,
+        blank=True
+    )
+    files_count = models.IntegerField(
+        help_text='number of files present if a directory',
+        null=True,
+        blank=True
+    )
+    mime_type = models.CharField(
+        max_length=400,
+        help_text='mime type of entity being scanned',
+        null=True,
+        blank=True
+    )
+    file_type = models.CharField(
+        max_length=400,
+        help_text='file type of entity being scanned. null if the entity is a directory',
+        null=True,
+        blank=True
+    )
+    programming_language = models.CharField(
+        max_length=400,
+        help_text='programming language used in the entity',
+        null=True,
+        blank=True
+    )
+    is_binary = models.BooleanField(
+        help_text='Whether the entity being scanned is binary or not',
+        default=False
+    )
+    is_text = models.BooleanField(
+        help_text='Whether the entity being scanned has text or not',
+        default=False
+    )
+    is_archive = models.BooleanField(
+        help_text='Whether the entity being scanned is archive or not',
+        default=False
+    )
+    is_media = models.BooleanField(
+        help_text='Whether the entity being scanned is media file or not',
+        default=False
+    )
+    is_source = models.BooleanField(
+        help_text='Whether the entity being scanned is source or not',
+        default=False
+    )
+    is_script = models.BooleanField(
+        help_text='Whether the entity being scanned is a script file or not',
+        default=False
+    )
 
 
 class License(models.Model):

diff --git a/scanapp/tasks.py b/scanapp/tasks.py
@@ -47,8 +47,8 @@
 @app.task
 def scan_code_async(url, scan_id, path, file_name):
     """
-    Create and save a file at `path` present at `url` using `scan_id` and bare `path` and
-    `file_name` and apply the scan.
+    Create and save a file at `path` present at `url` using `scan_id`, bare `path`, `file_name`
+    and apply the scan.
     """
     r = requests.get(url)
     path = path + file_name
@@ -81,14 +81,14 @@ def apply_scan_async(path, scan_id):
     # FIXME improve error checking when calling scan in subprocess.
     scan_result = subprocess.check_output(['scancode', path])
     json_data = json.loads(scan_result)
-    save_results_to_db.delay(scan_id, json_data)
+    save_results_to_db.delay(scan_id, json_data, path)
 
 
 @app.task
-def save_results_to_db(scan_id, json_data):
+def save_results_to_db(scan_id, json_data, path):
     """
     Fill database using `json_data` for given `scan_id`
-    and add `end_scan_time` to true.
+    and call `fill_rest_scanned_file_model` with the `path`
     """
     scan = Scan.objects.get(pk=scan_id)
     scan = fill_unfilled_scan_model(
@@ -167,6 +167,38 @@ def save_results_to_db(scan_id, json_data):
             )
             scan_error.save()
 
+    fill_rest_scanned_file_model.delay(path, scan_id)
+
+
+@app.task
+def fill_rest_scanned_file_model(path, scan_id):
+    """
+    Fill the remaining ScannedFile fields for the scan `scan_id`.
+
+    Run `scancode --info` on `path` in a subprocess and copy the file details
+    it reports onto the matching ScannedFile rows. Rows are looked up by scan
+    and path, since `path` alone is not declared unique on ScannedFile. Once
+    every file is saved, set `scan_end_time` on the scan.
+    """
+    # Keys of a scancode file entry that are copied as-is onto ScannedFile.
+    info_fields = (
+        'type', 'name', 'base_name',
+        'extension', 'date', 'size',
+        'sha1', 'md5', 'files_count',
+        'mime_type', 'file_type', 'programming_language',
+        'is_binary', 'is_text', 'is_archive',
+        'is_media', 'is_source', 'is_script',
+    )
+    scanned_file_results = subprocess.check_output(['scancode', '--info', '-f', 'json-pp', path])
+    json_scanned_file_results = json.loads(scanned_file_results)
+    for file_info in json_scanned_file_results['files']:
+        # Restrict the lookup to this scan so rows of other scans are not hit.
+        scanned_file = ScannedFile.objects.get(scan_id=scan_id, path=file_info['path'])
+        for field in info_fields:
+            setattr(scanned_file, field, file_info[field])
+        scanned_file.save()
+    # Record the end time only after all file rows have been saved.
+    scan = Scan.objects.get(pk=scan_id)
     scan.scan_end_time = timezone.now()
     scan.save()