Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 43 additions & 123 deletions core_directory/management/commands/init_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,16 @@
and stores both local file content and GitHub URLs in the database using a serializer.
Utilizes environment variables for configuration.
"""

import os
import tempfile
import shutil
import stat

from dataclasses import dataclass

from io import BytesIO
from git import Repo
from git.exc import GitCommandError

from github import Github, GithubException
from github.Auth import Token as GitHubAuthToken

from django.db import IntegrityError
from django.core.exceptions import ValidationError as DjangoValidationError

from django.core.files.base import ContentFile
from django.core.management.base import BaseCommand

from rest_framework.exceptions import ValidationError as DRFValidationError

from core_directory.serializers import CoreSerializer
from core_directory.storages.github import GitHubStorage
from core_directory.models import CorePackage

class Command(BaseCommand):
Expand Down Expand Up @@ -62,124 +50,71 @@ def handle(self, *args, **kwargs):
"""
if not CorePackage.objects.exists():
self.stdout.write(self.style.SUCCESS('Database is empty. Initializing with data...'))
self.download_and_load_data()
self.initialize_from_storage()
self.stdout.write(self.style.SUCCESS('Database initialized successfully.'))
else:
self.stdout.write(self.style.WARNING('Database already initialized.'))

def get_repo_info(self):
def initialize_from_storage(self):
"""
Retrieves repository information and default branch using the GitHub API.
Loads core and signature files from the configured GitHub repository into the database.

Returns:
tuple: (repo_name, access_token, default_branch) if successful, otherwise (None, None, None).
"""
repo_name = os.getenv('GITHUB_REPO')
access_token = os.getenv('GITHUB_ACCESS_TOKEN')
if not repo_name:
self.stdout.write(self.style.ERROR('GITHUB_REPO environment variable is not set.'))
return None, None, None
if not access_token:
self.stdout.write(self.style.ERROR('GITHUB_ACCESS_TOKEN environment variable is not set.'))
return None, None, None
g = Github(auth=GitHubAuthToken(os.getenv('GITHUB_ACCESS_TOKEN')))
try:
repo = g.get_repo(repo_name)
default_branch = repo.default_branch
return repo_name, access_token, default_branch
except GithubException as e:
self.stdout.write(self.style.ERROR(f'GitHub API error fetching repo info: {e}'))
return None, None, None
except AttributeError as e:
self.stdout.write(self.style.ERROR(f'Attribute error fetching repo info: {e}'))
return None, None, None

def download_and_load_data(self):
"""
Clones the GitHub repo locally and loads .core and .sig files from the root directory into the database.
This method uses the GitHubStorage backend to list and retrieve all `.core` files (and their
corresponding `.sig` files, if present) from the root of the repository. For each core file,
it creates a Django ContentFile object and passes it, along with the signature file (if available),
to the CoreSerializer for validation and saving. If the storage backend supports cache prefill,
the cache is prefilled before processing files.

Any errors encountered during validation or saving are printed to the command output.

Raises:
RuntimeError: If the storage cache cannot be prefilled or if required files are missing.
FileNotFoundError: If a core or signature file cannot be found in the repository.

Constructs GitHub URLs for each file and stores both the local file content and the GitHub URL
in the database using the CoreSerializer.
Side Effects:
- Populates the database with CorePackage and related objects for each valid core file.
- Prints progress and error messages to the command output.
"""
@dataclass
class RepositoryData:
"""
Container for GitHub repository authentication and metadata.

Attributes:
name (str): The full repository name in 'owner/repo' format.
access_token (str): The GitHub access token for authentication.
default_branch (str): The default branch of the repository.

Properties:
url (str): The HTTPS URL of the repository.
url_with_access_token (str): The HTTPS URL of the repository including the access token.
base_url_raw_files (str): The base URL for downloading raw files from the repository's default branch.
"""
name: str
access_token: str
default_branch: str

@property
def url(self):
"""Returns the URL of the repository."""
return f"https://github.com/{self.name}.git"

@property
def url_with_access_token(self):
"""Returns the URL of the repository including access token."""
return f"https://{self.access_token}@github.com/{self.name}.git"

@property
def base_url_raw_files(self):
"""Returns the base URL for raw file download from github."""
return f"https://raw.githubusercontent.com/{self.name}/refs/heads/{self.default_branch}"

repo = RepositoryData(*self.get_repo_info())
if not repo.name:
return

temp_dir = tempfile.mkdtemp()
self.stdout.write(f'Cloning repository {repo.url} to {temp_dir}...')
try:
Repo.clone_from(repo.url_with_access_token, temp_dir)
except GitCommandError as e:
self.stdout.write(self.style.ERROR(f'Git error cloning repository: {e}'))
shutil.rmtree(temp_dir)
return
except OSError as e:
self.stdout.write(self.style.ERROR(f'Filesystem error cloning repository: {e}'))
shutil.rmtree(temp_dir)
return

# Only process files in the root directory
files_in_root = os.listdir(temp_dir)

storage = GitHubStorage()

# Prefill cache if supported
prefill = getattr(storage, "prefill_cache", None)
if callable(prefill):
self.stdout.write('Prefilling storage cache from GitHub zip archive...')
try:
prefill()
self.stdout.write(self.style.SUCCESS('Cache prefilled.'))
except RuntimeError as e:
self.stdout.write(self.style.ERROR(f'Error during cache prefill: {e}'))

_, files_in_root = storage.listdir('')
core_files = [f for f in files_in_root if f.endswith('.core')]
sig_files = {f.removesuffix('.sig'): f for f in files_in_root if f.endswith('.sig')}

for core_filename in core_files:
self.stdout.write(f'Processing {core_filename}...')

with open(os.path.join(temp_dir, core_filename), 'rb') as f:
core_file_object = BytesIO(f.read())
with storage.open(core_filename, 'rb') as f:
core_file_object = ContentFile(f.read())
core_file_object.name = core_filename
core_file_object.size = core_file_object.getbuffer().nbytes
core_file_object.size = core_file_object.size

data = {
'core_file': core_file_object,
'core_url': f"{repo.base_url_raw_files}/{core_filename}"
'core_url': storage.url(core_filename)
}

# Attach signature file if present
if core_filename in sig_files:
sig_filename = sig_files[core_filename]

with open(os.path.join(temp_dir, sig_filename), 'rb') as f:
sig_file_object = BytesIO(f.read())
with storage.open(sig_filename, 'rb') as f:
sig_file_object = ContentFile(f.read())
sig_file_object.name = sig_filename
sig_file_object.size = sig_file_object.getbuffer().nbytes
data['sig_file'] = sig_file_object
data['sig_url'] = f"{repo.base_url_raw_files}/{sig_filename}"
sig_file_object.size = sig_file_object.size
data['signature_file'] = sig_file_object
data['sig_url'] = storage.url(sig_filename)

# Use the serializer to create database entries
serializer = CoreSerializer(data=data)
Expand All @@ -191,18 +126,3 @@ def base_url_raw_files(self):
self.stdout.write(self.style.ERROR(f'Error creating database object for {core_filename}: {e}'))
else:
self.stdout.write(self.style.ERROR(f'Errors in {core_filename}: {serializer.errors}'))

shutil.rmtree(temp_dir, onexc=self._on_rm_exc)

@staticmethod
def _on_rm_exc(func, path, excinfo):
"""
Error handler for `shutil.rmtree` using the `onexc` parameter (Python 3.12+).

If the removal failed, make the file writable and try again.
"""
if not os.access(path, os.W_OK):
os.chmod(path, stat.S_IWUSR)
func(path)
else:
raise excinfo[1]
6 changes: 3 additions & 3 deletions core_directory/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 5.2.1 on 2025-09-10 14:44
# Generated by Django 5.2.1 on 2025-10-09 17:04

import django.db.models.deletion
import utils.spdx
Expand All @@ -24,8 +24,8 @@ class Migration(migrations.Migration):
('version_minor', models.IntegerField(help_text="Minor version number (e.g. 2 for version '1.2.3-abc').")),
('version_patch', models.IntegerField(help_text="Patch version number (e.g. 3 for version '1.2.3-abc').")),
('version_prerelease', models.CharField(blank=True, help_text="Pre-release label (e.g. 'abc' for version '1.2.3-abc').", max_length=20, null=True)),
('core_url', models.URLField(help_text='URL to download the .core file from GitHub or another source.')),
('sig_url', models.URLField(blank=True, help_text='Optional URL to download the .sig file for signature verification.', null=True)),
('core_file', models.FileField(help_text='The FuseSoC .core file for this package. This file is required and contains the core metadata.', upload_to='')),
('signature_file', models.FileField(blank=True, help_text="Optional signature (.sig) file for verifying the core file's authenticity.", null=True, upload_to='')),
('description', models.CharField(help_text='A short description of the core package.', max_length=255)),
('spdx_license', models.CharField(blank=True, choices=utils.spdx.get_spdx_choices, help_text='SPDX license identifier (e.g., MIT, GPL-3.0-or-later, or LicenseRef-...)', max_length=64, null=True, validators=[utils.spdx.validate_spdx])),
],
Expand Down
12 changes: 7 additions & 5 deletions core_directory/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,15 @@ class CorePackage(UniqueSanitizedNameMixin):
null=True,
help_text="Pre-release label (e.g. 'abc' for version '1.2.3-abc')."
)
core_url = models.URLField(
help_text='URL to download the .core file from GitHub or another source.'
core_file = models.FileField(
blank=False,
null=False,
help_text='The FuseSoC .core file for this package. This file is required and contains the core metadata.'
)
sig_url = models.URLField(
signature_file = models.FileField(
blank=True,
null=True,
help_text='Optional URL to download the .sig file for signature verification.'
help_text='Optional signature (.sig) file for verifying the core file\'s authenticity.'
)
description = models.CharField(
max_length=255,
Expand All @@ -142,7 +144,7 @@ def is_signed(self):
Returns True if a signature file is set, False otherwise.
You can add more validation logic if needed.
"""
return bool(self.sig_url)
return bool(self.signature_file)

@property
def sanitized_vlnv(self):
Expand Down
41 changes: 31 additions & 10 deletions core_directory/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@
from jsonschema import validate, ValidationError, SchemaError
from fusesoc.capi2.coreparser import Core2Parser

from utils.files import filefield_value_for_storage
from utils.sanitize import sanitize_string
from utils.spdx import validate_spdx
from utils.vlnv import VLNV
from .models import Project, Vendor, Library, CorePackage, Fileset, FilesetDependency, Target, TargetConfiguration


class CoreSerializer(serializers.Serializer):
"""
Serializer for validating core and signature files.
Expand All @@ -60,12 +62,8 @@ class CoreSerializer(serializers.Serializer):
"""

# User-uploaded files
core_file = serializers.FileField()
signature_file = serializers.FileField(required=False)

# Optionally, allow user to provide URLs
core_url = serializers.URLField(required=False)
sig_url = serializers.URLField(required=False, allow_null=True)
core_file = serializers.FileField(required=True)
signature_file = serializers.FileField(required=False, allow_null=True)

# Read-only fields extracted from the core file
vlnv_name = serializers.CharField(read_only=True, max_length=255)
Expand All @@ -77,6 +75,14 @@ class CoreSerializer(serializers.Serializer):
description = serializers.CharField(read_only=True, max_length=255, required=False)
spdx_license = serializers.CharField(read_only=True, max_length=64)

class Meta:
    """Names the model and the fields this serializer is concerned with."""
    # NOTE(review): the enclosing serializer subclasses serializers.Serializer,
    # which presumably does not consume Meta.model/Meta.fields the way
    # ModelSerializer does -- confirm whether this is informational only.
    model = CorePackage
    fields = [
        # File fields; names must match the declared serializer fields.
        'core_file', 'signature_file',
        'vlnv_name', 'sanitized_name', 'vendor_name', 'library_name', 'project_name',
        'version', 'description', 'spdx_license'
    ]

def validate_core_file(self, value):
"""
Validates the core file's extension and size.
Expand Down Expand Up @@ -207,7 +213,7 @@ def validate(self, attrs):

return attrs

def create(self, validated_data):
def create(self, validated_data): # pylint: disable=too-many-locals
with transaction.atomic():
# Get or create Vendor, Library, Project
vendor, _ = Vendor.objects.get_or_create(name=validated_data['vendor_name'])
Expand All @@ -218,15 +224,30 @@ def create(self, validated_data):
name=validated_data['project_name']
)

# Prepare file field values
core_file_obj = validated_data['core_file']
core_file_name = core_file_obj.name
core_file_obj.name = f"{validated_data['sanitized_name']}.core"

sig_file_obj = validated_data.get('signature_file')
sig_filename = sig_file_obj.name if sig_file_obj else None
if sig_file_obj:
sig_file_obj.name = f"{validated_data['sanitized_name']}.core.sig"

# Use the helper to avoid duplicate uploads
core_file_field_value = filefield_value_for_storage(core_file_name, core_file_obj)
sig_file_field_value = filefield_value_for_storage(sig_filename, sig_file_obj) if sig_file_obj else None


# Create and save the model instance
instance = CorePackage.objects.create(
project=project,
vlnv_name=validated_data['vlnv_name'],
version=validated_data['version'],
core_url=validated_data.get('core_url'),
sig_url=validated_data.get('sig_url'),
description=validated_data.get('description'),
spdx_license=validated_data.get('spdx_license')
spdx_license=validated_data.get('spdx_license'),
core_file=core_file_field_value,
signature_file=sig_file_field_value
)

# Create Filesets and their Dependencies
Expand Down
Empty file.
Loading