Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/pulp_file/4708.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added the ability to sync from a Git repository using the new `FileGitRemote`.
28 changes: 28 additions & 0 deletions pulp_file/app/migrations/0019_add_filegitremote.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Generated by Django 5.2.10 on 2026-02-05 19:22

import django.db.models.deletion
import pulpcore.app.models.access_policy
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``FileGitRemote`` model as a child of ``core.remote``."""

    dependencies = [
        ("core", "0145_domainize_import_export"),
        ("file", "0018_alter_filecontent_options"),
    ]

    operations = [
        migrations.CreateModel(
            name="FileGitRemote",
            fields=[
                # Parent link to the base ``core.remote`` row; doubles as the primary key.
                (
                    "remote_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="core.remote",
                    ),
                ),
                # Git ref to sync from.
                ("git_ref", models.TextField(default="HEAD")),
            ],
            options={
                "permissions": [
                    ("manage_roles_filegitremote", "Can manage roles on file git remotes"),
                ],
                "default_related_name": "%(app_label)s_%(model_name)s",
            },
            bases=(
                "core.remote",
                pulpcore.app.models.access_policy.AutoAddObjPermsMixin,
            ),
        ),
    ]
21 changes: 20 additions & 1 deletion pulp_file/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,33 @@ class Meta:
]


class FileGitRemote(Remote, AutoAddObjPermsMixin):
    """
    A remote whose upstream is a Git repository instead of a PULP_MANIFEST listing.

    ``url`` is expected to point at the Git repository itself. ``git_ref`` selects the
    branch, tag, or commit to sync from and falls back to ``HEAD`` when not provided.
    """

    TYPE = "git"

    # Branch, tag, or commit to sync from.
    git_ref = models.TextField(default="HEAD")

    class Meta:
        permissions = [
            ("manage_roles_filegitremote", "Can manage roles on file git remotes"),
        ]
        default_related_name = "%(app_label)s_%(model_name)s"


class FileRepository(Repository, AutoAddObjPermsMixin):
"""
The "file" repository type.
"""

TYPE = "file"
CONTENT_TYPES = [FileContent]
REMOTE_TYPES = [FileRemote]
REMOTE_TYPES = [FileRemote, FileGitRemote]

manifest = models.TextField(default="PULP_MANIFEST", null=True)
autopublish = models.BooleanField(default=False)
Expand Down
19 changes: 19 additions & 0 deletions pulp_file/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
FileAlternateContentSource,
FileContent,
FileDistribution,
FileGitRemote,
FileRemote,
FileRepository,
FilePublication,
Expand Down Expand Up @@ -160,6 +161,24 @@ class Meta:
model = FileRemote


class FileGitRemoteSerializer(RemoteSerializer):
    """
    Serializer for remotes that sync files from a Git repository.
    """

    # Optional on input; "HEAD" is used when the client omits it.
    git_ref = serializers.CharField(
        required=False,
        default="HEAD",
        help_text=_("The git ref (branch, tag, or commit hash) to sync from. Defaults to HEAD."),
    )

    # Declared as a hidden field whose value always defaults to IMMEDIATE.
    policy = serializers.HiddenField(default=models.Remote.IMMEDIATE)

    class Meta:
        model = FileGitRemote
        fields = RemoteSerializer.Meta.fields + ("git_ref",)


class FilePublicationSerializer(PublicationSerializer):
"""
Serializer for File Publications.
Expand Down
199 changes: 195 additions & 4 deletions pulp_file/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
import logging
import os
import shutil
import tempfile

from gettext import gettext as _
from urllib.parse import quote, urlparse, urlunparse

import aiohttp.client_exceptions
import git as gitpython
from django.core.files import File

from pulpcore.plugin.exceptions import SyncError
from pulpcore.plugin.models import Artifact, ProgressReport, Remote, PublishedMetadata
from pulpcore.plugin.serializers import RepositoryVersionSerializer
from pulpcore.plugin.stages import (
ArtifactDownloader,
DeclarativeArtifact,
DeclarativeContent,
DeclarativeVersion,
RemoteArtifactSaver,
Stage,
)

from pulp_file.app.models import FileContent, FileRemote, FileRepository, FilePublication
from pulp_file.app.models import (
FileContent,
FileGitRemote,
FileRepository,
FilePublication,
)
from pulp_file.manifest import Manifest


Expand All @@ -43,14 +53,24 @@ def synchronize(remote_pk, repository_pk, mirror, url=None):
SyncError: If the remote does not specify a URL to sync.

"""
remote = FileRemote.objects.get(pk=remote_pk)
remote = Remote.objects.get(pk=remote_pk).cast()
repository = FileRepository.objects.get(pk=repository_pk)

if not remote.url:
raise SyncError(_("A remote must have a url specified to synchronize."))

first_stage = FileFirstStage(remote, url)
dv = DeclarativeVersion(first_stage, repository, mirror=mirror, acs=True)
if isinstance(remote, FileGitRemote):
first_stage = GitFirstStage(remote)
dv = DeclarativeVersion(first_stage, repository, mirror=mirror)
old_pipeline_stages = dv.pipeline_stages
dv.pipeline_stages = lambda new_version: [
stage
for stage in old_pipeline_stages(new_version)
if not isinstance(stage, (ArtifactDownloader, RemoteArtifactSaver))
]
else:
first_stage = FileFirstStage(remote, url)
dv = DeclarativeVersion(first_stage, repository, mirror=mirror, acs=True)
rv = dv.create()
if rv and mirror:
# TODO: this is awful, we really should rewrite the DeclarativeVersion API to
Expand Down Expand Up @@ -146,3 +166,174 @@ def _get_safe_path(root_dir, entry, scheme):
relative_path = entry.relative_path.lstrip("/")
path = os.path.join(root_dir, relative_path)
return path if scheme == "file" else quote(path, safe=":/")


def _build_clone_env(remote):
"""
Build environment variables for git clone that apply the remote's auth and proxy settings.

Args:
remote (FileGitRemote): The remote with auth/proxy/TLS configuration.

Returns:
dict: Environment variables to pass to git commands.
"""
env = os.environ.copy()

# Proxy configuration
if remote.proxy_url:
proxy_url = remote.proxy_url
if remote.proxy_username and remote.proxy_password:
parsed = urlparse(proxy_url)
proxy_url = urlunparse(
parsed._replace(
netloc=f"{remote.proxy_username}:{remote.proxy_password}@{parsed.hostname}"
+ (f":{parsed.port}" if parsed.port else "")
)
)
env["http_proxy"] = proxy_url
env["https_proxy"] = proxy_url

# TLS validation
if not remote.tls_validation:
env["GIT_SSL_NO_VERIFY"] = "true"

# CA certificate
if remote.ca_cert:
ca_cert_file = tempfile.NamedTemporaryFile(dir=".", suffix=".pem", delete=False, mode="w")
ca_cert_file.write(remote.ca_cert)
ca_cert_file.close()
env["GIT_SSL_CAINFO"] = ca_cert_file.name

# Client certificate and key
if remote.client_cert:
client_cert_file = tempfile.NamedTemporaryFile(
dir=".", suffix=".pem", delete=False, mode="w"
)
client_cert_file.write(remote.client_cert)
client_cert_file.close()
env["GIT_SSL_CERT"] = client_cert_file.name

if remote.client_key:
client_key_file = tempfile.NamedTemporaryFile(
dir=".", suffix=".key", delete=False, mode="w"
)
client_key_file.write(remote.client_key)
client_key_file.close()
env["GIT_SSL_KEY"] = client_key_file.name

return env


def _build_clone_url(remote):
"""
Build the clone URL, embedding basic auth credentials if present on the remote.

Args:
remote (FileGitRemote): The remote with URL and optional credentials.

Returns:
str: The URL to use for git clone.
"""
url = remote.url
if remote.username and remote.password:
parsed = urlparse(url)
if parsed.scheme in ("http", "https"):
url = urlunparse(
parsed._replace(
netloc=f"{remote.username}:{remote.password}@{parsed.hostname}"
+ (f":{parsed.port}" if parsed.port else "")
)
)
return url


class GitFirstStage(Stage):
    """
    The first stage of a pulp_file sync pipeline for Git repositories.

    Performs a bare clone of the Git repository, resolves the specified git_ref, and
    walks the tree to emit ``DeclarativeContent`` for each blob. Computes sha256 for
    each blob so that ``QueryExistingArtifacts`` can match already-known artifacts and
    ``FileContent.digest`` is available for content matching.
    """

    def __init__(self, remote):
        """
        Args:
            remote (FileGitRemote): The git remote data to be used when syncing.
        """
        super().__init__()
        self.remote = remote

    def _clone(self, clone_url, clone_dir, git_ref, clone_env):
        """
        Bare-clone the repository, preferring a shallow clone of just ``git_ref``.

        Args:
            clone_url (str): URL handed to git (may contain embedded credentials).
            clone_dir (str): Directory to clone into.
            git_ref (str): Branch, tag, commit, or ``HEAD``.
            clone_env (dict): Environment variables for the git process.

        Returns:
            The cloned repository object returned by ``Repo.clone_from``.

        Raises:
            SyncError: If both the shallow and the full clone attempt fail.
        """
        shallow_kwargs = {"bare": True, "depth": 1, "env": clone_env}
        if git_ref != "HEAD":
            # ``--branch`` only accepts branch and tag names. "HEAD" is not one, so passing
            # it would make every default sync fail here and fall back to a full clone.
            # Omitting ``--branch`` checks out the remote's HEAD, which is what is wanted.
            shallow_kwargs["branch"] = git_ref

        try:
            try:
                return gitpython.Repo.clone_from(clone_url, clone_dir, **shallow_kwargs)
            except gitpython.exc.GitCommandError:
                # depth/branch fails for commit hashes; retry with full bare clone
                return gitpython.Repo.clone_from(clone_url, clone_dir, bare=True, env=clone_env)
        except gitpython.exc.GitCommandError as e:
            # Report ``remote.url`` rather than ``clone_url`` so embedded credentials are not
            # placed in the message by this code.
            raise SyncError(
                _("Failed to clone git repository '{url}': {error}").format(
                    url=self.remote.url, error=str(e)
                )
            ) from e

    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Git repository tree.

        Raises:
            SyncError: If the repository cannot be cloned or ``git_ref`` cannot be resolved.
        """

        remote = self.remote
        git_ref = remote.git_ref or "HEAD"
        clone_url = _build_clone_url(remote)
        clone_env = _build_clone_env(remote)

        clone_dir = tempfile.mkdtemp(dir=".", prefix="pulp_file_git_")

        async with ProgressReport(message="Cloning Git Repository", code="sync.git.cloning") as pb:
            repo = self._clone(clone_url, clone_dir, git_ref, clone_env)
            await pb.aincrement()

        async with ProgressReport(message="Resolving Git ref", code="sync.git.resolving_ref") as pb:
            try:
                commit = repo.commit(git_ref)
            except Exception as e:
                raise SyncError(
                    _("Could not resolve git ref '{ref}': {error}").format(
                        ref=git_ref, error=str(e)
                    )
                ) from e
            await pb.aincrement()

        async with ProgressReport(
            message="Parsing Git tree",
            code="sync.git.parsing_tree",
        ) as pb:
            # Only blobs (file contents) become content units; other tree entries are skipped.
            blobs = [item for item in commit.tree.traverse() if item.type == "blob"]
            pb.total = len(blobs)
            await pb.asave()

            for blob in blobs:
                relative_path = blob.path
                size = blob.size
                # delete=False: the file must outlive the ``with`` block because the Artifact
                # below is initialized from its path.
                with tempfile.NamedTemporaryFile(dir=".", delete=False, mode="wb") as file:
                    shutil.copyfileobj(blob.data_stream, file)

                artifact = Artifact.init_and_validate(file.name, expected_size=size)
                file_content = FileContent(relative_path=relative_path, digest=artifact.sha256)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=remote.url,
                    relative_path=relative_path,
                    remote=remote,
                    deferred_download=False,
                )
                dc = DeclarativeContent(content=file_content, d_artifacts=[da])
                await pb.aincrement()
                await self.put(dc)
Loading