From 531971cb774ab0f03f50b352b7fe6dd63da944fc Mon Sep 17 00:00:00 2001 From: Andrej Shadura Date: Sun, 4 Apr 2021 22:17:42 +0200 Subject: [PATCH 1/3] Add pristine-lfs support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add pristine-lfs support similar to the existing pristine-tar support. Since pristine-lfs has a Python interface, use that instead of the command-line interface, but use it in a fail-safe way. Unlike pristine-tar, pristine-lfs supports looking up tarballs by a version, so use that instead of picking tarballs manually. If both pristine-lfs and pristine-tar are enabled, import tarballs into both; when checking out, prefer pristine-lfs, but also do pristine-tar. The rationale for the above behaviour: pristine-lfs and pristine-tar are built on different principles. What is stored: * pristine-tar records the hash of the Git tree the tarball corresponds to, the hash of the tarball itself, and a binary delta between its Git export and the actual tarball * pristine-lfs records the hash of the tarball only; the actual tarball is stored and transferred out-of-band using Git LFS mechanisms. How tarballs are recreated: * pristine-tar only needs the Git repository itself: it uses the Git tree referenced in its metadata and the binary delta to recreate the tarball. It needs to explicitly support each compressor and its quirks to be able to efficiently store the tarball and precisely recreate it * pristine-lfs needs the Git repository itself, but it also needs to download binary blobs for each tarball separately. They’re stored inside .git but not as Git objects. When a tarball is checked out, it’s basically downloaded if it’s missing and then copied from the internal storage to the desired location. This means that for off-line operation all tarballs need to be cached beforehand. 
Conclusions: * for a lot of tarballs especially with tricky compressors, when you need just one, the bandwidth to recreate it will be less for pristine-lfs than pristine-tar since the pristine-lfs branch only contains text metadata but no tarball data — with pristine-tar the branch will contain all binary deltas for all tarballs, whereas pristine-lfs only downloads tarballs on-demand. (On the other hand, the tarball data is already on the upstream code branch.) * when you need all tarballs ever imported, pristine-tar will probably save you a lot of bandwidth * pristine-lfs doesn’t care about compressors or the tarball format * pristine-tar doesn’t require any server-side support, pristine-lfs needs a Git LFS server. Given the above, it makes sense that during a checkout, the first try should be pristine-lfs (if available) and then pristine-tar as a fallback. During import, --pristine-lfs --pristine-tar should do both, not just one. This approach may be revisited in the future. Signed-off-by: Andrej Shadura --- gbp/config.py | 8 ++++ gbp/deb/git.py | 38 ++++++++++++++++ gbp/deb/pristinelfs.py | 86 +++++++++++++++++++++++++++++++++++++ gbp/pkg/git.py | 2 + gbp/scripts/buildpackage.py | 1 + gbp/scripts/clone.py | 2 + gbp/scripts/export_orig.py | 26 ++++++++++- gbp/scripts/import_orig.py | 20 ++++++--- 8 files changed, 177 insertions(+), 6 deletions(-) create mode 100644 gbp/deb/pristinelfs.py diff --git a/gbp/config.py b/gbp/config.py index 4f9e91d3..d2ad94aa 100644 --- a/gbp/config.py +++ b/gbp/config.py @@ -174,6 +174,8 @@ class GbpOptionParser(OptionParser): 'pq-from': 'DEBIAN', 'prebuild': '', 'preexport': '', + 'pristine-lfs': 'False', + 'pristine-lfs-commit': 'False', 'pristine-tar': 'False', 'pristine-tar-commit': 'False', 'purge': 'True', @@ -238,6 +240,12 @@ class GbpOptionParser(OptionParser): 'commit-msg': "Format string for commit message used to commit, " "the changelog, default is '%(commit-msg)s'", + 'pristine-lfs': + "Use pristine-lfs to create orig 
@property
def pristine_lfs_branch(self):
    """
    Name of the branch pristine-lfs keeps its data on.

    The name is returned regardless of whether the branch exists in
    this repository.
    """
    return PristineLfs.branch


def has_pristine_lfs_branch(self):
    """
    Check whether the repository has a I{pristine-lfs} branch.

    @return: C{True} if the pristine-lfs branch exists, C{False} otherwise
    @rtype: C{Bool}
    """
    return bool(self.has_branch(self.pristine_lfs_branch))


def create_pristine_lfs_commits(self, sources):
    """
    Commit the tarballs of a package, together with any signature files
    that accompany them, to the pristine-lfs branch.

    @param sources: C{list} of tarballs as I{UpstreamSource}. The first one
        is the main tarball, the other ones are additional tarballs.
    @raises GitRepositoryError: if pristine-lfs fails with a
        I{CommandExecFailed}
    """
    tarballs = []
    signatures = []
    for source in sources:
        tarballs.append(source.path)
        if source.signaturefile:
            signatures.append(source.signaturefile)

    try:
        # All tarballs come first, followed by the signature files.
        self.pristine_lfs.commit(tarballs + signatures)
    except CommandExecFailed as err:
        raise GitRepositoryError(str(err))
try:
    from pristine_lfs.main import do_list, do_verify, do_commit_files, do_checkout
    from pristine_lfs.errors import CommandFailed, DifferentFilesExist, GitError
except ImportError:
    def pristine_lfs_not_found(*args, **kwargs):
        """Stand-in for the pristine-lfs entry points when the module is missing."""
        raise CommandExecFailed("pristine-lfs not installed")

    do_list = do_verify = do_checkout = do_commit_files = pristine_lfs_not_found

    # Every exception name used in an ``except`` clause below needs a
    # placeholder here: Python evaluates those names while an exception is
    # propagating, so a missing one would turn the intended
    # CommandExecFailed into a NameError.
    class CommandFailed(Exception):
        pass

    class DifferentFilesExist(Exception):
        pass

    GitError = DifferentFilesExist

logger = logging.getLogger('pristine-lfs')


class PristineLfs:
    """Check files in to and out of the pristine-lfs branch of a repository"""

    # Name of the branch handed to pristine-lfs for all operations
    branch = 'pristine-lfs'

    def __init__(self, repo: "GitRepository"):
        """
        @param repo: the repository this object belongs to
        """
        self.repo = repo

    def commit(self, files: list[str], quiet: bool = False):
        """
        Commit files I{files} to the pristine-lfs branch

        @param files: list of files to commit
        @type files: C{list}
        @param quiet: log only warnings and above instead of info messages
        @type quiet: C{bool}
        @raises CommandExecFailed: if a file can't be opened, pristine-lfs
            is not installed or pristine-lfs reports a failed command
        @raises GitRepositoryError: if pristine-lfs reports a Git error or
            different files of the same name already exist
        """
        logger.setLevel(logging.WARNING if quiet else logging.INFO)

        handles = []
        try:
            # Open one by one so that a failure half way through still
            # leaves the already opened files registered for closing.
            for path in files:
                handles.append(open(path, 'rb'))
            do_commit_files(tarballs=handles, branch=self.branch)
        except (OSError, CommandFailed) as e:
            raise CommandExecFailed(str(e)) from e
        except (DifferentFilesExist, GitError) as e:
            raise GitRepositoryError(str(e)) from e
        finally:
            for handle in handles:
                handle.close()

    def checkout(self, package: str, version: str, output_dir: str, quiet: bool = False):
        """
        Check out all orig tarballs for package I{package} of I{version} to
        I{output_dir}

        @param package: the package to check out the orig tarballs for
        @type package: C{str}
        @param version: the version to check out the orig tarballs for
        @type version: C{str}
        @param output_dir: the directory to put the tarballs into
        @type output_dir: C{str}
        @param quiet: log only warnings and above instead of info messages
        @type quiet: C{bool}
        @raises CommandExecFailed: on I/O errors, if pristine-lfs is not
            installed or if pristine-lfs reports a failed command
        @raises GitRepositoryError: if pristine-lfs reports a Git error
        """
        logger.setLevel(logging.WARNING if quiet else logging.INFO)

        try:
            do_checkout(package=package, version=version, branch=self.branch, outdir=output_dir)
        except (OSError, CommandFailed) as e:
            raise CommandExecFailed(str(e)) from e
        except GitError as e:
            raise GitRepositoryError(str(e)) from e
def pristine_lfs_checkout_origs(repo, source, output_dir, options):
    """
    Check out orig tarballs using pristine-lfs

    @param repo: repository object; its C{pristine_lfs} attribute performs
        the checkout
    @param source: object whose C{name} and C{version} select the tarballs
    @param output_dir: directory the tarballs are checked out to
    @param options: parsed options; C{pristine_lfs} enables the checkout,
        C{verbose} controls how chatty pristine-lfs is
    @returns: C{True} if the tarballs were checked out, C{False} if
        pristine-lfs is disabled or the checkout failed
    """
    if not options.pristine_lfs:
        return False

    if not repo.has_branch(repo.pristine_lfs_branch):
        # Only a warning: the checkout is still attempted below and
        # reports its own failure.
        gbp.log.warn('Pristine-lfs branch "%s" not found' %
                     repo.pristine_lfs_branch)

    try:
        repo.pristine_lfs.checkout(source.name, source.version, output_dir,
                                   quiet=not options.verbose)
    except CommandExecFailed:
        return False
    return True
def set_bare_repo_options(options):
    """
    Modify options for import into a bare repository

    Switches off the pristine-tar, pristine-lfs and merge options when at
    least one of them is enabled and logs which of them were turned off.

    @param options: parsed options, modified in place
    """
    # pristine-lfs has to be part of the check, otherwise it stays enabled
    # when it is the only one of the three that was requested.
    if options.pristine_tar or options.pristine_lfs or options.merge:
        gbp.log.info("Bare repository: setting %s%s%s options"
                     % (" '--no-pristine-tar'" if options.pristine_tar else "",
                        " '--no-pristine-lfs'" if options.pristine_lfs else "",
                        " '--no-merge'" if options.merge else ""))
        options.pristine_tar = False
        options.pristine_lfs = False
        options.merge = False
pristine_orig: - repo.rrr_branch(repo.pristine_tar_branch) + if options.pristine_lfs: + repo.rrr_branch(repo.pristine_lfs_branch) + if options.pristine_tar: + repo.rrr_branch(repo.pristine_tar_branch) for source in sources: # Enforce signature file exists with --upstream-signatures=on if options.upstream_signatures.is_on() and not source.signaturefile: @@ -526,9 +533,12 @@ def main(argv): # For all practical purposes we're interested in pristine_orig's path if pristine_orig != sources[0].path: sources[0]._path = pristine_orig - repo.create_pristine_tar_commits(import_branch, sources) + if options.pristine_lfs: + repo.create_pristine_lfs_commits(sources) + if options.pristine_tar: + repo.create_pristine_tar_commits(import_branch, sources) else: - gbp.log.warn("'%s' not an archive, skipping pristine-tar" % sources[0].path) + gbp.log.warn("'%s' not an archive, skipping pristine-tar/pristine-lfs" % sources[0].path) repo.create_tag(name=tag, msg="Upstream version %s" % version, From ce66550669c3b7ed0ecdd7e7359b7bec8727104e Mon Sep 17 00:00:00 2001 From: Andrej Shadura Date: Thu, 15 Apr 2021 12:20:41 +0200 Subject: [PATCH 2/3] import-dsc: Add pristine-lfs support Signed-off-by: Andrej Shadura --- gbp/scripts/clone.py | 1 + gbp/scripts/import_dsc.py | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/gbp/scripts/clone.py b/gbp/scripts/clone.py index 8c1938d5..2910bc43 100755 --- a/gbp/scripts/clone.py +++ b/gbp/scripts/clone.py @@ -141,6 +141,7 @@ def build_parser(name): branch_group.add_config_file_option(option_name="upstream-branch", dest="upstream_branch") branch_group.add_config_file_option(option_name="debian-branch", dest="debian_branch") branch_group.add_boolean_config_file_option(option_name="pristine-tar", dest="pristine_tar") + branch_group.add_boolean_config_file_option(option_name="pristine-lfs", dest="pristine_lfs") branch_group.add_option("--depth", action="store", dest="depth", default=0, help="git history depth (for 
def disable_pristine_tar(options, reason):
    """
    Turn the pristine-tar option off if it is currently on

    @param options: parsed options, modified in place
    @param reason: text put in front of the log message
    """
    if not options.pristine_tar:
        return
    gbp.log.info("%s: setting '--no-pristine-tar' option" % reason)
    options.pristine_tar = False


def disable_pristine_lfs(options, reason):
    """
    Turn the pristine-lfs option off if it is currently on

    @param options: parsed options, modified in place
    @param reason: text put in front of the log message
    """
    if not options.pristine_lfs:
        return
    gbp.log.info("%s: setting '--no-pristine-lfs' option" % reason)
    options.pristine_lfs = False
repo.force_head(options.debian_branch, hard=True) From 309165b76e0b9860155abfb40520455a3ec6c25e Mon Sep 17 00:00:00 2001 From: Andrej Shadura Date: Wed, 14 Dec 2022 19:32:22 +0100 Subject: [PATCH 3/3] Document pristine-lfs support Signed-off-by: Andrej Shadura --- docs/common.ent | 1 + docs/manpages/gbp-buildpackage.xml | 37 ++++++++++++++++++++++++++++-- docs/manpages/gbp-export-orig.xml | 28 ++++++++++++++++++++-- docs/manpages/gbp-import-dsc.xml | 10 ++++++++ docs/manpages/gbp-import-orig.xml | 10 ++++++++ docs/manpages/man.seealso.xml | 4 ++++ 6 files changed, 86 insertions(+), 4 deletions(-) diff --git a/docs/common.ent b/docs/common.ent index 794d47ce..1e83276f 100644 --- a/docs/common.ent +++ b/docs/common.ent @@ -71,6 +71,7 @@ Pbuilder"> pk4"> pristine-tar"> + pristine-lfs"> svn-buildpackage"> debian/changelog"> diff --git a/docs/manpages/gbp-buildpackage.xml b/docs/manpages/gbp-buildpackage.xml index 972aa318..85f9d4d5 100644 --- a/docs/manpages/gbp-buildpackage.xml +++ b/docs/manpages/gbp-buildpackage.xml @@ -60,6 +60,8 @@ TREEISH + + @@ -105,7 +107,7 @@ - Build an orig tarball if it doesn't exist. Optionally using &pristine-tar;. + Build an orig tarball if it doesn't exist. Optionally using &pristine-lfs; or &pristine-tar;. @@ -150,7 +152,8 @@ Upstream tarball creation options When &gbp-buildpackage; doesn't find a suitable upstream - tarball it will create one either using &pristine-tar; + tarball, it will first try to check it out from &pristine-lfs;. If &pristine-lfs; + doesn't produce a tarball, &gbp-buildpackage; will create one either using &pristine-tar; or git archive. These options determine how the tarball is created: @@ -164,6 +167,26 @@ the , options have no effect when creating tarballs. + + If both and + are specified, takes precedence. + + + + + + + + + Use pristine-lfs when generating the upstream tarball if + it doesn't exist. If this mode is enabled + the , + options have no effect when creating tarballs. 
+ + + If both and + are specified, takes precedence. + @@ -256,6 +279,16 @@ tarball was generated and the pristine-tar data isn't already there. + + + + + + + Commit the tarball to the pristine-lfs branch if a new + tarball was generated and it hasn't already been imported there. + + diff --git a/docs/manpages/gbp-export-orig.xml b/docs/manpages/gbp-export-orig.xml index 36b38b8f..ffcd8656 100644 --- a/docs/manpages/gbp-export-orig.xml +++ b/docs/manpages/gbp-export-orig.xml @@ -32,6 +32,8 @@ component + + [auto|on|off] @@ -137,7 +139,7 @@ This doesn't have any effect if - is being used. + or is being used. @@ -203,7 +205,8 @@ Use pristine-tar when generating the upstream tarball if it doesn't - exist. + exist. If used with , pristine-tar + is only used when pristine-lfs didn’t produce a tarball. @@ -217,6 +220,27 @@ + + + + + + Use pristine-lfs when generating the upstream tarball if it doesn't + exist. If used with , pristine-tar + is only used when pristine-lfs didn’t produce a tarball. + + + + + + + + + Commit the tarball to the pristine-lfs branch if a new + tarball was generated and it hasn't already been imported there. + + + [auto|on|off] diff --git a/docs/manpages/gbp-import-dsc.xml b/docs/manpages/gbp-import-dsc.xml index 73c16764..7d3c4176 100644 --- a/docs/manpages/gbp-import-dsc.xml +++ b/docs/manpages/gbp-import-dsc.xml @@ -29,6 +29,7 @@ gpg-keyid + branch_name @@ -192,6 +193,15 @@ + + + + + + Import tarballs into pristine-lfs. + + + diff --git a/docs/manpages/gbp-import-orig.xml b/docs/manpages/gbp-import-orig.xml index dc2ce199..b39a1af6 100644 --- a/docs/manpages/gbp-import-orig.xml +++ b/docs/manpages/gbp-import-orig.xml @@ -34,6 +34,7 @@ component + @@ -289,6 +290,15 @@ + + + + + + Import tarballs into pristine-lfs. 
+ + + diff --git a/docs/manpages/man.seealso.xml b/docs/manpages/man.seealso.xml index b7af1337..80152951 100644 --- a/docs/manpages/man.seealso.xml +++ b/docs/manpages/man.seealso.xml @@ -10,5 +10,9 @@ pristine-tar 1 , + + pristine-lfs + 1 + , The Git-Buildpackage Manual