def semi_list(semi_str: str) -> Iterable[str]:
    """Split a semi-colon delimited string into a list of strings.

    Empty fields are preserved: ``"a;;b"`` gives ``["a", "", "b"]`` and the
    empty string gives ``[""]``.
    """
    return semi_str.split(";")


def _flatten_repository_names(values: Iterable) -> Iterable[str]:
    """Return a flat list of non-empty repository names.

    ``--docker_repository_names`` combines ``nargs="+"`` with
    ``type=semi_list``, so values given on the command line arrive as a list
    of lists, whereas the built-in default is already a flat list of strings.
    Both shapes are accepted here.
    """
    flat = []
    for value in values:
        if isinstance(value, str):
            flat.append(value)
        else:
            flat.extend(value)
    # A trailing ';' produces an empty name; drop it rather than pass it on.
    return [name for name in flat if name]


def _decode_output(raw: object) -> str:
    """Render a captured subprocess stream as text for error reporting.

    ``CalledProcessError.stdout``/``stderr`` are ``None`` when the stream was
    not captured; decoding must not raise and hide the original failure.
    """
    if raw is None:
        return ""
    if isinstance(raw, bytes):
        return raw.decode("utf-8", errors="replace")
    return str(raw)


def main() -> None:
    """Program main entry point.

    Parses the command line, computes the set of image tags that are still
    referenced by the selected commits, and then deletes (or, with
    ``--dry-run``, only reports) every image manifest that
    ``find_garbage_manifests`` yields.

    :raises ValueError: if an argument is invalid or no active tags are
        found (continuing in that case could delete every image).
    :raises subprocess.CalledProcessError: re-raised after the failed
        command's captured output has been written to stderr.
    """
    parser = argparse.ArgumentParser(description=PROGRAM_DESCRIPTION)
    parser.add_argument(
        "--repo-path", "-p", required=True, help="path to the repository to inspect"
    )
    parser.add_argument(
        "root_filters",
        nargs="+",
        type=semi_list,
        help="""semi-colon seperated list of arguments for one invocation of `git
        rev-parse`. The commits in the union of these rev-parse invocations
        are used to find container images that are referenced by source
        code. Ex:
        '--remotes=origin;--since=2~weeks~ago' or '--tags;-n;1'""",
    )
    parser.add_argument(
        "--min-age",
        "-m",
        required=True,
        type=int,
        help="""the minimum length of time, in days, before images are eligible for
        deletion, even if they are not referenced by the source code.""",
    )
    parser.add_argument(
        "--dry-run",
        "-n",
        action="store_true",
        help="""report which images would be deleted, but do not actually delete
        them""",
    )
    parser.add_argument(
        "--verbosity",
        "-v",
        default="WARNING",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "NOTSET"],
        help="log level to emit to stderr. Defaults to WARNING.",
    )
    parser.add_argument(
        "--docker_registry_name",
        "-r",
        default="bondciimages",
        help="Defaults to bondciimages.",
    )
    parser.add_argument(
        "--docker_repository_names",
        "-d",
        nargs="+",
        type=semi_list,
        default=["ubuntu-1604", "ubuntu-1804", "ubuntu-2004"],
        help="""semi-colon seperated list of docker repo names.
        '-d=ubuntu-1604;ubuntu-1804'""",
    )

    args = parser.parse_args()

    # Explicit checks instead of ``assert``: assertions vanish under ``-O``,
    # and this tool deletes images, so validation must always run.
    if not args.root_filters:
        raise ValueError("At least one root filter is required")
    if not args.docker_registry_name:
        raise ValueError("--docker_registry_name must not be empty")

    repository_names = _flatten_repository_names(args.docker_repository_names)
    if not repository_names:
        raise ValueError(
            "--docker_repository_names must name at least one repository"
        )

    numeric_level = getattr(logging, args.verbosity.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError("Invalid --verbosity level: {}".format(args.verbosity))
    logging.basicConfig(level=numeric_level)

    if args.min_age < 0:
        raise ValueError(
            "--min-age must be non-negative, but got {}".format(args.min_age)
        )
    min_age_before_gc = timedelta(days=args.min_age)

    try:
        active_tags = live_tags(
            args.repo_path,
            args.root_filters,
            args.docker_registry_name,
            repository_names,
        )
        if not active_tags:
            raise ValueError("No active tags. This can delete all images, so aborting.")
        logging.info("Active tags: {%s}", ",".join(active_tags))

        # The registry and repositories must be passed explicitly; they are
        # no longer module-level constants.
        manifests = get_image_manifests(args.docker_registry_name, repository_names)

        for named_manifest in find_garbage_manifests(
            min_age_before_gc, active_tags, manifests
        ):
            if args.dry_run:
                print("{}: would delete".format(named_manifest.manifest.digest))
            else:
                delete_image_by_manifest(named_manifest)
                print("{}: deleted".format(named_manifest.manifest.digest))
    except subprocess.CalledProcessError as cpe:
        # ``file=`` belongs to print(), not str.format(); otherwise the
        # failure banner lands on stdout.
        print(
            "Subprocess {} failed with exit code {}".format(cpe.cmd, cpe.returncode),
            file=sys.stderr,
        )
        print("STDOUT:\n", _decode_output(cpe.stdout), file=sys.stderr)
        print("STDERR:\n", _decode_output(cpe.stderr), file=sys.stderr)
        raise


if __name__ == "__main__":
    main()
class NamedImageManifest(NamedTuple):
    """An ACR image manifest together with the registry and repository it came from."""
    # The parsed manifest; written as a forward reference so the tuple
    # definition does not depend on declaration order.
    manifest: 'ImageManifest'
    # Name of the container registry that was queried (``az ... --name``).
    registry_name: str
    # Name of the repository within that registry (``az ... --repository``).
    repository_name: str


def get_image_manifests(
        registry_name: str,
        repository_names: Iterable[str]) -> Generator[NamedImageManifest, None, None]:
    """Get the current ACR image manifests.

    Invokes the ``az`` CLI tool once per repository to discover the current
    images. This is a generator: each repository is queried only when
    iteration reaches it.

    :param registry_name: Name of the container registry to query.
    :param repository_names: Names of the repositories to list. A single
        bare string is treated as one repository name.
    :return: One :class:`NamedImageManifest` per manifest reported by ``az``,
        in repository order.
    :raises subprocess.CalledProcessError: if ``az`` exits with a non-zero
        status.
    :raises ValueError: if the JSON printed by ``az`` is not an array.
    """
    if isinstance(repository_names, str):
        # A str is itself an iterable of one-character strings; without this
        # guard every character would be queried as a repository.
        repository_names = [repository_names]

    for repository_name in repository_names:
        az_show_manifests_cmd_line = ['az', 'acr', 'repository', 'show-manifests',
                                      '--name', registry_name,
                                      '--repository', repository_name,
                                      '--output', 'json']
        _LOGGER.debug('Invoking %s', az_show_manifests_cmd_line)
        output = subprocess.check_output(az_show_manifests_cmd_line,
                                         stderr=subprocess.PIPE)
        manifests = json.loads(str(output, encoding='utf-8'))

        if not isinstance(manifests, list):
            # Name the repository: several are queried, so the bare type
            # name is not enough to locate the bad response.
            msg = ('Expected an array of manifests ("[{{...}},{{...}}]") '
                   'for repository "{}" but got {}').format(
                       repository_name, type(manifests).__name__)
            raise ValueError(msg)

        for raw_manifest in manifests:
            yield NamedImageManifest(ImageManifest(**raw_manifest),
                                     registry_name,
                                     repository_name)
image_name = '{}@{}'.format(REPOSITORY_NAME, manifest.digest) + image_name = '{}@{}'.format(named_manifest.repository_name, named_manifest.manifest.digest) az_delete_cmd_line = ['az', 'acr', 'repository', 'delete', - '--name', REGISTRY_NAME, + '--name', named_manifest.registry_name, '--image', image_name, '--yes'] _LOGGER.debug('Invoking %s', az_delete_cmd_line) diff --git a/tools/ci-scripts/linux/image-cleanup/collector/config.py b/tools/ci-scripts/linux/image-cleanup/collector/config.py deleted file mode 100644 index bc08de6878..0000000000 --- a/tools/ci-scripts/linux/image-cleanup/collector/config.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Constants for the garbage image collector.""" - -REGISTRY_NAME = 'bondciimages' -REPOSITORY_NAME = 'ubuntu-1604' diff --git a/tools/ci-scripts/linux/image-cleanup/collector/garbage_manifests.py b/tools/ci-scripts/linux/image-cleanup/collector/garbage_manifests.py index 6d456aa8a5..52758b395e 100644 --- a/tools/ci-scripts/linux/image-cleanup/collector/garbage_manifests.py +++ b/tools/ci-scripts/linux/image-cleanup/collector/garbage_manifests.py @@ -5,7 +5,7 @@ from datetime import datetime, timedelta, timezone from typing import AbstractSet, Iterable -from .acr import ImageManifest +from .acr import NamedImageManifest from .live_images import ImageTag _LOGGER = logging.getLogger(__name__) @@ -13,7 +13,7 @@ def find_garbage_manifests( min_age: timedelta, active_tags: AbstractSet[ImageTag], - image_manifests: Iterable[ImageManifest]) -> Iterable[ImageManifest]: + image_manifests: Iterable[NamedImageManifest]) -> Iterable[NamedImageManifest]: """Yield a sequence of the manifests that are considered garbage. 
Image manifests are considered garbage if they are both: @@ -25,7 +25,8 @@ def find_garbage_manifests( keep_newer_than_time = datetime.now(timezone.utc) - min_age _LOGGER.debug('Keeping images newer than %s', keep_newer_than_time) - for manifest in image_manifests: + for named_manifest in image_manifests: + manifest = named_manifest.manifest # It would probably be faster to check timestamps before tags, but # the tags are more important of a reason to keep an image, so we # check tags first so that the tag reference gets logged instead of @@ -40,4 +41,4 @@ def find_garbage_manifests( continue _LOGGER.info('%s: garbage', manifest.digest) - yield manifest + yield named_manifest diff --git a/tools/ci-scripts/linux/image-cleanup/collector/live_images.py b/tools/ci-scripts/linux/image-cleanup/collector/live_images.py index 5f124cba46..b62aa54b96 100644 --- a/tools/ci-scripts/linux/image-cleanup/collector/live_images.py +++ b/tools/ci-scripts/linux/image-cleanup/collector/live_images.py @@ -6,8 +6,6 @@ from typing import AbstractSet, Iterable, NewType, Sequence, Set # pylint: disable=unused-import -from .config import REGISTRY_NAME, REPOSITORY_NAME - _LOGGER = logging.getLogger(__name__) _BlobId = NewType('_BlobId', str) # pylint: disable=invalid-name @@ -143,7 +141,7 @@ def live_images( _blobs_from_roots(repo_path, roots)) def live_tags(repo_path: str, - roots: RevListRoots) -> AbstractSet[ImageTag]: + roots: RevListRoots, registry_name: str, repository_names: Iterable[str]) -> AbstractSet[ImageTag]: """Return the image tags that are referenced by .travis.yml or Linux GitHub Action workflow files in the commits specified by the given `roots`. @@ -152,18 +150,19 @@ def live_tags(repo_path: str, :param roots: A collection of argument lists to pass to ``git rev-list`` to limit the matched commits. 
""" - expected_prefix = '{}.azurecr.io/{}'.format(REGISTRY_NAME, REPOSITORY_NAME) + prefixes = ['{}.azurecr.io/{}'.format(registry_name, repository_name) for repository_name in repository_names] + def matches_expected_prefix(image_name: ImageName) -> bool: """Check (and log) whether an image name is from the expected repository.""" - if image_name.startswith(expected_prefix): + if any(image_name.startswith(x) for x in prefixes): return True _LOGGER.info( - 'Discarding image "%s" that does not match expected prefix "%s"', + 'Discarding image "%s" that does not match any expected prefixes "%s"', image_name, - expected_prefix) + prefixes) return False return frozenset((ImageTag(image_name.split(':')[1]) - for image_name in live_images(repo_path, roots) - if matches_expected_prefix(image_name))) + for image_name in live_images(repo_path, roots) + if matches_expected_prefix(image_name)))