Skip to content
This repository was archived by the owner on Apr 1, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 80 additions & 38 deletions tools/ci-scripts/linux/image-cleanup/collect_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,75 +20,117 @@
commits included by the provided root_filters, or b) if they are younger
than the provided min-age."""


def semi_list(semi_str: str) -> Iterable[str]:
    """Split a semi-colon delimited string into a list of strings.

    Used as an ``argparse`` ``type=`` converter, so it receives one
    command-line word at a time.

    :param semi_str: The raw string, e.g. ``'--tags;-n;1'``.
    :return: The pieces between the semi-colons, in order. Empty pieces are
        preserved (``''`` yields ``['']``), exactly as ``str.split`` does.
    """
    return semi_str.split(";")


def main() -> None:
    """Program main entry point.

    Parses the command line, computes the set of image tags that are still
    referenced by the selected commits, and then deletes (or, with
    ``--dry-run``, only reports) every image manifest yielded by
    ``find_garbage_manifests``.

    :raises ValueError: If ``--verbosity`` does not name a logging level, if
        ``--min-age`` is negative, or if no active tags were found (continuing
        in that case could delete every image).
    :raises subprocess.CalledProcessError: Re-raised after the failing command
        and its captured output have been written to stderr.
    """
    parser = argparse.ArgumentParser(description=PROGRAM_DESCRIPTION)
    parser.add_argument(
        "--repo-path", "-p", required=True, help="path to the repository to inspect"
    )
    parser.add_argument(
        "root_filters",
        nargs="+",
        type=semi_list,
        help="""semi-colon seperated list of arguments for one invocation of `git
        rev-parse`. The commits in the union of these
        rev-parse invocations are used to find container images
        that are referenced by source code. Ex:
        '--remotes=origin;--since=2~weeks~ago' or '--tags;-n;1'""",
    )
    parser.add_argument(
        "--min-age",
        "-m",
        required=True,
        type=int,
        help="""the minimum length of time, in days, before images are eligible for
        deletion, even if they are not referenced by the source code.""",
    )
    parser.add_argument(
        "--dry-run",
        "-n",
        action="store_true",
        help="""report which images would be deleted, but do not actually delete
        them""",
    )
    parser.add_argument(
        "--verbosity",
        "-v",
        default="WARNING",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "NOTSET"],
        help="log level to emit to stderr. Defaults to WARNING.",
    )
    parser.add_argument(
        "--docker_registry_name",
        "-r",
        default="bondciimages",
        help="Defaults to bondciimages.",
    )
    parser.add_argument(
        "--docker_repository_names",
        "-d",
        nargs="+",
        type=semi_list,
        default=["ubuntu-1604", "ubuntu-1804", "ubuntu-2004"],
        help="""semi-colon seperated list of docker repo names.
        '-d=ubuntu-1604;ubuntu-1804'""",
    )

    args = parser.parse_args()

    # Validate with parser.error rather than ``assert``: assertions are
    # stripped when Python runs with -O, and this tool deletes images.
    if not args.root_filters:
        parser.error("at least one root filter is required")
    if not args.docker_registry_name:
        parser.error("--docker_registry_name must not be empty")

    # ``nargs="+"`` combined with ``type=semi_list`` produces one list per
    # command-line word, while the default is already a flat list of names.
    # Normalise both shapes to a flat list, dropping empty names (an empty
    # name would turn into a prefix that matches every repository).
    repository_names = [
        name
        for group in args.docker_repository_names
        for name in ([group] if isinstance(group, str) else group)
        if name
    ]
    if not repository_names:
        parser.error("--docker_repository_names must name at least one repository")

    numeric_level = getattr(logging, args.verbosity.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError("Invalid --verbosity level: {}".format(args.verbosity))

    logging.basicConfig(level=numeric_level)

    if args.min_age < 0:
        raise ValueError(
            "--min-age must be non-negative, but got {}".format(args.min_age)
        )

    min_age_before_gc = timedelta(days=args.min_age)

    try:
        active_tags = live_tags(
            args.repo_path,
            args.root_filters,
            args.docker_registry_name,
            repository_names,
        )
        if not active_tags:
            raise ValueError("No active tags. This can delete all images, so aborting.")
        logging.info("Active tags: {%s}", ",".join(active_tags))

        # The registry and repositories must be passed explicitly; they are
        # required parameters of get_image_manifests.
        manifests = get_image_manifests(args.docker_registry_name, repository_names)

        for named_manifest in find_garbage_manifests(
            min_age_before_gc, active_tags, manifests
        ):
            if args.dry_run:
                print("{}: would delete".format(named_manifest.manifest.digest))
            else:
                delete_image_by_manifest(named_manifest)
                print("{}: deleted".format(named_manifest.manifest.digest))

    except subprocess.CalledProcessError as cpe:
        # ``file=`` belongs to print(), not to str.format(); otherwise the
        # failure banner silently goes to stdout.
        print(
            "Subprocess {} failed with exit code {}".format(cpe.cmd, cpe.returncode),
            file=sys.stderr,
        )
        # stdout/stderr are None when the failing call did not capture them;
        # fall back to empty bytes so reporting cannot mask the real error.
        print(
            "STDOUT:\n",
            str(cpe.stdout or b"", encoding="utf-8", errors="replace"),
            file=sys.stderr,
        )
        print(
            "STDERR:\n",
            str(cpe.stderr or b"", encoding="utf-8", errors="replace"),
            file=sys.stderr,
        )
        raise

# Run the collector only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
52 changes: 29 additions & 23 deletions tools/ci-scripts/linux/image-cleanup/collector/acr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
import subprocess

from datetime import datetime, timezone
from typing import Mapping, Iterable

from .config import REGISTRY_NAME, REPOSITORY_NAME
from typing import Mapping, Iterable, Generator, NamedTuple

_LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -70,32 +68,40 @@ def __init__(self, **kwargs: str) -> None:
except:
raise ManifestParseError(kwargs)

def get_image_manifests() -> Iterable[ImageManifest]:
class NamedImageManifest(NamedTuple):
    """Represents an ACR image manifest with a human readable name.

    Pairs a manifest with the registry and repository it was listed from, so
    that code handling the manifest later (for example deletion) knows where
    the image lives.
    """
    # The manifest as parsed from the ``az`` CLI output.
    manifest: ImageManifest
    # Name of the container registry the manifest was listed from.
    registry_name: str
    # Name of the repository, within the registry, that holds the image.
    repository_name: str

def get_image_manifests(registry_name: str, repository_names: Iterable[str]) -> Generator[NamedImageManifest, None, None]:
    """Get the current ACR image manifests.

    Invokes the ``az`` CLI tool to discover the current images, once per
    repository. This is a generator: each repository is only queried when
    iteration reaches it.

    :param registry_name: Name of the registry, passed to ``az`` as ``--name``.
    :param repository_names: Names of the repositories to list, each passed to
        ``az`` as ``--repository``.
    :return: Yields one :class:`NamedImageManifest` per manifest reported,
        tagged with the registry and repository it came from.
    :raises subprocess.CalledProcessError: If an ``az`` invocation exits with
        a non-zero status.
    :raises ValueError: If the ``az`` output is valid JSON but not an array.
    """
    for repository_name in repository_names:
        az_show_manifests_cmd_line = ['az', 'acr', 'repository', 'show-manifests',
                                      '--name', registry_name,
                                      '--repository', repository_name,
                                      '--output', 'json']
        _LOGGER.debug('Invoking %s', az_show_manifests_cmd_line)
        # stderr is captured so that a CalledProcessError carries it for the
        # caller to report.
        output = subprocess.check_output(az_show_manifests_cmd_line,
                                         stderr=subprocess.PIPE)
        manifests = json.loads(str(output, encoding='utf-8'))

        if not isinstance(manifests, list):
            # The closing parenthesis was missing from this message.
            msg = 'Expected an array of manifests ("[{{...}},{{...}}]") but got {}'.format(
                type(manifests).__name__)
            raise ValueError(msg)

        for o in manifests:
            yield NamedImageManifest(ImageManifest(**o), registry_name, repository_name)

def delete_image_by_manifest(named_manifest: NamedImageManifest) -> None:
"""Delete an ACR image (and all its tags)."""
image_name = '{}@{}'.format(REPOSITORY_NAME, manifest.digest)
image_name = '{}@{}'.format(named_manifest.repository_name, named_manifest.manifest.digest)
az_delete_cmd_line = ['az', 'acr', 'repository', 'delete',
'--name', REGISTRY_NAME,
'--name', named_manifest.registry_name,
'--image', image_name,
'--yes']
_LOGGER.debug('Invoking %s', az_delete_cmd_line)
Expand Down
4 changes: 0 additions & 4 deletions tools/ci-scripts/linux/image-cleanup/collector/config.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
from datetime import datetime, timedelta, timezone
from typing import AbstractSet, Iterable

from .acr import ImageManifest
from .acr import NamedImageManifest
from .live_images import ImageTag

_LOGGER = logging.getLogger(__name__)

def find_garbage_manifests(
min_age: timedelta,
active_tags: AbstractSet[ImageTag],
image_manifests: Iterable[ImageManifest]) -> Iterable[ImageManifest]:
image_manifests: Iterable[NamedImageManifest]) -> Iterable[NamedImageManifest]:
"""Yield a sequence of the manifests that are considered garbage.

Image manifests are considered garbage if they are both:
Expand All @@ -25,7 +25,8 @@ def find_garbage_manifests(
keep_newer_than_time = datetime.now(timezone.utc) - min_age
_LOGGER.debug('Keeping images newer than %s', keep_newer_than_time)

for manifest in image_manifests:
for named_manifest in image_manifests:
manifest = named_manifest.manifest
# It would probably be faster to check timestamps before tags, but
# the tags are more important of a reason to keep an image, so we
# check tags first so that the tag reference gets logged instead of
Expand All @@ -40,4 +41,4 @@ def find_garbage_manifests(
continue

_LOGGER.info('%s: garbage', manifest.digest)
yield manifest
yield named_manifest
17 changes: 8 additions & 9 deletions tools/ci-scripts/linux/image-cleanup/collector/live_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@

from typing import AbstractSet, Iterable, NewType, Sequence, Set # pylint: disable=unused-import

from .config import REGISTRY_NAME, REPOSITORY_NAME

_LOGGER = logging.getLogger(__name__)

_BlobId = NewType('_BlobId', str) # pylint: disable=invalid-name
Expand Down Expand Up @@ -143,7 +141,7 @@ def live_images(
_blobs_from_roots(repo_path, roots))

def live_tags(repo_path: str,
              roots: RevListRoots,
              registry_name: str,
              repository_names: Iterable[str]) -> AbstractSet[ImageTag]:
    """Return the image tags that are referenced by .travis.yml or Linux GitHub
    Action workflow files in the commits specified by the given `roots`.

    Only images whose name starts with ``<registry_name>.azurecr.io/<repo>``
    for one of the given repositories are considered; all others are logged
    and discarded.

    :param repo_path: Passed through to ``live_images``; presumably the path
        of the git repository to inspect.
    :param roots: A collection of argument lists to pass to ``git rev-list``
        to limit the matched commits.
    :param registry_name: Name of the container registry (the part before
        ``.azurecr.io``).
    :param repository_names: Names of the repositories within the registry
        whose images should be considered.
    :return: The set of tags of all matching images.
    """
    prefixes = ['{}.azurecr.io/{}'.format(registry_name, repository_name) for repository_name in repository_names]
    # str.startswith accepts a tuple of candidates, checked in one call.
    prefix_tuple = tuple(prefixes)

    def matches_expected_prefix(image_name: ImageName) -> bool:
        """Check (and log) whether an image name is from the expected repository."""
        if image_name.startswith(prefix_tuple):
            return True

        _LOGGER.info(
            'Discarding image "%s" that does not match any expected prefixes "%s"',
            image_name,
            prefixes)
        return False

    def tag_of(image_name: ImageName) -> ImageTag:
        """Extract the tag (the text after the first ``:``) from an image name."""
        parts = image_name.split(':')
        if len(parts) < 2:
            # An untagged reference used to raise IndexError here. Docker
            # resolves such references to the "latest" tag, so keep that tag
            # alive; this only ever protects more images, never fewer.
            _LOGGER.info('Image "%s" has no tag; treating it as "latest"', image_name)
            return ImageTag('latest')
        return ImageTag(parts[1])

    return frozenset(tag_of(image_name)
                     for image_name in live_images(repo_path, roots)
                     if matches_expected_prefix(image_name))