From 95e772f8f48cfdecfcdd26b01a546dc80c1f6ff0 Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Sat, 25 Nov 2017 13:58:43 -0800 Subject: [PATCH 01/27] add certificates arg --- tools/fast_puller_.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/fast_puller_.py b/tools/fast_puller_.py index c01d6aaf0..5607bd681 100755 --- a/tools/fast_puller_.py +++ b/tools/fast_puller_.py @@ -45,6 +45,13 @@ parser.add_argument('--directory', action='store', help='Where to save the image\'s files.') +parser.add_argument('--certificate', nargs='*', help='A comma separated ' + + 'tuple of key file, cert, and domain. (From httplib2 ' + + 'docs) Add a key and cert that will be used for an SSL ' + + 'connection to the specified domain. keyfile is the name ' + + 'of a PEM formatted file that contains your private key. ' + + 'certfile is a PEM formatted certificate chain file.') + _THREADS = 8 From d05966fe42f7f20186014430f80d76d2ad43be17 Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Sat, 25 Nov 2017 14:05:29 -0800 Subject: [PATCH 02/27] add certificate --- tools/fast_puller_.py | 6 +++++- transport/transport_pool_.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/fast_puller_.py b/tools/fast_puller_.py index 5607bd681..1ff9115f1 100755 --- a/tools/fast_puller_.py +++ b/tools/fast_puller_.py @@ -45,7 +45,7 @@ parser.add_argument('--directory', action='store', help='Where to save the image\'s files.') -parser.add_argument('--certificate', nargs='*', help='A comma separated ' + +parser.add_argument('--certificates', nargs='*', help='A comma separated ' + 'tuple of key file, cert, and domain. (From httplib2 ' + 'docs) Add a key and cert that will be used for an SSL ' + 'connection to the specified domain. keyfile is the name ' + @@ -65,6 +65,10 @@ def main(): transport = transport_pool.Http(httplib2.Http, size=_THREADS) + for item in args.certificates: + key, cert, domain = item.split(',') + transport.add_certificate(key, cert, domain) + if '@' in args.name: name = docker_name.Digest(args.name) else: diff --git a/transport/transport_pool_.py b/transport/transport_pool_.py index fda5550a7..8c9f0e7b4 100755 --- a/transport/transport_pool_.py +++ b/transport/transport_pool_.py @@ -47,6 +47,18 @@ def _return_transport(self, transport): # We returned an item, notify a waiting thread. self._condition.notify(n=1) + def add_certificate(self, key, cert, domain): + """Adds a certificate to all of the underlying transports. + + From httplib2 docs: + + Add a key and cert that will be used for an SSL connection to the + specified domain. keyfile is the name of a PEM formatted file that contains + your private key. certfile is a PEM formatted certificate chain file. + """ + for transport in self._transports: + transport.add_certificate(key, cert, domain) + def request(self, *args, **kwargs): """This awaits a transport and delegates the request call. From 26fd6553021ef1b2474bfc66de160f6b08512d5f Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Sat, 25 Nov 2017 14:06:22 -0800 Subject: [PATCH 03/27] certificates --- tools/fast_pusher_.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/fast_pusher_.py b/tools/fast_pusher_.py index 5a3f8468b..2dcee186e 100755 --- a/tools/fast_pusher_.py +++ b/tools/fast_pusher_.py @@ -61,6 +61,13 @@ parser.add_argument('--oci', action='store_true', help='Push the image with an OCI Manifest.') +parser.add_argument('--certificates', nargs='*', help='A comma separated ' + + 'tuple of key file, cert, and domain. (From httplib2 ' + + 'docs) Add a key and cert that will be used for an SSL ' + + 'connection to the specified domain. keyfile is the name ' + + 'of a PEM formatted file that contains your private key. ' + + 'certfile is a PEM formatted certificate chain file.') + _THREADS = 8 @@ -123,6 +130,10 @@ def main(): transport = transport_pool.Http(httplib2.Http, size=_THREADS) + for item in args.certificates: + key, cert, domain = item.split(',') + transport.add_certificate(key, cert, domain) + # Resolve the appropriate credential to use based on the standard Docker # client logic. creds = docker_creds.DefaultKeychain.Resolve(name) From 5127610dd7583b3dedf137b699ded1e4270a137b Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Sat, 25 Nov 2017 14:13:08 -0800 Subject: [PATCH 04/27] verify first --- tools/fast_puller_.py | 8 +++++--- tools/fast_pusher_.py | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/fast_puller_.py b/tools/fast_puller_.py index 1ff9115f1..8527eca8b 100755 --- a/tools/fast_puller_.py +++ b/tools/fast_puller_.py @@ -65,9 +65,11 @@ def main(): transport = transport_pool.Http(httplib2.Http, size=_THREADS) - for item in args.certificates: - key, cert, domain = item.split(',') - transport.add_certificate(key, cert, domain) + if args.certificates: + for item in args.certificates: + logging.info('Adding certificate %s', item) + key, cert, domain = item.split(',') + transport.add_certificate(key, cert, domain) if '@' in args.name: name = docker_name.Digest(args.name) diff --git a/tools/fast_pusher_.py b/tools/fast_pusher_.py index 2dcee186e..0f89797dc 100755 --- a/tools/fast_pusher_.py +++ b/tools/fast_pusher_.py @@ -130,9 +130,11 @@ def main(): transport = transport_pool.Http(httplib2.Http, size=_THREADS) - for item in args.certificates: - key, cert, domain = item.split(',') - transport.add_certificate(key, cert, domain) + if args.certificates: + for item in args.certificates: + logging.info('Adding certificate %s', item) + key, cert, domain = item.split(',') + transport.add_certificate(key, cert, domain) # Resolve the appropriate credential to use based on the standard Docker # client logic. From 55bc69bf02ecc3e0394b6d7721c58eb91e78e584 Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Fri, 9 Feb 2018 14:14:45 -0800 Subject: [PATCH 05/27] Speed up pull by parallelizing layer fetching --- client/v2_2/v2_compat_.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/client/v2_2/v2_compat_.py b/client/v2_2/v2_compat_.py index 1d97f161c..259219d6f 100755 --- a/client/v2_2/v2_compat_.py +++ b/client/v2_2/v2_compat_.py @@ -19,6 +19,7 @@ import hashlib import json +import concurrent.futures from containerregistry.client.v2 import docker_image as v2_image from containerregistry.client.v2 import util as v2_util from containerregistry.client.v2_2 import docker_http @@ -112,14 +113,21 @@ def _ProcessImage(self): # Compute the config_file for the v2.2 image. # TODO(b/62576117): Remove the pytype disable. + + + with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor: + diff_id_futures = [ + executor.submit(self._GetDiffId, digest) + for digest in reversed( + self._v2_image.fs_layers()) # pytype: disable=wrong-arg-types + ] + + diff_ids = [f.result() for f in diff_id_futures] + self._config_file = config_file([ json.loads(history.get('v1Compatibility', '{}')) for history in reversed(manifest_schema1.get('history', [])) - ], [ - self._GetDiffId(digest) - for digest in reversed( - self._v2_image.fs_layers()) # pytype: disable=wrong-arg-types - ]) + ], diff_ids) config_descriptor = { 'mediaType': docker_http.CONFIG_JSON_MIME, From acb6c8c2ba0e14e65b958613020746d0b7783ecb Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Fri, 9 Feb 2018 14:16:45 -0800 Subject: [PATCH 06/27] fix style --- client/v2_2/v2_compat_.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/client/v2_2/v2_compat_.py b/client/v2_2/v2_compat_.py index 259219d6f..c77dd2300 100755 --- a/client/v2_2/v2_compat_.py +++ b/client/v2_2/v2_compat_.py @@ -113,8 +113,6 @@ def _ProcessImage(self): # Compute the config_file for the v2.2 image. # TODO(b/62576117): Remove the pytype disable. - - with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor: diff_id_futures = [ executor.submit(self._GetDiffId, digest) From 97284c4334b280a74658f80f6e0d334c7d98b2f2 Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Fri, 9 Feb 2018 14:39:03 -0800 Subject: [PATCH 07/27] zcat --- client/v2/docker_image_.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/client/v2/docker_image_.py b/client/v2/docker_image_.py index c2f84d4d1..4b65ef3f5 100755 --- a/client/v2/docker_image_.py +++ b/client/v2/docker_image_.py @@ -23,6 +23,7 @@ import httplib import json import os +import subprocess import tarfile from containerregistry.client import docker_creds @@ -84,8 +85,12 @@ def blob(self, digest): def uncompressed_blob(self, digest): """Same as blob() but uncompressed.""" buf = cStringIO.StringIO(self.blob(digest)) - f = gzip.GzipFile(mode='rb', fileobj=buf) - return f.read() + # f = gzip.GzipFile(mode='rb', fileobj=buf) + # return f.read() + p = subprocess.Popen(["zcat"], stdout = subprocess.PIPE, stdin = subprocess.PIPE) + fh = cStringIO.StringIO(p.communicate(buf.read())[0]) + assert p.returncode == 0 + return fh.read() # __enter__ and __exit__ allow use as a context manager. @abc.abstractmethod From 2fcc7004bc53115d5285c5615a85bea0c0ff5c77 Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Sun, 11 Feb 2018 18:36:43 -0800 Subject: [PATCH 08/27] disable v2 --- tools/fast_puller_.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/fast_puller_.py b/tools/fast_puller_.py index e3a3dc153..97daacb4f 100755 --- a/tools/fast_puller_.py +++ b/tools/fast_puller_.py @@ -105,6 +105,9 @@ def main(): save.fast(v2_2_img, args.directory, threads=_THREADS) return + logging.fatal('Could not find V2.2 Image %r', name) + sys.exit(1) + logging.info('Pulling v2 image from %r ...', name) with v2_image.FromRegistry(name, creds, transport) as v2_img: with v2_compat.V22FromV2(v2_img) as v2_2_img: From fcad3e60870edc878a406f4e91a4f7d91e19ed73 Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Sun, 11 Feb 2018 20:01:04 -0800 Subject: [PATCH 09/27] WIP2 --- client/v2/docker_image_.py | 12 ++++++------ tools/fast_puller_.py | 7 +++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/client/v2/docker_image_.py b/client/v2/docker_image_.py index 4b65ef3f5..88458fb28 100755 --- a/client/v2/docker_image_.py +++ b/client/v2/docker_image_.py @@ -85,12 +85,12 @@ def blob(self, digest): def uncompressed_blob(self, digest): """Same as blob() but uncompressed.""" buf = cStringIO.StringIO(self.blob(digest)) - # f = gzip.GzipFile(mode='rb', fileobj=buf) - # return f.read() - p = subprocess.Popen(["zcat"], stdout = subprocess.PIPE, stdin = subprocess.PIPE) - fh = cStringIO.StringIO(p.communicate(buf.read())[0]) - assert p.returncode == 0 - return fh.read() + f = gzip.GzipFile(mode='rb', fileobj=buf) + return f.read() + # p = subprocess.Popen(["zcat"], stdout = subprocess.PIPE, stdin = subprocess.PIPE) + # fh = cStringIO.StringIO(p.communicate(buf.read())[0]) + # assert p.returncode == 0 + # return fh.read() # __enter__ and __exit__ allow use as a context manager. @abc.abstractmethod diff --git a/tools/fast_puller_.py b/tools/fast_puller_.py index 97daacb4f..11a0812fa 100755 --- a/tools/fast_puller_.py +++ b/tools/fast_puller_.py @@ -21,6 +21,7 @@ import argparse import logging import sys +import traceback from containerregistry.client import docker_creds from containerregistry.client import docker_name @@ -54,7 +55,7 @@ 'of a PEM formatted file that contains your private key. ' + 'certfile is a PEM formatted certificate chain file.') -_THREADS = 8 +_THREADS = 50 def main(): @@ -105,9 +106,6 @@ def main(): save.fast(v2_2_img, args.directory, threads=_THREADS) return - logging.fatal('Could not find V2.2 Image %r', name) - sys.exit(1) - logging.info('Pulling v2 image from %r ...', name) with v2_image.FromRegistry(name, creds, transport) as v2_img: with v2_compat.V22FromV2(v2_img) as v2_2_img: @@ -116,6 +114,7 @@ def main(): # pylint: disable=broad-except except Exception as e: logging.fatal('Error pulling and saving image %s: %s', name, e) + traceback.print_exc() sys.exit(1) From 558bf466714f17e2f8666d77fde674ad68bfcfd3 Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Sun, 11 Feb 2018 22:56:31 -0800 Subject: [PATCH 10/27] revert stuff --- client/v2/docker_image_.py | 5 ----- tools/fast_puller_.py | 4 +--- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/client/v2/docker_image_.py b/client/v2/docker_image_.py index 88458fb28..c2f84d4d1 100755 --- a/client/v2/docker_image_.py +++ b/client/v2/docker_image_.py @@ -23,7 +23,6 @@ import httplib import json import os -import subprocess import tarfile from containerregistry.client import docker_creds @@ -87,10 +86,6 @@ def uncompressed_blob(self, digest): buf = cStringIO.StringIO(self.blob(digest)) f = gzip.GzipFile(mode='rb', fileobj=buf) return f.read() - # p = subprocess.Popen(["zcat"], stdout = subprocess.PIPE, stdin = subprocess.PIPE) - # fh = cStringIO.StringIO(p.communicate(buf.read())[0]) - # assert p.returncode == 0 - # return fh.read() # __enter__ and __exit__ allow use as a context manager. @abc.abstractmethod diff --git a/tools/fast_puller_.py b/tools/fast_puller_.py index 11a0812fa..e3a3dc153 100755 --- a/tools/fast_puller_.py +++ b/tools/fast_puller_.py @@ -21,7 +21,6 @@ import argparse import logging import sys -import traceback from containerregistry.client import docker_creds from containerregistry.client import docker_name @@ -55,7 +54,7 @@ 'of a PEM formatted file that contains your private key. ' + 'certfile is a PEM formatted certificate chain file.') -_THREADS = 50 +_THREADS = 8 def main(): @@ -114,7 +113,6 @@ def main(): # pylint: disable=broad-except except Exception as e: logging.fatal('Error pulling and saving image %s: %s', name, e) - traceback.print_exc() sys.exit(1) From af00d7f639209a9153795e36ea5da2ff9377c09f Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Sun, 11 Feb 2018 22:57:56 -0800 Subject: [PATCH 11/27] require v2.2 image --- tools/fast_puller_.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/fast_puller_.py b/tools/fast_puller_.py index e3a3dc153..19da38faf 100755 --- a/tools/fast_puller_.py +++ b/tools/fast_puller_.py @@ -105,6 +105,9 @@ def main(): save.fast(v2_2_img, args.directory, threads=_THREADS) return + logging.fatal('v2.2 image not found: %r', name) + sys.exit(1) + logging.info('Pulling v2 image from %r ...', name) with v2_image.FromRegistry(name, creds, transport) as v2_img: with v2_compat.V22FromV2(v2_img) as v2_2_img: From bd5ead382cbbfed0307c82f54489a1a0055c579a Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Mon, 12 Feb 2018 16:53:04 -0800 Subject: [PATCH 12/27] allow v2 --- tools/fast_puller_.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/fast_puller_.py b/tools/fast_puller_.py index 19da38faf..6b777d1ce 100755 --- a/tools/fast_puller_.py +++ b/tools/fast_puller_.py @@ -47,6 +47,9 @@ parser.add_argument('--directory', action='store', help='Where to save the image\'s files.') +parser.add_argument('--allow-v2', action='store_true', + help='Allow pulling V2 Images') + parser.add_argument('--certificates', nargs='*', help='A comma separated ' + 'tuple of key file, cert, and domain. (From httplib2 ' + 'docs) Add a key and cert that will be used for an SSL ' + @@ -105,8 +108,9 @@ def main(): save.fast(v2_2_img, args.directory, threads=_THREADS) return - logging.fatal('v2.2 image not found: %r', name) - sys.exit(1) + if not args.allow_v2: + logging.fatal('v2.2 image not found: %r', name) + sys.exit(1) logging.info('Pulling v2 image from %r ...', name) with v2_image.FromRegistry(name, creds, transport) as v2_img: From 5dc1ab09c4e45f93c1537b013b765900e055b3b5 Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Mon, 19 Feb 2018 22:10:02 -0800 Subject: [PATCH 13/27] pull individual layers --- client/v2_2/save_.py | 7 ++++++- tools/fast_puller_.py | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/client/v2_2/save_.py b/client/v2_2/save_.py index 9aa377e07..1d4a0a0f8 100755 --- a/client/v2_2/save_.py +++ b/client/v2_2/save_.py @@ -121,7 +121,8 @@ def tarball( def fast( image, directory, - threads = 1 + threads = 1, + first_layer = 0, ): """Produce a FromDisk compatible file layout under the provided directory. @@ -167,6 +168,10 @@ def write_file( layers = [] for blob in reversed(image.fs_layers()): # Create a local copy + if idx < first_layer: + idx += 1 + continue + digest_name = os.path.join(directory, '%03d.sha256' % idx) f = executor.submit(write_file, digest_name, # Strip the sha256: prefix diff --git a/tools/fast_puller_.py b/tools/fast_puller_.py index 6b777d1ce..0c85db979 100755 --- a/tools/fast_puller_.py +++ b/tools/fast_puller_.py @@ -50,6 +50,9 @@ parser.add_argument('--allow-v2', action='store_true', help='Allow pulling V2 Images') +parser.add_argument('--first-layer', action='store', type=int, + help=('WIP')) + parser.add_argument('--certificates', nargs='*', help='A comma separated ' + 'tuple of key file, cert, and domain. (From httplib2 ' + 'docs) Add a key and cert that will be used for an SSL ' + @@ -105,7 +108,7 @@ def main(): logging.info('Pulling v2.2 image from %r ...', name) with v2_2_image.FromRegistry(name, creds, transport, accept) as v2_2_img: if v2_2_img.exists(): - save.fast(v2_2_img, args.directory, threads=_THREADS) + save.fast(v2_2_img, args.directory, threads=_THREADS, first_layer=args.first_layer) return if not args.allow_v2: From a5117e19f3167da52c1a43bed8be1862e27990bc Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 21 Sep 2018 23:23:05 -0700 Subject: [PATCH 14/27] [PROD-17859] Properly handle opaque whiteout entries in Docker layer flattener Flattened version of upstream PR https://github.com/google/containerregistry/pull/110 --- BUILD.bazel | 11 +++ client/v2_2/docker_image_.py | 21 +++-- client_v2_2_unit_tests.py | 167 +++++++++++++++++++++++++++++++++++ 3 files changed, 193 insertions(+), 6 deletions(-) create mode 100644 client_v2_2_unit_tests.py diff --git a/BUILD.bazel b/BUILD.bazel index 1a81f8466..156859036 100755 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -101,3 +101,14 @@ sh_test( ":pusher.par", ], ) + +py_test( + name = "client_v2_2_unit_tests", + size = "large", + srcs = [ + "client_v2_2_unit_tests.py", + ":containerregistry", + ], + main = "client_v2_2_unit_tests.py", +) + diff --git a/client/v2_2/docker_image_.py b/client/v2_2/docker_image_.py index 8bfb51ccf..8eb644777 100755 --- a/client/v2_2/docker_image_.py +++ b/client/v2_2/docker_image_.py @@ -697,20 +697,18 @@ def __exit__(self, unused_type, unused_value, unused_traceback): pass -def _in_whiteout_dir( - fs, - name -): +def _in_whiteout_dir(fs, opaque_whiteouts, name): while name: dirname = os.path.dirname(name) if name == dirname: break - if fs.get(dirname): + if fs.get(dirname) or dirname in opaque_whiteouts: return True name = dirname return False _WHITEOUT_PREFIX = '.wh.' +_OPAQUE_WHITEOUT_FILENAME = '.wh..wh..opq' def extract(image, tar): @@ -724,17 +722,27 @@ def extract(image, tar): # to whether they are a tombstone or not. fs = {} + opaque_whiteouts_in_higher_layers = set() + # Walk the layers, topmost first and add files. If we've seen them in a # higher layer then we skip them for layer in image.diff_ids(): buf = cStringIO.StringIO(image.uncompressed_layer(layer)) with tarfile.open(mode='r:', fileobj=buf) as layer_tar: + opaque_whiteouts_in_this_layer = [] for tarinfo in layer_tar: # If we see a whiteout file, then don't add anything to the tarball # but ensure that any lower layers don't add a file with the whited # out name. basename = os.path.basename(tarinfo.name) dirname = os.path.dirname(tarinfo.name) + + # If we see an opaque whiteout file, then don't add anything to the + # tarball but ensure that any lower layers don't add files or + # directories which are siblings of the whiteout file. + if basename == _OPAQUE_WHITEOUT_FILENAME: + opaque_whiteouts_in_this_layer.append(dirname) + tombstone = basename.startswith(_WHITEOUT_PREFIX) if tombstone: basename = basename[len(_WHITEOUT_PREFIX):] @@ -746,7 +754,7 @@ def extract(image, tar): continue # Check for a whited out parent directory - if _in_whiteout_dir(fs, name): + if _in_whiteout_dir(fs, opaque_whiteouts_in_higher_layers, name): continue # Mark this file as handled by adding its name. @@ -758,3 +766,4 @@ def extract(image, tar): tar.addfile(tarinfo, fileobj=layer_tar.extractfile(tarinfo)) else: tar.addfile(tarinfo, fileobj=None) + opaque_whiteouts_in_higher_layers.update(opaque_whiteouts_in_this_layer) diff --git a/client_v2_2_unit_tests.py b/client_v2_2_unit_tests.py new file mode 100644 index 000000000..55792c41d --- /dev/null +++ b/client_v2_2_unit_tests.py @@ -0,0 +1,167 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict +import io +from StringIO import StringIO +import tarfile +import unittest + +from containerregistry.client.v2_2 import docker_image as v2_2_image + + +class MockImage(object): + """Mock of DockerImage, implementing only the methods called by extract().""" + + def __init__(self): + self._fs_layers = OrderedDict() + + def add_layer(self, filenames): + """Add a layer to the image. + + Args: + filenames: a list of filenames or (filename, content) pairs. Filenames + with trailing slashes become directory entries in the generated tar + """ + buf = io.BytesIO() + with tarfile.open(mode='w:', fileobj=buf) as tf: + for entry in filenames: + if (isinstance(entry, basestring)): + name = entry + content = "" + else: + (name, content) = entry + tarinfo = tarfile.TarInfo(name) + tarinfo.size = len(content) + if name.endswith("/"): + tarinfo.type = tarfile.DIRTYPE + tf.addfile(tarinfo, fileobj=(StringIO(content) if content else None)) + buf.seek(0) + new_layer_id = str(len(self._fs_layers)) + self._fs_layers[new_layer_id] = buf.getvalue() + + def diff_ids(self): + return reversed(self._fs_layers.keys()) + + def uncompressed_layer(self, layer_id): + return self._fs_layers[layer_id] + + +class TestExtract(unittest.TestCase): + + def _test_flatten(self, layer_filenames, expected_flattened_output): + # Construct a mock DockerImage with the specified layers: + img = MockImage() + for filenames in layer_filenames: + img.add_layer(filenames) + buf = io.BytesIO() + + # Run the actual extract logic: + with tarfile.open(mode='w:', fileobj=buf) as tar: + v2_2_image.extract(img, tar) + + # Compare the extract() output to the expected results: + buf.seek(0) + flattened_output = [] + with tarfile.open(mode='r', fileobj=buf) as tar: + for tarinfo in tar: + if tarinfo.isdir(): + flattened_output.append(tarinfo.name + "/") + else: + contents = tar.extractfile(tarinfo).read() + if contents: + flattened_output.append((tarinfo.name, contents)) + else: + flattened_output.append(tarinfo.name) + self.assertEqual(flattened_output, expected_flattened_output) + + def test_single_layer(self): + self._test_flatten( + [["/directory/", "/file"]], + ["/directory/", "/file"] + ) + + def test_purely_additive_layers(self): + self._test_flatten( + [ + ["dir/", "dir/file1", "file"], + ["dir/file2", "file2"] + ], + ["dir/file2", "file2", "dir/", "dir/file1", "file"] + ) + + def test_highest_layer_of_file_takes_precedence(self): + self._test_flatten( + [ + [("file", "a")], + [("file", "b")] + ], + [("file", "b")] + ) + + def test_single_file_whiteout(self): + self._test_flatten( + [ + ["/foo"], + ["/.wh.foo"] + ], + [] + ) + + def test_parent_directory_whiteout(self): + self._test_flatten( + [ + ["/x/a/", "/x/b/", "/x/b/1"], + ["/x/.wh.b"] + ], + ["/x/a/"] + ) + + def test_opaque_whiteout(self): + # Example from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts + self._test_flatten( + [ + ["a/", "a/b/", "a/b/c/", "a/b/c/bar"], + ["a/", "a/.wh..wh..opq", "a/b/", "a/b/c/", "a/b/c/foo"], + ], + ["a/", "a/b/", "a/b/c/", "a/b/c/foo"], + ) + + self._test_flatten( + [ + ["a/", "a/b/", "a/b/c/", "a/b/c/bar"], + ["a/", "a/b/", "a/b/c/", "a/b/c/foo", "a/.wh..wh..opq"], + ], + ["a/", "a/b/", "a/b/c/", "a/b/c/foo"], + ) + + def test_opaque_whiteout_preserves_parent_directory(self): + # Example from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts + self._test_flatten( + [ + [ + "bin/", + "bin/my-app-binary", + "bin/my-app-tools", + "bin/tools/", + "bin/tools/my-app-tool-one" + ], + ["bin/.wh..wh..opq"], + ], + ["bin/"], + ) + + +if __name__ == "__main__": + unittest.main(verbosity=2) From 5e6e83e5d56eb2a3f2bfd1b71d5f90df75c6da46 Mon Sep 17 00:00:00 2001 From: alphalzh Date: Fri, 31 May 2019 11:32:00 -0700 Subject: [PATCH 15/27] make it azure compatible --- client/v2_2/docker_http_.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/client/v2_2/docker_http_.py b/client/v2_2/docker_http_.py index a45efd834..7a78bdce3 100755 --- a/client/v2_2/docker_http_.py +++ b/client/v2_2/docker_http_.py @@ -301,12 +301,13 @@ def _Refresh(self): (resp.status, content)) wrapper_object = json.loads(content) - _CheckState('token' in wrapper_object, + token = wrapper_object.get('token') or wrapper_object.get('access_token') + _CheckState(token is not None, 'Malformed JSON response: %s' % content) with self._lock: # We have successfully reauthenticated. - self._creds = v2_2_creds.Bearer(wrapper_object['token']) + self._creds = v2_2_creds.Bearer(token) # pylint: disable=invalid-name def Request( From 02fd12f509f05eaff25ba1937395f7799d27fa61 Mon Sep 17 00:00:00 2001 From: Zach Reardon <31675661+zreardond@users.noreply.github.com> Date: Fri, 19 Jul 2019 15:24:56 -0700 Subject: [PATCH 16/27] Add the ability to accept multiple cert tuples like the description says (#6) --- tools/fast_pusher_.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/tools/fast_pusher_.py b/tools/fast_pusher_.py index 8764244c0..9b8a0f36d 100755 --- a/tools/fast_pusher_.py +++ b/tools/fast_pusher_.py @@ -23,6 +23,7 @@ import argparse import logging import sys +import os from containerregistry.client import docker_creds from containerregistry.client import docker_name @@ -36,7 +37,6 @@ import httplib2 - parser = argparse.ArgumentParser( description='Push images to a Docker Registry, faaaaaast.') @@ -68,7 +68,8 @@ 'docs) Add a key and cert that will be used for an SSL ' + 'connection to the specified domain. keyfile is the name ' + 'of a PEM formatted file that contains your private key. ' + - 'certfile is a PEM formatted certificate chain file.') + 'certfile is a PEM formatted certificate chain file. ' + + 'If the key/cert does not exist it will be ignored.') _THREADS = 8 @@ -139,10 +140,28 @@ def main(): transport = transport_pool.Http(retry_factory.Build, size=_THREADS) if args.certificates: + found_one = False + for item in args.certificates: logging.info('Adding certificate %s', item) key, cert, domain = item.split(',') - transport.add_certificate(key, cert, domain) + # httplib2 does not like taking cert files that do not exist, so check that they exist + if os.path.isfile(key) and os.path.isfile(cert): + transport.add_certificate(key, cert, domain) + test_cert_response = transport.request("https://" + domain + "/certtestignore", "POST", body="test") + # If the return value is success, redirect, or page not found we know that we are authorized, + # but that the image /certtestignore is not a real image. Which is expected. + if test_cert_response[0].status == 200 or \ + test_cert_response[0].status == 201 or \ + test_cert_response[0].status == 307 or \ + test_cert_response[0].status == 404: + found_one = True + break; + transport = transport_pool.Http(retry_factory.Build, size=_THREADS) + + if not found_one: + logging.fatal("Local kube cert expired/is not present. Please run './eng-tools/bin/get-kube-access dev'") + sys.exit(1) logging.info('Loading v2.2 image from disk ...') with v2_2_image.FromDisk(config, zip(args.digest or [], args.layer or []), From 74884be0d78e97809e1a8864cea24ef21a82a6ad Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Fri, 5 Jun 2020 20:28:18 -0700 Subject: [PATCH 17/27] Fix missing dependency --- BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/BUILD.bazel b/BUILD.bazel index 34451bd25..99e85edba 100755 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -23,6 +23,7 @@ py_library( "transport/**/*.py", ]), deps = [ + "@concurrent", "@httplib2", "@oauth2client", "@six", From 47f213833f0ec83191c1d69d0c4f1bf8fbb6f213 Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Fri, 12 Jun 2020 14:04:14 -0700 Subject: [PATCH 18/27] remove outdated concurrent library --- BUILD.bazel | 1 - 1 file changed, 1 deletion(-) diff --git a/BUILD.bazel b/BUILD.bazel index 99e85edba..34451bd25 100755 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -23,7 +23,6 @@ py_library( "transport/**/*.py", ]), deps = [ - "@concurrent", "@httplib2", "@oauth2client", "@six", From d48d058b538792defbc27aaae115b8ac95d3847d Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Wed, 17 Jun 2020 20:07:45 -0700 Subject: [PATCH 19/27] remove subpar dependency --- BUILD.bazel | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 34451bd25..9af528cd0 100755 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -29,9 +29,7 @@ py_library( ], ) -load("@subpar//:subpar.bzl", "par_binary") - -par_binary( +py_binary( name = "appender", srcs = ["tools/docker_appender_.py"], main = "tools/docker_appender_.py", @@ -39,7 +37,7 @@ par_binary( deps = [":containerregistry"], ) -par_binary( +py_binary( name = "puller", srcs = ["tools/fast_puller_.py"], main = "tools/fast_puller_.py", @@ -47,7 +45,7 @@ par_binary( deps = [":containerregistry"], ) -par_binary( +py_binary( name = "flatten", srcs = ["tools/fast_flatten_.py"], main = "tools/fast_flatten_.py", @@ -55,7 +53,7 @@ par_binary( deps = [":containerregistry"], ) -par_binary( +py_binary( name = "importer", srcs = ["tools/fast_importer_.py"], main = "tools/fast_importer_.py", @@ -63,7 +61,7 @@ par_binary( deps = [":containerregistry"], ) -par_binary( +py_binary( name = "pusher", srcs = ["tools/fast_pusher_.py"], main = "tools/fast_pusher_.py", @@ -71,7 +69,7 @@ par_binary( deps = [":containerregistry"], ) -par_binary( +py_binary( name = "digester", srcs = ["tools/image_digester_.py"], main = "tools/image_digester_.py", From 53482a6a2b416b7e38b9887834b39e75bff9d2d6 Mon Sep 17 00:00:00 2001 From: Kevin Qian Date: Thu, 14 Mar 2024 15:35:28 -0700 Subject: [PATCH 20/27] Remove 2GiB layer size limit by writing in chunks --- client/v2_2/docker_image_.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/client/v2_2/docker_image_.py b/client/v2_2/docker_image_.py index 84251209f..a56a6bc56 100755 --- a/client/v2_2/docker_image_.py +++ b/client/v2_2/docker_image_.py @@ -406,6 +406,18 @@ def is_compressed(name): return name[0:2] == b'\x1f\x8b' +# Python earlier than 3.7 has a gzip.GzipFile bug that does not support writing +# content longer than 2^31 bytes. To work around this, we write the content in +# smaller chunks if exceed size limit. +def _write_large_content_to_zipped_file(zipped, content, chunk_size=2**31-1): + if len(content) > chunk_size: + # Write the content in chunks + for i in range(0, len(content), chunk_size): + zipped.write(content[i:i+chunk_size]) + else: + zipped.write(content) + + class FromTarball(DockerImage): """This decodes the image tarball output of docker_build for upload.""" @@ -458,7 +470,7 @@ def _content(self, zipped = gzip.GzipFile( mode='wb', compresslevel=self._compresslevel, fileobj=buf) try: - zipped.write(content) + _write_large_content_to_zipped_file(zipped, content) finally: zipped.close() content = buf.getvalue() From 20acfa354bf7e5dcbb6a24365628b4b37676585b Mon Sep 17 00:00:00 2001 From: Kevin Qian Date: Fri, 15 Mar 2024 12:06:22 -0700 Subject: [PATCH 21/27] Support chunked layer upload if layer size too large --- client/v2_2/docker_http_.py | 8 +++- client/v2_2/docker_session_.py | 70 +++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/client/v2_2/docker_http_.py b/client/v2_2/docker_http_.py index b3462c76b..e00626c6f 100755 --- a/client/v2_2/docker_http_.py +++ b/client/v2_2/docker_http_.py @@ -335,7 +335,8 @@ def Request(self, method = None, body = None, content_type = None, - accepted_mimes = None + accepted_mimes = None, + additional_headers = {} ): """Wrapper containing much of the boilerplate REST logic for Registry calls. @@ -347,7 +348,8 @@ def Request(self, body: the body to pass into the PUT request (or None for GET) content_type: the mime-type of the request (or None for JSON). content_type is ignored when body is None. - accepted_mimes: the list of acceptable mime-types + accepted_mimes: the list of acceptable mime-types. + additional_headers: additional headers to include in the request. Raises: BadStateException: an unexpected internal state has been encountered. @@ -382,6 +384,8 @@ def Request(self, if method in ('POST', 'PUT') and not body: headers['content-length'] = '0' + headers.update(additional_headers) + resp, content = self._transport.request( url, method, body=body, headers=headers) diff --git a/client/v2_2/docker_session_.py b/client/v2_2/docker_session_.py index 7aff6ae35..22c592f4f 100755 --- a/client/v2_2/docker_session_.py +++ b/client/v2_2/docker_session_.py @@ -32,6 +32,14 @@ import six.moves.urllib.parse +# Theoretically max is INT_MAX but we need some extra space for headers. +UPLOAD_CHUNK_MAX_SIZE = 2000000000 + + +def _exceed_max_chunk_size(image_body): + return len(image_body) > UPLOAD_CHUNK_MAX_SIZE + + def _tag_or_digest(name): if isinstance(name, docker_name.Tag): return name.tag @@ -150,10 +158,62 @@ def _put_upload(self, image, digest): method='PUT', body=self._get_blob(image, digest), accepted_codes=[six.moves.http_client.CREATED]) + + # pylint: disable=missing-docstring + def _patch_chunked_upload_image_body(self, image_body, digest): + if len(image_body) == 0: + raise ValueError('Empty image body') + + # See https://github.com/opencontainers/distribution-spec/blob/main/spec.md#pushing-a-blob-in-chunks + + mounted, location = self._start_upload(digest, self._mount) + + if mounted: + logging.info('Layer %s mounted.', digest) + return + + location = self._get_absolute_url(location) + + # Upload the content in chunks. Invoked at least once to get the response. + for i in range(0, len(image_body), UPLOAD_CHUNK_MAX_SIZE): + chunk = image_body[i:i + UPLOAD_CHUNK_MAX_SIZE] + chunk_start, chunk_end_inclusive = i, i + len(chunk) - 1 + logging.info('Uploading chunk range %d-%d', chunk_start, chunk_end_inclusive) + + resp, unused_content = self._transport.Request( + location, + method='PATCH', + body=chunk, + content_type='application/octet-stream', + additional_headers={ + 'Content-Range': '{start}-{end}'.format(start=chunk_start, end=chunk_end_inclusive) + }, + accepted_codes=[ + six.moves.http_client.NO_CONTENT, six.moves.http_client.ACCEPTED, + six.moves.http_client.CREATED + ]) + # Need to use the new location in the response. + location = self._get_absolute_url(resp.get('location')) + + location = self._add_digest(resp['location'], digest) + location = self._get_absolute_url(location) + self._transport.Request( + location, + method='PUT', + body=None, + accepted_codes=[six.moves.http_client.CREATED]) # pylint: disable=missing-docstring def _patch_upload(self, image, digest): + image_body = self._get_blob(image, digest) + # When the layer is too large, registry might reject. + # In this case, we need to do chunk upload. + if _exceed_max_chunk_size(image_body): + logging.info('Uploading layer %s in chunks.', digest) + self._patch_chunked_upload_image_body(image_body, digest) + return + mounted, location = self._start_upload(digest, self._mount) if mounted: @@ -165,7 +225,7 @@ def _patch_upload(self, image, resp, unused_content = self._transport.Request( location, method='PATCH', - body=self._get_blob(image, digest), + body=image_body, content_type='application/octet-stream', accepted_codes=[ six.moves.http_client.NO_CONTENT, six.moves.http_client.ACCEPTED, @@ -199,6 +259,14 @@ def _put_blob(self, image, digest): # POST /v2//blobs/uploads/ (no body*) # PATCH /v2//blobs/uploads/ (full body) # PUT /v2//blobs/uploads/ (no body) + # self._patch_upload(image, digest) + # + # When the layer is too large (> INT_MAX), we need to do chunk upload. + # POST /v2//blobs/uploads/ (no body*) + # PATCH /v2//blobs/uploads/ (body chunk 1) + # PATCH /v2//blobs/uploads/ (body chunk ...) + # PUT /v2//blobs/uploads/ (no body) + # self._patch_chunked_upload_image_body(image_body, digest) # # * We attempt to perform a cross-repo mount if any repositories are # specified in the "mount" parameter. This does a fast copy from a From dd11af6110ea3a978299007e1152494f0698bc81 Mon Sep 17 00:00:00 2001 From: "ruvan.jayaweera" Date: Thu, 11 Jul 2024 13:38:53 -0700 Subject: [PATCH 22/27] . --- client/docker_creds_.py | 29 +++++++++++++++++++++++++++-- client/v2_2/docker_http_.py | 3 +++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/client/docker_creds_.py b/client/docker_creds_.py index b320d04e3..fb375ab3f 100755 --- a/client/docker_creds_.py +++ b/client/docker_creds_.py @@ -92,6 +92,29 @@ def suffix(self): p = self.password.encode('utf8') return base64.b64encode(u + b':' + p).decode('utf8') +class IdentityToken(SchemeProvider): + """Implementation for providing ID token credentials.""" + + def __init__(self, token): + super(IdentityToken, self).__init__('Basic') + self._password = token + + @property + def username(self): + # MSFT for some reason requires the user name to be set to this value when using an identity token + # https://learn.microsoft.com/en-us/azure/container-registry/container-registry-authentication?tabs=azure-cli + return '00000000-0000-0000-0000-000000000000' + + @property + def password(self): + return self._password + + @property + def suffix(self): + u = self.username.encode('utf8') + p = self.password.encode('utf8') + return base64.b64encode(u + b':' + p).decode('utf8') + _USERNAME = '_token' @@ -278,14 +301,16 @@ def Resolve(self, name): for form in _FORMATS: if form % name.registry in auths: entry = auths[form % name.registry] - if 'auth' in entry: + if 'identitytoken' in entry: + token = entry['identitytoken'] + return IdentityToken(token) + elif 'auth' in entry: decoded = base64.b64decode(entry['auth']).decode('utf8') username, password = decoded.split(':', 1) return Basic(username, password) elif 'username' in entry and 'password' in entry: return Basic(entry['username'], entry['password']) else: - # TODO(user): Support identitytoken # TODO(user): Support registrytoken raise Exception( 'Unsupported entry in "auth" section of Docker config: ' + diff --git a/client/v2_2/docker_http_.py b/client/v2_2/docker_http_.py index e00626c6f..8a1c34e42 100755 --- a/client/v2_2/docker_http_.py +++ b/client/v2_2/docker_http_.py @@ -224,6 +224,9 @@ def _Ping(self): 'content-type': 'application/json', 'user-agent': docker_name.USER_AGENT, } + url = '{scheme}://{registry}/v2/'.format( + scheme=Scheme(self._name.registry), registry=self._name.registry), + resp, content = self._transport.request( '{scheme}://{registry}/v2/'.format( scheme=Scheme(self._name.registry), registry=self._name.registry), From 611e74f4a1409541af8932e34c2a06ba77ca91e1 Mon Sep 17 00:00:00 2001 From: "ruvan.jayaweera" Date: Wed, 17 Jul 2024 13:01:06 -0700 Subject: [PATCH 23/27] . --- client/v2_2/docker_http_.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/client/v2_2/docker_http_.py b/client/v2_2/docker_http_.py index 8a1c34e42..e00626c6f 100755 --- a/client/v2_2/docker_http_.py +++ b/client/v2_2/docker_http_.py @@ -224,9 +224,6 @@ def _Ping(self): 'content-type': 'application/json', 'user-agent': docker_name.USER_AGENT, } - url = '{scheme}://{registry}/v2/'.format( - scheme=Scheme(self._name.registry), registry=self._name.registry), - resp, content = self._transport.request( '{scheme}://{registry}/v2/'.format( scheme=Scheme(self._name.registry), registry=self._name.registry), From 8fee04620adfd777be23be1c36379ce1882d100a Mon Sep 17 00:00:00 2001 From: "ruvan.jayaweera" Date: Tue, 25 Mar 2025 17:46:22 +0000 Subject: [PATCH 24/27] Change http chunk size from 2e9 to 2e7 bytes --- client/v2_2/docker_session_.py | 6 ++---- tools/fast_pusher_.py | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/client/v2_2/docker_session_.py b/client/v2_2/docker_session_.py index 22c592f4f..7c9715eee 100755 --- a/client/v2_2/docker_session_.py +++ b/client/v2_2/docker_session_.py @@ -32,8 +32,8 @@ import six.moves.urllib.parse -# Theoretically max is INT_MAX but we need some extra space for headers. -UPLOAD_CHUNK_MAX_SIZE = 2000000000 +# 200 MB chunk to balance performance in poor networking conditions +UPLOAD_CHUNK_MAX_SIZE = int(2e8) def _exceed_max_chunk_size(image_body): @@ -178,8 +178,6 @@ def _patch_chunked_upload_image_body(self, image_body, digest): for i in range(0, len(image_body), UPLOAD_CHUNK_MAX_SIZE): chunk = image_body[i:i + UPLOAD_CHUNK_MAX_SIZE] chunk_start, chunk_end_inclusive = i, i + len(chunk) - 1 - logging.info('Uploading chunk range %d-%d', chunk_start, chunk_end_inclusive) - resp, unused_content = self._transport.Request( location, method='PATCH', diff --git a/tools/fast_pusher_.py b/tools/fast_pusher_.py index ef21fd0b9..05896762d 100755 --- a/tools/fast_pusher_.py +++ b/tools/fast_pusher_.py @@ -24,6 +24,7 @@ import argparse import logging +logging.getLogger().setLevel(logging.INFO) import sys import os From 1827af354be8547e775025a071a6a4f46491c266 Mon Sep 17 00:00:00 2001 From: ruvan Date: Tue, 25 Mar 2025 18:18:14 -0700 Subject: [PATCH 25/27] Retry on socket timeout (#12) --- client/v2_2/docker_session_.py | 1 + transport/retry_.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/client/v2_2/docker_session_.py b/client/v2_2/docker_session_.py index 7c9715eee..fcc33b4c0 100755 --- a/client/v2_2/docker_session_.py +++ b/client/v2_2/docker_session_.py @@ -178,6 +178,7 @@ def _patch_chunked_upload_image_body(self, image_body, digest): for i in range(0, len(image_body), UPLOAD_CHUNK_MAX_SIZE): chunk = image_body[i:i + UPLOAD_CHUNK_MAX_SIZE] chunk_start, chunk_end_inclusive = i, i + len(chunk) - 1 + logging.info('Pushing chunk(%d) for layer %s', i, digest) resp, unused_content = self._transport.Request( location, method='PATCH', diff --git a/transport/retry_.py b/transport/retry_.py index 2224e2b56..e6722b1a3 100755 --- a/transport/retry_.py +++ b/transport/retry_.py @@ -17,6 +17,8 @@ import logging import time +import socket +import errno from containerregistry.transport import nested @@ -28,11 +30,14 @@ DEFAULT_BACKOFF_FACTOR = 0.5 RETRYABLE_EXCEPTION_TYPES = [ six.moves.http_client.IncompleteRead, - six.moves.http_client.ResponseNotReady + six.moves.http_client.ResponseNotReady, + socket.timeout, ] def ShouldRetry(err): + if isinstance(err, OSError) and err.errno == errno.ETIMEDOUT: + return True for exception_type in RETRYABLE_EXCEPTION_TYPES: if isinstance(err, exception_type): return True From 1cd0c9ffdf5ed390287e1e9cfd386f7db873a874 Mon Sep 17 00:00:00 2001 From: ruvan Date: Fri, 28 Mar 2025 16:22:13 -0700 Subject: [PATCH 26/27] Set chunk size to 2GB (#13) --- client/v2_2/docker_session_.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/client/v2_2/docker_session_.py b/client/v2_2/docker_session_.py index fcc33b4c0..5014a7269 100755 --- a/client/v2_2/docker_session_.py +++ b/client/v2_2/docker_session_.py @@ -32,8 +32,10 @@ import six.moves.urllib.parse -# 200 MB chunk to balance performance in poor networking conditions -UPLOAD_CHUNK_MAX_SIZE = int(2e8) +# 200 MB chunk balances performance in poor networking conditions +# However we need to use 2GB for now for CMv2 to avoid hitting the partial upload api +# GCR registry unexpectedly drops partial upload connections (us-central1-docker.pkg.dev) +UPLOAD_CHUNK_MAX_SIZE = int(2e9) def _exceed_max_chunk_size(image_body): From 0802a556cff35a79e42b514e313231d31ed2d597 Mon Sep 17 00:00:00 2001 From: kevinqian-db <151585552+kevinqian-db@users.noreply.github.com> Date: Tue, 8 Apr 2025 16:51:14 -0700 Subject: [PATCH 27/27] Make push chunk size configurable (#14) * Make fast pusher chunk size configurable * Set 1GB as default * Typo * Revert to 2GB default --- client/v2_2/docker_session_.py | 21 ++++++++++----------- tools/fast_pusher_.py | 12 +++++++++++- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/client/v2_2/docker_session_.py b/client/v2_2/docker_session_.py index 5014a7269..d19cdc152 100755 --- a/client/v2_2/docker_session_.py +++ b/client/v2_2/docker_session_.py @@ -32,14 +32,11 @@ import six.moves.urllib.parse -# 200 MB chunk balances performance in poor networking conditions -# However we need to use 2GB for now for CMv2 to avoid hitting the partial upload api +# 200 MB chunk balances performance in poor networking conditions. Should only be used for slower registries. +# We need to use 2GB for now for CMv2 to avoid hitting the partial upload api # GCR registry unexpectedly drops partial upload connections (us-central1-docker.pkg.dev) -UPLOAD_CHUNK_MAX_SIZE = int(2e9) - - -def _exceed_max_chunk_size(image_body): - return len(image_body) > UPLOAD_CHUNK_MAX_SIZE +UPLOAD_CHUNK_SIZE_MAX = int(2e9) +UPLOAD_CHUNK_SIZE_MIN = int(2e7) def _tag_or_digest(name): @@ -58,7 +55,8 @@ def __init__(self, creds, transport, mount = None, - threads = 1): + threads = 1, + chunk_size = UPLOAD_CHUNK_SIZE_MAX): """Constructor. If multiple threads are used, the caller *must* ensure that the provided @@ -80,6 +78,7 @@ def __init__(self, docker_http.PUSH) self._mount = mount self._threads = threads + self._chunk_size = chunk_size def _scheme_and_host(self): return '{scheme}://{registry}'.format( @@ -177,8 +176,8 @@ def _patch_chunked_upload_image_body(self, image_body, digest): location = self._get_absolute_url(location) # Upload the content in chunks. Invoked at least once to get the response. - for i in range(0, len(image_body), UPLOAD_CHUNK_MAX_SIZE): - chunk = image_body[i:i + UPLOAD_CHUNK_MAX_SIZE] + for i in range(0, len(image_body), self._chunk_size): + chunk = image_body[i:i + self._chunk_size] chunk_start, chunk_end_inclusive = i, i + len(chunk) - 1 logging.info('Pushing chunk(%d) for layer %s', i, digest) resp, unused_content = self._transport.Request( @@ -210,7 +209,7 @@ def _patch_upload(self, image, image_body = self._get_blob(image, digest) # When the layer is too large, registry might reject. # In this case, we need to do chunk upload. - if _exceed_max_chunk_size(image_body): + if len(image_body) > self._chunk_size: logging.info('Uploading layer %s in chunks.', digest) self._patch_chunked_upload_image_body(image_body, digest) return diff --git a/tools/fast_pusher_.py b/tools/fast_pusher_.py index 05896762d..3b6a18a5e 100755 --- a/tools/fast_pusher_.py +++ b/tools/fast_pusher_.py @@ -95,6 +95,13 @@ 'certfile is a PEM formatted certificate chain file. ' + 'If the key/cert does not exist it will be ignored.') +parser.add_argument( + '--chunk-size', + type=int, + default=docker_session.UPLOAD_CHUNK_SIZE_MAX, + required=False, + help='The size of the upload chunk in bytes. Defaults to 2e9 (2 GB).') + _THREADS = 8 @@ -193,6 +200,9 @@ def main(): logging.fatal("Local kube cert expired/is not present. Please run './eng-tools/bin/get-kube-access dev'") sys.exit(1) + if args.chunk_size < docker_session.UPLOAD_CHUNK_SIZE_MIN or args.chunk_size > docker_session.UPLOAD_CHUNK_SIZE_MAX: + logging.warning('The upload chunk size ' + str(args.chunk_size) + ' is not within [20MB, 2GB] range. This may cause performance issues.') + logging.info('Loading v2.2 image from disk ...') with v2_2_image.FromDisk( config, @@ -210,7 +220,7 @@ def main(): try: with docker_session.Push( - name, creds, transport, threads=_THREADS) as session: + name, creds, transport, threads=_THREADS, chunk_size=args.chunk_size) as session: logging.info('Starting upload ...') if args.oci: with oci_compat.OCIFromV22(v2_2_img) as oci_img: