From 2801d26061e34609d6f6154fafdac668907493d2 Mon Sep 17 00:00:00 2001 From: Corey Zumar Date: Tue, 26 Jun 2018 16:52:34 -0500 Subject: [PATCH 1/8] DB deployment support for cluster IAM role --- mlflow/sagemaker/__init__.py | 77 +++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/mlflow/sagemaker/__init__.py b/mlflow/sagemaker/__init__.py index b276e6778d969..5766cf6065cae 100644 --- a/mlflow/sagemaker/__init__.py +++ b/mlflow/sagemaker/__init__.py @@ -15,6 +15,11 @@ DEFAULT_IMAGE_NAME = "mlflow_sage" +PYFUNC_IMAGE_URL = "707343435239.dkr.ecr.us-west-2.amazonaws.com/mlflow-pyfunc-test:latest" +DEFAULT_IMAGE_URL = PYFUNC_IMAGE_URL + +DEFAULT_BUCKET_NAME_PREFIX = "mlflow-sagemaker" + _DOCKERFILE_TEMPLATE = """ # Build an image that can serve pyfunc model in SageMaker FROM ubuntu:16.04 @@ -134,8 +139,8 @@ def push_image_to_ecr(image=DEFAULT_IMAGE_NAME): os.system(cmd) -def deploy(app_name, model_path, execution_role_arn, bucket, run_id=None, - image="mlflow_sage", region_name="us-west-2"): +def deploy(app_name, model_path, bucket=None, image_url=DEFAULT_IMAGE_URL, run_id=None, + region_name="us-west-2", execution_role_arn=None): """ Deploy model on Sagemaker. Current active AWS account needs to have correct permissions setup. @@ -146,23 +151,28 @@ def deploy(app_name, model_path, execution_role_arn, bucket, run_id=None, :param execution_role_arn: Amazon execution role with sagemaker rights :param bucket: S3 bucket where model artifacts are gonna be stored :param run_id: MLflow run id. - :param image: name of the Docker image to be used. + :param image_url: URL of the ECR-hosted docker image to be used :param region_name: Name of the AWS region to deploy to. """ prefix = model_path if run_id: model_path = _get_model_log_dir(model_path, run_id) - prefix = run_id + "/" + prefix - run_id = _check_compatible(model_path) + prefix = os.path.join(run_id, prefix) + run_id = _check_compatible(model_path) + + if bucket is None: + # Attempt to create a default bucket + eprint("No model data bucket specified, using the default bucket") + bucket = _get_default_s3_bucket() + model_s3_path = _upload_s3(local_model_path=model_path, bucket=bucket, prefix=prefix) _deploy(role=execution_role_arn, - image=image, + image_url=image_url, app_name=app_name, model_s3_path=model_s3_path, run_id=run_id, region_name=region_name) - def run_local(model_path, run_id=None, port=5000, image=DEFAULT_IMAGE_NAME): """ Serve model locally in a SageMaker compatible Docker container. @@ -212,6 +222,47 @@ def _make_tarfile(output_filename, source_dir): for f in os.listdir(source_dir): tar.add(os.path.join(source_dir, f), arcname=f) +def _get_account_id(): + sess = boto3.Session() + sts_client = sess.client("sts") + identity_info = sts_client.get_caller_identity() + account_id = identity_info["Account"] + return account_id + +def _get_assumed_role_arn(): + """ + :return: ARN of the user's current IAM role + """ + sess = boto3.Session() + sts_client = sess.client("sts") + identity_info = sts_client.get_caller_identity() + sts_arn = identity_info["Arn"] + role_name = sts_arn.split("/")[1] + iam_client = sess.client("iam") + role_response = iam_client.get_role(RoleName=role_name) + return role_response["Role"]["Arn"] + +def _get_default_s3_bucket(): + # create bucket if it does not exist + account_id = _get_account_id(sess) + bucket_name = "{pfx}-{aid}".format(pfx=DEFAULT_BUCKET_NAME_PREFIX, aid=account_id) + sess = boto3.Session() + s3 = sess.client('s3') + response = s3.list_buckets() + buckets = [b['Name'] for b in response["Buckets"]] + if not bucket_name in buckets: + print("Default bucket `%s` not found. Creating..." % bucket_name) + response = s3.create_bucket( + ACL='bucket-owner-full-control', + Bucket=bucket_name, + CreateBucketConfiguration={ + 'LocationConstraint': _get_region(), + }, + ) + print(response) + else: + print("Default bucket `%s` already exists. Skipping creation." % bucket_name) + return bucket_name def _upload_s3(local_model_path, bucket, prefix): """ @@ -236,28 +287,24 @@ def _upload_s3(local_model_path, bucket, prefix): Tagging={'TagSet': [{'Key': 'SageMaker', 'Value': 'true'}, ]} ) eprint('tag response', response) - return '{}/{}/{}'.format(s3.meta.endpoint_url, bucket, key) - + return os.path.join(s3.meta.endpoint_url, bucket, key) -def _deploy(role, image, app_name, model_s3_path, run_id, region_name): +def _deploy(role, image_url, app_name, model_s3_path, run_id, region_name): """ Deploy model on sagemaker. :param role: SageMaker execution ARN role - :param image: Name of the Docker image the model is being deployed into + :param image_url: URL of the ECR-hosted docker image the model is being deployed into :param app_name: Name of the deployed app :param model_s3_path: s3 path where we stored the model artifacts :param run_id: RunId that generated this model """ sage_client = boto3.client('sagemaker', region_name=region_name) - ecr_client = boto3.client("ecr") - repository_conf = ecr_client.describe_repositories( - repositoryNames=[image])['repositories'][0] model_name = app_name + '-model' model_response = sage_client.create_model( ModelName=model_name, PrimaryContainer={ 'ContainerHostname': 'mlflow-serve-%s' % model_name, - 'Image': repository_conf["repositoryUri"], + 'Image': image_url, 'ModelDataUrl': model_s3_path, 'Environment': {}, }, From 435adc03ff4f622bb4e5036d4076f2061e3328d2 Mon Sep 17 00:00:00 2001 From: Corey Zumar Date: Tue, 26 Jun 2018 16:54:25 -0500 Subject: [PATCH 2/8] fix --- mlflow/sagemaker/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlflow/sagemaker/__init__.py b/mlflow/sagemaker/__init__.py index 5766cf6065cae..253bbeb35861d 100644 --- a/mlflow/sagemaker/__init__.py +++ b/mlflow/sagemaker/__init__.py @@ -244,7 +244,7 @@ def _get_assumed_role_arn(): def _get_default_s3_bucket(): # create bucket if it does not exist - account_id = _get_account_id(sess) + account_id = _get_account_id() bucket_name = "{pfx}-{aid}".format(pfx=DEFAULT_BUCKET_NAME_PREFIX, aid=account_id) sess = boto3.Session() s3 = sess.client('s3') From 463e251bfb139442fe615de42f6296428bfed8ca Mon Sep 17 00:00:00 2001 From: Corey Zumar Date: Tue, 26 Jun 2018 17:02:29 -0500 Subject: [PATCH 3/8] Fix region name --- mlflow/sagemaker/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlflow/sagemaker/__init__.py b/mlflow/sagemaker/__init__.py index 253bbeb35861d..315726c9bc168 100644 --- a/mlflow/sagemaker/__init__.py +++ b/mlflow/sagemaker/__init__.py @@ -163,7 +163,7 @@ def deploy(app_name, model_path, bucket=None, image_url=DEFAULT_IMAGE_URL, run_i if bucket is None: # Attempt to create a default bucket eprint("No model data bucket specified, using the default bucket") - bucket = _get_default_s3_bucket() + bucket = _get_default_s3_bucket(region_name) model_s3_path = _upload_s3(local_model_path=model_path, bucket=bucket, prefix=prefix) _deploy(role=execution_role_arn, @@ -242,7 +242,7 @@ def _get_assumed_role_arn(): role_response = iam_client.get_role(RoleName=role_name) return role_response["Role"]["Arn"] -def _get_default_s3_bucket(): +def _get_default_s3_bucket(region_name): # create bucket if it does not exist account_id = _get_account_id() bucket_name = "{pfx}-{aid}".format(pfx=DEFAULT_BUCKET_NAME_PREFIX, aid=account_id) @@ -256,7 +256,7 @@ def _get_default_s3_bucket(): ACL='bucket-owner-full-control', Bucket=bucket_name, CreateBucketConfiguration={ - 'LocationConstraint': _get_region(), + 'LocationConstraint': region_name, }, ) print(response) From 8da00a67274d43a4b5edeafbf832411b5f1d876c Mon Sep 17 00:00:00 2001 From: Corey Zumar Date: Fri, 29 Jun 2018 00:55:03 -0500 Subject: [PATCH 4/8] use assumed role ARN if none specified --- mlflow/sagemaker/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mlflow/sagemaker/__init__.py b/mlflow/sagemaker/__init__.py index 315726c9bc168..12b048859a329 100644 --- a/mlflow/sagemaker/__init__.py +++ b/mlflow/sagemaker/__init__.py @@ -18,7 +18,7 @@ PYFUNC_IMAGE_URL = "707343435239.dkr.ecr.us-west-2.amazonaws.com/mlflow-pyfunc-test:latest" DEFAULT_IMAGE_URL = PYFUNC_IMAGE_URL -DEFAULT_BUCKET_NAME_PREFIX = "mlflow-sagemaker" +DEFAULT_BUCKET_NAME_PREFIX = "mlflow-sagemaker" _DOCKERFILE_TEMPLATE = """ # Build an image that can serve pyfunc model in SageMaker @@ -165,6 +165,9 @@ def deploy(app_name, model_path, bucket=None, image_url=DEFAULT_IMAGE_URL, run_i eprint("No model data bucket specified, using the default bucket") bucket = _get_default_s3_bucket(region_name) + if execution_role_arn is None: + execution_role_arn = _get_assumed_role_arn() + model_s3_path = _upload_s3(local_model_path=model_path, bucket=bucket, prefix=prefix) _deploy(role=execution_role_arn, image_url=image_url, From 9af366998178525203d24e3d0599e2af551bdcb1 Mon Sep 17 00:00:00 2001 From: Corey Zumar Date: Fri, 29 Jun 2018 13:14:07 -0500 Subject: [PATCH 5/8] Container env fix --- mlflow/sagemaker/container/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mlflow/sagemaker/container/__init__.py b/mlflow/sagemaker/container/__init__.py index c904ee601a714..7fd4a66716f5d 100644 --- a/mlflow/sagemaker/container/__init__.py +++ b/mlflow/sagemaker/container/__init__.py @@ -66,6 +66,9 @@ def _serve(): print("activating custom environment") env = conf[pyfunc.ENV] env_path_dst = os.path.join("/opt/mlflow/", env) + env_path_dst_dir = os.path.dirname(env_path_dst) + if not os.path.exists(env_path_dst_dir): + os.makedirs(env_path_dst_dir) # /opt/ml/ is read-only, we need to copy the env elsewhere before importing it shutil.copy(src=os.path.join("/opt/ml/model/", env), dst=env_path_dst) os.system("conda env create -n custom_env -f {}".format(env_path_dst)) From 888c0666dce7d0ca63488edc2001c55760d84932 Mon Sep 17 00:00:00 2001 From: Corey Zumar Date: Fri, 29 Jun 2018 13:24:28 -0500 Subject: [PATCH 6/8] Add region to default bucket name --- mlflow/sagemaker/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mlflow/sagemaker/__init__.py b/mlflow/sagemaker/__init__.py index 12b048859a329..2aac2517b97ea 100644 --- a/mlflow/sagemaker/__init__.py +++ b/mlflow/sagemaker/__init__.py @@ -247,9 +247,10 @@ def _get_assumed_role_arn(): def _get_default_s3_bucket(region_name): # create bucket if it does not exist - account_id = _get_account_id() - bucket_name = "{pfx}-{aid}".format(pfx=DEFAULT_BUCKET_NAME_PREFIX, aid=account_id) sess = boto3.Session() + account_id = _get_account_id() + region_name = sess.region_name or "us-west-2" + bucket_name = "{pfx}-{rn}-{aid}".format(pfx=DEFAULT_BUCKET_NAME_PREFIX, rn=region_name, aid=account_id) s3 = sess.client('s3') response = s3.list_buckets() buckets = [b['Name'] for b in response["Buckets"]] From c2343b8a523e17e970ca83cdbe7d2254aa16bb5a Mon Sep 17 00:00:00 2001 From: Corey Zumar Date: Fri, 29 Jun 2018 13:26:31 -0500 Subject: [PATCH 7/8] print >> eprint --- mlflow/sagemaker/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlflow/sagemaker/__init__.py b/mlflow/sagemaker/__init__.py index 2aac2517b97ea..5ad491e689896 100644 --- a/mlflow/sagemaker/__init__.py +++ b/mlflow/sagemaker/__init__.py @@ -255,7 +255,7 @@ def _get_default_s3_bucket(region_name): response = s3.list_buckets() buckets = [b['Name'] for b in response["Buckets"]] if not bucket_name in buckets: - print("Default bucket `%s` not found. Creating..." % bucket_name) + eprint("Default bucket `%s` not found. Creating..." % bucket_name) response = s3.create_bucket( ACL='bucket-owner-full-control', Bucket=bucket_name, @@ -263,9 +263,9 @@ def _get_default_s3_bucket(region_name): 'LocationConstraint': region_name, }, ) - print(response) + eprint(response) else: - print("Default bucket `%s` already exists. Skipping creation." % bucket_name) + eprint("Default bucket `%s` already exists. Skipping creation." % bucket_name) return bucket_name def _upload_s3(local_model_path, bucket, prefix): From 7ead7a466275092ce041be03d26ef4684c49937e Mon Sep 17 00:00:00 2001 From: Corey Zumar Date: Fri, 29 Jun 2018 14:31:36 -0500 Subject: [PATCH 8/8] Address comments --- mlflow/sagemaker/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlflow/sagemaker/__init__.py b/mlflow/sagemaker/__init__.py index 5ad491e689896..c0a7ee404c17d 100644 --- a/mlflow/sagemaker/__init__.py +++ b/mlflow/sagemaker/__init__.py @@ -158,7 +158,7 @@ def deploy(app_name, model_path, bucket=None, image_url=DEFAULT_IMAGE_URL, run_i if run_id: model_path = _get_model_log_dir(model_path, run_id) prefix = os.path.join(run_id, prefix) - run_id = _check_compatible(model_path) + run_id = _check_compatible(model_path) if bucket is None: # Attempt to create a default bucket @@ -291,7 +291,7 @@ def _upload_s3(local_model_path, bucket, prefix): Tagging={'TagSet': [{'Key': 'SageMaker', 'Value': 'true'}, ]} ) eprint('tag response', response) - return os.path.join(s3.meta.endpoint_url, bucket, key) + return "/".join(map(lambda x: str(x).rstrip('/'), [s3.meta.endpoint_url, bucket, key])) def _deploy(role, image_url, app_name, model_s3_path, run_id, region_name): """