diff --git a/mlflow/sagemaker/__init__.py b/mlflow/sagemaker/__init__.py
index b276e6778d969..c0a7ee404c17d 100644
--- a/mlflow/sagemaker/__init__.py
+++ b/mlflow/sagemaker/__init__.py
@@ -15,6 +15,11 @@
 
 DEFAULT_IMAGE_NAME = "mlflow_sage"
 
+PYFUNC_IMAGE_URL = "707343435239.dkr.ecr.us-west-2.amazonaws.com/mlflow-pyfunc-test:latest"
+DEFAULT_IMAGE_URL = PYFUNC_IMAGE_URL
+
+DEFAULT_BUCKET_NAME_PREFIX = "mlflow-sagemaker"
+
 _DOCKERFILE_TEMPLATE = """
 # Build an image that can serve pyfunc model in SageMaker
 FROM ubuntu:16.04
@@ -134,8 +139,8 @@ def push_image_to_ecr(image=DEFAULT_IMAGE_NAME):
     os.system(cmd)
 
 
-def deploy(app_name, model_path, execution_role_arn, bucket, run_id=None,
-           image="mlflow_sage", region_name="us-west-2"):
+def deploy(app_name, model_path, bucket=None, image_url=DEFAULT_IMAGE_URL, run_id=None,
+           region_name="us-west-2", execution_role_arn=None):
     """
     Deploy model on Sagemaker.
     Current active AWS account needs to have correct permissions setup.
@@ -146,23 +151,31 @@ def deploy(app_name, model_path, execution_role_arn, bucket, run_id=None,
     :param execution_role_arn: Amazon execution role with sagemaker rights
     :param bucket: S3 bucket where model artifacts are gonna be stored
     :param run_id: MLflow run id.
-    :param image: name of the Docker image to be used.
+    :param image_url: URL of the ECR-hosted Docker image to be used.
     :param region_name: Name of the AWS region to deploy to.
     """
     prefix = model_path
     if run_id:
         model_path = _get_model_log_dir(model_path, run_id)
-        prefix = run_id + "/" + prefix
+        prefix = os.path.join(run_id, prefix)
     run_id = _check_compatible(model_path)
+
+    if bucket is None:
+        # Attempt to create a default bucket
+        eprint("No model data bucket specified, using the default bucket")
+        bucket = _get_default_s3_bucket(region_name)
+
+    if execution_role_arn is None:
+        execution_role_arn = _get_assumed_role_arn()
+
     model_s3_path = _upload_s3(local_model_path=model_path, bucket=bucket, prefix=prefix)
     _deploy(role=execution_role_arn,
-            image=image,
+            image_url=image_url,
             app_name=app_name,
             model_s3_path=model_s3_path,
             run_id=run_id,
             region_name=region_name)
 
-
 def run_local(model_path, run_id=None, port=5000, image=DEFAULT_IMAGE_NAME):
     """
     Serve model locally in a SageMaker compatible Docker container.
@@ -212,6 +225,48 @@ def _make_tarfile(output_filename, source_dir):
         for f in os.listdir(source_dir):
             tar.add(os.path.join(source_dir, f), arcname=f)
 
+def _get_account_id():
+    sess = boto3.Session()
+    sts_client = sess.client("sts")
+    identity_info = sts_client.get_caller_identity()
+    account_id = identity_info["Account"]
+    return account_id
+
+def _get_assumed_role_arn():
+    """
+    :return: ARN of the user's current IAM role
+    """
+    sess = boto3.Session()
+    sts_client = sess.client("sts")
+    identity_info = sts_client.get_caller_identity()
+    sts_arn = identity_info["Arn"]
+    role_name = sts_arn.split("/")[1]
+    iam_client = sess.client("iam")
+    role_response = iam_client.get_role(RoleName=role_name)
+    return role_response["Role"]["Arn"]
+
+def _get_default_s3_bucket(region_name):
+    # Create the bucket if it does not exist
+    sess = boto3.Session()
+    account_id = _get_account_id()
+    region_name = region_name or sess.region_name or "us-west-2"
+    bucket_name = "{pfx}-{rn}-{aid}".format(pfx=DEFAULT_BUCKET_NAME_PREFIX, rn=region_name, aid=account_id)
+    s3 = sess.client('s3')
+    response = s3.list_buckets()
+    buckets = [b['Name'] for b in response["Buckets"]]
+    if bucket_name not in buckets:
+        eprint("Default bucket `%s` not found. Creating..." % bucket_name)
+        response = s3.create_bucket(
+            ACL='bucket-owner-full-control',
+            Bucket=bucket_name,
+            CreateBucketConfiguration={
+                'LocationConstraint': region_name,
+            },
+        )
+        eprint(response)
+    else:
+        eprint("Default bucket `%s` already exists. Skipping creation." % bucket_name)
+    return bucket_name
 
 def _upload_s3(local_model_path, bucket, prefix):
     """
@@ -236,28 +291,24 @@ def _upload_s3(local_model_path, bucket, prefix):
         Tagging={'TagSet': [{'Key': 'SageMaker', 'Value': 'true'}, ]}
     )
     eprint('tag response', response)
-    return '{}/{}/{}'.format(s3.meta.endpoint_url, bucket, key)
-
+    return "/".join(map(lambda x: str(x).rstrip('/'), [s3.meta.endpoint_url, bucket, key]))
 
-def _deploy(role, image, app_name, model_s3_path, run_id, region_name):
+def _deploy(role, image_url, app_name, model_s3_path, run_id, region_name):
     """
     Deploy model on sagemaker.
     :param role: SageMaker execution ARN role
-    :param image: Name of the Docker image the model is being deployed into
+    :param image_url: URL of the ECR-hosted Docker image the model is being deployed into
     :param app_name: Name of the deployed app
     :param model_s3_path: s3 path where we stored the model artifacts
     :param run_id: RunId that generated this model
     """
     sage_client = boto3.client('sagemaker', region_name=region_name)
-    ecr_client = boto3.client("ecr")
-    repository_conf = ecr_client.describe_repositories(
-        repositoryNames=[image])['repositories'][0]
     model_name = app_name + '-model'
     model_response = sage_client.create_model(
         ModelName=model_name,
         PrimaryContainer={
             'ContainerHostname': 'mlflow-serve-%s' % model_name,
-            'Image': repository_conf["repositoryUri"],
+            'Image': image_url,
             'ModelDataUrl': model_s3_path,
             'Environment': {},
         },
diff --git a/mlflow/sagemaker/container/__init__.py b/mlflow/sagemaker/container/__init__.py
index c904ee601a714..7fd4a66716f5d 100644
--- a/mlflow/sagemaker/container/__init__.py
+++ b/mlflow/sagemaker/container/__init__.py
@@ -66,6 +66,9 @@ def _serve():
         print("activating custom environment")
         env = conf[pyfunc.ENV]
         env_path_dst = os.path.join("/opt/mlflow/", env)
+        env_path_dst_dir = os.path.dirname(env_path_dst)
+        if not os.path.exists(env_path_dst_dir):
+            os.makedirs(env_path_dst_dir)
         # /opt/ml/ is read-only, we need to copy the env elsewhere before importing it
         shutil.copy(src=os.path.join("/opt/ml/model/", env), dst=env_path_dst)
         os.system("conda env create -n custom_env -f {}".format(env_path_dst))
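
For reference, a minimal usage sketch of the revised deploy() entry point (not part of the patch). It assumes AWS credentials resolve through the default boto3 session; the app name, model path, and run id below are hypothetical placeholders:

    import mlflow.sagemaker as mfs

    # With the new defaults, bucket and execution_role_arn can be omitted:
    # the bucket falls back to "mlflow-sagemaker-<region>-<account_id>"
    # (created on demand via _get_default_s3_bucket), and the role ARN is
    # derived from the caller's STS identity via _get_assumed_role_arn.
    mfs.deploy(app_name="my-app",          # hypothetical application name
               model_path="model",         # hypothetical artifact path
               run_id="<run-id>",          # optional MLflow run id
               region_name="us-west-2")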