Upgrade to SageMaker v2 including changes:

brightsparc · brightsparc · commit 3013c34e1ada · 2020-08-10T21:03:54.000+10:00
* image -> image_uri
* train_instance_count -> instance_count
* train_instance_type -> instance_type
* train_max_run -> max_run
* train_max_wait -> max_wait
* train_volume_size -> volume_size
* sagemaker.session.s3_input -> sagemaker.inputs.TrainingInput
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,3 @@
-sagemaker>=1.71.0
+sagemaker>=2.0.0
 boto3>=1.9.213
 pyyaml
diff --git a/src/stepfunctions/steps/sagemaker.py b/src/stepfunctions/steps/sagemaker.py
@@ -36,12 +36,12 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non
             data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator, as this can take any of the following forms:
 
                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple
+                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                     channels for training data, you can specify a dict mapping channel names to
-                    strings or :func:`~sagemaker.session.s3_input` objects.
-                * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can
+                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
+                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                     provide additional information about the training dataset. See
-                    :func:`sagemaker.session.s3_input` for full details.
+                    :func:`sagemaker.inputs.TrainingInput` for full details.
                 * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                     Amazon :class:`Record` objects serialized and stored in S3.
                     For use with an estimator for an Amazon algorithm.
@@ -202,7 +202,7 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No
             tags (list[dict], optional): `List to tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
         """
         if isinstance(model, FrameworkModel):
-            parameters = model_config(model=model, instance_type=instance_type, role=model.role, image=model.image)
+            parameters = model_config(model=model, instance_type=instance_type, role=model.role, image_uri=model.image_uri)
             if model_name:
                 parameters['ModelName'] = model_name
         elif isinstance(model, Model):
@@ -211,7 +211,7 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No
                 'ModelName': model_name or model.name,
                 'PrimaryContainer': {
                     'Environment': {},
-                    'Image': model.image,
+                    'Image': model.image_uri,
                     'ModelDataUrl': model.model_data
                 }
             }
@@ -322,12 +322,12 @@ def __init__(self, state_id, tuner, job_name, data, wait_for_completion=True, ta
             data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator in the tuner, as this can take any of the following forms:
 
                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple
+                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                     channels for training data, you can specify a dict mapping channel names to
-                    strings or :func:`~sagemaker.session.s3_input` objects.
-                * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can
+                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
+                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                     provide additional information about the training dataset. See
-                    :func:`sagemaker.session.s3_input` for full details.
+                    :func:`sagemaker.inputs.TrainingInput` for full details.
                 * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                     Amazon :class:`Record` objects serialized and stored in S3.
                     For use with an estimator for an Amazon algorithm.
diff --git a/src/stepfunctions/template/pipeline/inference.py b/src/stepfunctions/template/pipeline/inference.py
@@ -48,8 +48,8 @@ def __init__(self, preprocessor, estimator, inputs, s3_bucket, role, client=None
             inputs: Information about the training data. Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms:
 
                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects.
-                * (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details.
+                * (dict[str, str] or dict[str, `sagemaker.inputs.TrainingInput`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.inputs.TrainingInput` objects.
+                * (`sagemaker.inputs.TrainingInput`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.inputs.TrainingInput` for full details.
                 * (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm.
                 * (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data.
             s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-<timestamp>`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method.
@@ -87,8 +87,8 @@ def build_workflow_definition(self):
         """
         default_name = self.pipeline_name
 
-        train_instance_type = self.preprocessor.train_instance_type
-        train_instance_count = self.preprocessor.train_instance_count
+        instance_type = self.preprocessor.instance_type
+        instance_count = self.preprocessor.instance_count
 
         # Preprocessor for feature transformation
         preprocessor_train_step = TrainingStep(
@@ -100,13 +100,13 @@ def build_workflow_definition(self):
         preprocessor_model = self.preprocessor.create_model()
         preprocessor_model_step = ModelStep(
             StepId.CreatePreprocessorModel.value,
-            instance_type=train_instance_type,
+            instance_type=instance_type,
             model=preprocessor_model,
             model_name=default_name
         )
         preprocessor_transform_step = TransformStep(
             StepId.TransformInput.value,
-            transformer=self.preprocessor.transformer(instance_count=train_instance_count, instance_type=train_instance_type, max_payload=20),
+            transformer=self.preprocessor.transformer(instance_count=instance_count, instance_type=instance_type, max_payload=20),
             job_name=default_name,
             model_name=default_name,
             data=self.inputs['train'],
@@ -115,8 +115,8 @@ def build_workflow_definition(self):
         )
 
         # Training
-        train_instance_type = self.estimator.train_instance_type
-        train_instance_count = self.estimator.train_instance_count
+        instance_type = self.estimator.instance_type
+        instance_count = self.estimator.instance_count
 
         training_step = TrainingStep(
             StepId.Train.value,
@@ -135,21 +135,21 @@ def build_workflow_definition(self):
         )
         pipeline_model_step = ModelStep(
             StepId.CreatePipelineModel.value,
-            instance_type=train_instance_type,
+            instance_type=instance_type,
             model=preprocessor_model,
             model_name=default_name
         )
-        pipeline_model_step.parameters = self.pipeline_model_config(train_instance_type, pipeline_model)
+        pipeline_model_step.parameters = self.pipeline_model_config(instance_type, pipeline_model)
 
-        deployable_model = Model(model_data='', image='')
+        deployable_model = Model(model_data='', image_uri='')
 
         # Deployment
         endpoint_config_step = EndpointConfigStep(
             StepId.ConfigureEndpoint.value,
             endpoint_config_name=default_name,
             model_name=default_name,
-            initial_instance_count=train_instance_count,
-            instance_type=train_instance_type
+            initial_instance_count=instance_count,
+            instance_type=instance_type
         )
 
         deploy_step = EndpointStep(
diff --git a/src/stepfunctions/template/pipeline/train.py b/src/stepfunctions/template/pipeline/train.py
@@ -43,8 +43,8 @@ def __init__(self, estimator, role, inputs, s3_bucket, client=None, **kwargs):
             inputs: Information about the training data. Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms:
 
                 * (str) - The S3 location where training data is saved.
-                * (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects.
-                * (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details.
+                * (dict[str, str] or dict[str, `sagemaker.inputs.TrainingInput`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.inputs.TrainingInput` objects.
+                * (`sagemaker.inputs.TrainingInput`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.inputs.TrainingInput` for full details.
                 * (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm.
                 * (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data.
             s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-<timestamp>`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method.
@@ -79,8 +79,8 @@ def build_workflow_definition(self):
         """
         default_name = self.pipeline_name
 
-        train_instance_type = self.estimator.train_instance_type
-        train_instance_count = self.estimator.train_instance_count
+        instance_type = self.estimator.instance_type
+        instance_count = self.estimator.instance_count
 
         training_step = TrainingStep(
             StepId.Train.value,
@@ -92,7 +92,7 @@ def build_workflow_definition(self):
         model = self.estimator.create_model()
         model_step = ModelStep(
             StepId.CreateModel.value,
-            instance_type=train_instance_type,
+            instance_type=instance_type,
             model=model,
             model_name=default_name
         )
@@ -101,8 +101,8 @@ def build_workflow_definition(self):
             StepId.ConfigureEndpoint.value,
             endpoint_config_name=default_name,
             model_name=default_name,
-            initial_instance_count=train_instance_count,
-            instance_type=train_instance_type
+            initial_instance_count=instance_count,
+            instance_type=instance_type
         )
         deploy_step = EndpointStep(
             StepId.Deploy.value,
diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py
@@ -53,8 +53,8 @@ def sagemaker_role_arn(aws_account_id):
 def pca_estimator_fixture(sagemaker_role_arn):
     estimator = pca.PCA(
         role=sagemaker_role_arn,
-        train_instance_count=1,
-        train_instance_type="ml.m5.large",
+        instance_count=1,
+        instance_type="ml.m5.large",
         num_components=48
     )
     return estimator
diff --git a/tests/integ/test_inference_pipeline.py b/tests/integ/test_inference_pipeline.py
@@ -45,7 +45,7 @@ def sklearn_preprocessor(sagemaker_role_arn, sagemaker_session):
     sklearn_preprocessor = SKLearn(
         entry_point=script_path,
         role=sagemaker_role_arn,
-        train_instance_type="ml.m5.large",
+        instance_type="ml.m5.large",
         sagemaker_session=sagemaker_session,
         hyperparameters={"epochs": 1},
     )
@@ -60,7 +60,7 @@ def sklearn_estimator(sagemaker_role_arn, sagemaker_session):
     sklearn_estimator = SKLearn(
         entry_point=script_path,
         role=sagemaker_role_arn,
-        train_instance_type="ml.m5.large",
+        instance_type="ml.m5.large",
         sagemaker_session=sagemaker_session,
         hyperparameters={"epochs": 1},
         input_mode='File'
diff --git a/tests/integ/test_sagemaker_steps.py b/tests/integ/test_sagemaker_steps.py
@@ -254,8 +254,8 @@ def test_tuning_step(sfn_client, record_set_for_hyperparameter_tuning, sagemaker
 
     kmeans = KMeans(
         role=sagemaker_role_arn,
-        train_instance_count=1,
-        train_instance_type=INSTANCE_TYPE,
+        instance_count=1,
+        instance_type=INSTANCE_TYPE,
         k=10
     )
 
diff --git a/tests/integ/test_training_pipeline_estimators.py b/tests/integ/test_training_pipeline_estimators.py
@@ -50,8 +50,8 @@ def pca_estimator(sagemaker_role_arn):
     pca_estimator = PCA(
         role=sagemaker_role_arn,
         num_components=1,
-        train_instance_count=1,
-        train_instance_type='ml.m5.large',
+        instance_count=1,
+        instance_type='ml.m5.large',
         )
 
     pca_estimator.feature_dim=500
diff --git a/tests/integ/test_training_pipeline_framework_estimator.py b/tests/integ/test_training_pipeline_framework_estimator.py
@@ -36,8 +36,8 @@ def torch_estimator(sagemaker_role_arn):
         entry_point=script_path,
         role=sagemaker_role_arn,
         framework_version='1.2.0',
-        train_instance_count=1,
-        train_instance_type='ml.m5.large',
+        instance_count=1,
+        instance_type='ml.m5.large',
         hyperparameters={
             'epochs': 6,
             'backend': 'gloo'
@@ -50,8 +50,8 @@ def sklearn_estimator(sagemaker_role_arn):
     return SKLearn(
         entry_point=script_path,
         role=sagemaker_role_arn,
-        train_instance_count=1,
-        train_instance_type='ml.m5.large',
+        instance_count=1,
+        instance_type='ml.m5.large',
         framework_version='0.20.0',
         hyperparameters={
             "epochs": 1
diff --git a/tests/unit/test_pipeline.py b/tests/unit/test_pipeline.py
@@ -36,8 +36,8 @@ def pca_estimator():
     pca = sagemaker.estimator.Estimator(
         PCA_IMAGE,
         role=SAGEMAKER_EXECUTION_ROLE,
-        train_instance_count=1,
-        train_instance_type='ml.c4.xlarge',
+        instance_count=1,
+        instance_type='ml.c4.xlarge',
         output_path=s3_output_location,
         sagemaker_session=sagemaker_session
     )
@@ -62,7 +62,7 @@ def sklearn_preprocessor():
     sklearn_preprocessor = SKLearn(
         entry_point=script_path,
         role=SAGEMAKER_EXECUTION_ROLE,
-        train_instance_type="ml.c4.xlarge",
+        instance_type="ml.c4.xlarge",
         source_dir=source_dir,
         sagemaker_session=sagemaker_session
     )
@@ -82,10 +82,10 @@ def linear_learner_estimator():
     ll_estimator = sagemaker.estimator.Estimator(
         LINEAR_LEARNER_IMAGE,
         SAGEMAKER_EXECUTION_ROLE, 
-        train_instance_count=1, 
-        train_instance_type='ml.c4.xlarge',
-        train_volume_size=20,
-        train_max_run=3600,
+        instance_count=1, 
+        instance_type='ml.c4.xlarge',
+        volume_size=20,
+        max_run=3600,
         input_mode='File',
         output_path=s3_output_location,
         sagemaker_session=sagemaker_session
diff --git a/tests/unit/test_sagemaker_steps.py b/tests/unit/test_sagemaker_steps.py
@@ -44,8 +44,8 @@ def pca_estimator():
     pca = sagemaker.estimator.Estimator(
         PCA_IMAGE,
         role=EXECUTION_ROLE,
-        train_instance_count=1,
-        train_instance_type='ml.c4.xlarge',
+        instance_count=1,
+        instance_type='ml.c4.xlarge',
         output_path=s3_output_location
     )
 
@@ -90,8 +90,8 @@ def pca_estimator_with_debug_hook():
     pca = sagemaker.estimator.Estimator(
         PCA_IMAGE,
         role=EXECUTION_ROLE,
-        train_instance_count=1,
-        train_instance_type='ml.c4.xlarge',
+        instance_count=1,
+        instance_type='ml.c4.xlarge',
         output_path=s3_output_location,
         debugger_hook_config = hook_config,
         rules=rules
@@ -116,7 +116,7 @@ def pca_model():
     model_data = 's3://sagemaker/models/pca.tar.gz'
     return Model(
         model_data=model_data,
-        image=PCA_IMAGE,
+        image_uri=PCA_IMAGE,
         role=EXECUTION_ROLE,
         name='pca-model'
     )
@@ -140,8 +140,8 @@ def tensorflow_estimator():
         framework_version='1.13',
         training_steps=1000,
         evaluation_steps=100,
-        train_instance_count=1,
-        train_instance_type='ml.p2.xlarge',
+        instance_count=1,
+        instance_type='ml.p2.xlarge',
         output_path=s3_output_location,
         source_dir=s3_source_location,
         image_name=TENSORFLOW_IMAGE,
@@ -460,7 +460,7 @@ def test_get_expected_model(pca_estimator):
             'ModelName': 'pca-model',
             'PrimaryContainer': {
                 'Environment': {},
-                'Image': expected_model.image,
+                'Image': expected_model.image_uri,
                 'ModelDataUrl.$': "$['ModelArtifacts']['S3ModelArtifacts']"
             }
         },
@@ -492,7 +492,7 @@ def test_get_expected_model_with_framework_estimator(tensorflow_estimator):
                     'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
                     'SAGEMAKER_REGION': 'us-east-1',
                 },
-                'Image': expected_model.image,
+                'Image': expected_model.image_uri,
                 'ModelDataUrl.$': "$['ModelArtifacts']['S3ModelArtifacts']"
             }
         },
@@ -509,7 +509,7 @@ def test_model_step_creation(pca_model):
             'ModelName': 'pca-model',
             'PrimaryContainer': {
                 'Environment': {},
-                'Image': pca_model.image,
+                'Image': pca_model.image_uri,
                 'ModelDataUrl': pca_model.model_data
             },
             'Tags': DEFAULT_TAGS_LIST

Original file line number	Diff line number	Diff line change
`@@ -254,8 +254,8 @@ def test_tuning_step(sfn_client, record_set_for_hyperparameter_tuning, sagemaker`
`254`	`254`
`255`	`255`	`kmeans = KMeans(`
`256`	`256`	`role=sagemaker_role_arn,`
`257`		`- train_instance_count=1,`
`258`		`- train_instance_type=INSTANCE_TYPE,`
	`257`	`+ instance_count=1,`
	`258`	`+ instance_type=INSTANCE_TYPE,`
`259`	`259`	`k=10`
`260`	`260`	`)`
`261`	`261`