Skip to content

Commit 3013c34

Browse files
committed
Upgrade to SageMaker v2 including changes:
* image -> image_uri
* train_instance_count -> instance_count
* train_instance_type -> instance_type
* train_max_run -> max_run
* train_max_run_wait -> max_run_wait
* train_volume_size -> volume_size
* sagemaker.session.s3_input -> sagemaker.inputs.TrainingInput
1 parent 2a104d3 commit 3013c34

11 files changed

+60
-60
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
sagemaker>=1.71.0
1+
sagemaker>=2.0.0
22
boto3>=1.9.213
33
pyyaml

src/stepfunctions/steps/sagemaker.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non
3636
data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator, as this can take any of the following forms:
3737
3838
* (str) - The S3 location where training data is saved.
39-
* (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple
39+
* (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
4040
channels for training data, you can specify a dict mapping channel names to
41-
strings or :func:`~sagemaker.session.s3_input` objects.
42-
* (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can
41+
strings or :func:`~sagemaker.inputs.TrainingInput` objects.
42+
* (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
4343
provide additional information about the training dataset. See
44-
:func:`sagemaker.session.s3_input` for full details.
44+
:func:`sagemaker.inputs.TrainingInput` for full details.
4545
* (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
4646
Amazon :class:`Record` objects serialized and stored in S3.
4747
For use with an estimator for an Amazon algorithm.
@@ -202,7 +202,7 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No
202202
tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
203203
"""
204204
if isinstance(model, FrameworkModel):
205-
parameters = model_config(model=model, instance_type=instance_type, role=model.role, image=model.image)
205+
parameters = model_config(model=model, instance_type=instance_type, role=model.role, image_uri=model.image_uri)
206206
if model_name:
207207
parameters['ModelName'] = model_name
208208
elif isinstance(model, Model):
@@ -211,7 +211,7 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No
211211
'ModelName': model_name or model.name,
212212
'PrimaryContainer': {
213213
'Environment': {},
214-
'Image': model.image,
214+
'Image': model.image_uri,
215215
'ModelDataUrl': model.model_data
216216
}
217217
}
@@ -322,12 +322,12 @@ def __init__(self, state_id, tuner, job_name, data, wait_for_completion=True, ta
322322
data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator in the tuner, as this can take any of the following forms:
323323
324324
* (str) - The S3 location where training data is saved.
325-
* (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple
325+
* (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
326326
channels for training data, you can specify a dict mapping channel names to
327-
strings or :func:`~sagemaker.session.s3_input` objects.
328-
* (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can
327+
strings or :func:`~sagemaker.inputs.TrainingInput` objects.
328+
* (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
329329
provide additional information about the training dataset. See
330-
:func:`sagemaker.session.s3_input` for full details.
330+
:func:`sagemaker.inputs.TrainingInput` for full details.
331331
* (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
332332
Amazon :class:`Record` objects serialized and stored in S3.
333333
For use with an estimator for an Amazon algorithm.

src/stepfunctions/template/pipeline/inference.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ def __init__(self, preprocessor, estimator, inputs, s3_bucket, role, client=None
4848
inputs: Information about the training data. Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms:
4949
5050
* (str) - The S3 location where training data is saved.
51-
* (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects.
52-
* (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details.
51+
* (dict[str, str] or dict[str, `sagemaker.inputs.TrainingInput`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.inputs.TrainingInput` objects.
52+
* (`sagemaker.inputs.TrainingInput`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.inputs.TrainingInput` for full details.
5353
* (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm.
5454
* (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data.
5555
s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-<timestamp>`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method.
@@ -87,8 +87,8 @@ def build_workflow_definition(self):
8787
"""
8888
default_name = self.pipeline_name
8989

90-
train_instance_type = self.preprocessor.train_instance_type
91-
train_instance_count = self.preprocessor.train_instance_count
90+
instance_type = self.preprocessor.instance_type
91+
instance_count = self.preprocessor.instance_count
9292

9393
# Preprocessor for feature transformation
9494
preprocessor_train_step = TrainingStep(
@@ -100,13 +100,13 @@ def build_workflow_definition(self):
100100
preprocessor_model = self.preprocessor.create_model()
101101
preprocessor_model_step = ModelStep(
102102
StepId.CreatePreprocessorModel.value,
103-
instance_type=train_instance_type,
103+
instance_type=instance_type,
104104
model=preprocessor_model,
105105
model_name=default_name
106106
)
107107
preprocessor_transform_step = TransformStep(
108108
StepId.TransformInput.value,
109-
transformer=self.preprocessor.transformer(instance_count=train_instance_count, instance_type=train_instance_type, max_payload=20),
109+
transformer=self.preprocessor.transformer(instance_count=instance_count, instance_type=instance_type, max_payload=20),
110110
job_name=default_name,
111111
model_name=default_name,
112112
data=self.inputs['train'],
@@ -115,8 +115,8 @@ def build_workflow_definition(self):
115115
)
116116

117117
# Training
118-
train_instance_type = self.estimator.train_instance_type
119-
train_instance_count = self.estimator.train_instance_count
118+
instance_type = self.estimator.instance_type
119+
instance_count = self.estimator.instance_count
120120

121121
training_step = TrainingStep(
122122
StepId.Train.value,
@@ -135,21 +135,21 @@ def build_workflow_definition(self):
135135
)
136136
pipeline_model_step = ModelStep(
137137
StepId.CreatePipelineModel.value,
138-
instance_type=train_instance_type,
138+
instance_type=instance_type,
139139
model=preprocessor_model,
140140
model_name=default_name
141141
)
142-
pipeline_model_step.parameters = self.pipeline_model_config(train_instance_type, pipeline_model)
142+
pipeline_model_step.parameters = self.pipeline_model_config(instance_type, pipeline_model)
143143

144-
deployable_model = Model(model_data='', image='')
144+
deployable_model = Model(model_data='', image_uri='')
145145

146146
# Deployment
147147
endpoint_config_step = EndpointConfigStep(
148148
StepId.ConfigureEndpoint.value,
149149
endpoint_config_name=default_name,
150150
model_name=default_name,
151-
initial_instance_count=train_instance_count,
152-
instance_type=train_instance_type
151+
initial_instance_count=instance_count,
152+
instance_type=instance_type
153153
)
154154

155155
deploy_step = EndpointStep(

src/stepfunctions/template/pipeline/train.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ def __init__(self, estimator, role, inputs, s3_bucket, client=None, **kwargs):
4343
inputs: Information about the training data. Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms:
4444
4545
* (str) - The S3 location where training data is saved.
46-
* (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects.
47-
* (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details.
46+
* (dict[str, str] or dict[str, `sagemaker.inputs.TrainingInput`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.inputs.TrainingInput` objects.
47+
* (`sagemaker.inputs.TrainingInput`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.inputs.TrainingInput` for full details.
4848
* (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm.
4949
* (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data.
5050
s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-<timestamp>`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method.
@@ -79,8 +79,8 @@ def build_workflow_definition(self):
7979
"""
8080
default_name = self.pipeline_name
8181

82-
train_instance_type = self.estimator.train_instance_type
83-
train_instance_count = self.estimator.train_instance_count
82+
instance_type = self.estimator.instance_type
83+
instance_count = self.estimator.instance_count
8484

8585
training_step = TrainingStep(
8686
StepId.Train.value,
@@ -92,7 +92,7 @@ def build_workflow_definition(self):
9292
model = self.estimator.create_model()
9393
model_step = ModelStep(
9494
StepId.CreateModel.value,
95-
instance_type=train_instance_type,
95+
instance_type=instance_type,
9696
model=model,
9797
model_name=default_name
9898
)
@@ -101,8 +101,8 @@ def build_workflow_definition(self):
101101
StepId.ConfigureEndpoint.value,
102102
endpoint_config_name=default_name,
103103
model_name=default_name,
104-
initial_instance_count=train_instance_count,
105-
instance_type=train_instance_type
104+
initial_instance_count=instance_count,
105+
instance_type=instance_type
106106
)
107107
deploy_step = EndpointStep(
108108
StepId.Deploy.value,

tests/integ/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ def sagemaker_role_arn(aws_account_id):
5353
def pca_estimator_fixture(sagemaker_role_arn):
5454
estimator = pca.PCA(
5555
role=sagemaker_role_arn,
56-
train_instance_count=1,
57-
train_instance_type="ml.m5.large",
56+
instance_count=1,
57+
instance_type="ml.m5.large",
5858
num_components=48
5959
)
6060
return estimator

tests/integ/test_inference_pipeline.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def sklearn_preprocessor(sagemaker_role_arn, sagemaker_session):
4545
sklearn_preprocessor = SKLearn(
4646
entry_point=script_path,
4747
role=sagemaker_role_arn,
48-
train_instance_type="ml.m5.large",
48+
instance_type="ml.m5.large",
4949
sagemaker_session=sagemaker_session,
5050
hyperparameters={"epochs": 1},
5151
)
@@ -60,7 +60,7 @@ def sklearn_estimator(sagemaker_role_arn, sagemaker_session):
6060
sklearn_estimator = SKLearn(
6161
entry_point=script_path,
6262
role=sagemaker_role_arn,
63-
train_instance_type="ml.m5.large",
63+
instance_type="ml.m5.large",
6464
sagemaker_session=sagemaker_session,
6565
hyperparameters={"epochs": 1},
6666
input_mode='File'

tests/integ/test_sagemaker_steps.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,8 @@ def test_tuning_step(sfn_client, record_set_for_hyperparameter_tuning, sagemaker
254254

255255
kmeans = KMeans(
256256
role=sagemaker_role_arn,
257-
train_instance_count=1,
258-
train_instance_type=INSTANCE_TYPE,
257+
instance_count=1,
258+
instance_type=INSTANCE_TYPE,
259259
k=10
260260
)
261261

tests/integ/test_training_pipeline_estimators.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ def pca_estimator(sagemaker_role_arn):
5050
pca_estimator = PCA(
5151
role=sagemaker_role_arn,
5252
num_components=1,
53-
train_instance_count=1,
54-
train_instance_type='ml.m5.large',
53+
instance_count=1,
54+
instance_type='ml.m5.large',
5555
)
5656

5757
pca_estimator.feature_dim=500

tests/integ/test_training_pipeline_framework_estimator.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ def torch_estimator(sagemaker_role_arn):
3636
entry_point=script_path,
3737
role=sagemaker_role_arn,
3838
framework_version='1.2.0',
39-
train_instance_count=1,
40-
train_instance_type='ml.m5.large',
39+
instance_count=1,
40+
instance_type='ml.m5.large',
4141
hyperparameters={
4242
'epochs': 6,
4343
'backend': 'gloo'
@@ -50,8 +50,8 @@ def sklearn_estimator(sagemaker_role_arn):
5050
return SKLearn(
5151
entry_point=script_path,
5252
role=sagemaker_role_arn,
53-
train_instance_count=1,
54-
train_instance_type='ml.m5.large',
53+
instance_count=1,
54+
instance_type='ml.m5.large',
5555
framework_version='0.20.0',
5656
hyperparameters={
5757
"epochs": 1

tests/unit/test_pipeline.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ def pca_estimator():
3636
pca = sagemaker.estimator.Estimator(
3737
PCA_IMAGE,
3838
role=SAGEMAKER_EXECUTION_ROLE,
39-
train_instance_count=1,
40-
train_instance_type='ml.c4.xlarge',
39+
instance_count=1,
40+
instance_type='ml.c4.xlarge',
4141
output_path=s3_output_location,
4242
sagemaker_session=sagemaker_session
4343
)
@@ -62,7 +62,7 @@ def sklearn_preprocessor():
6262
sklearn_preprocessor = SKLearn(
6363
entry_point=script_path,
6464
role=SAGEMAKER_EXECUTION_ROLE,
65-
train_instance_type="ml.c4.xlarge",
65+
instance_type="ml.c4.xlarge",
6666
source_dir=source_dir,
6767
sagemaker_session=sagemaker_session
6868
)
@@ -82,10 +82,10 @@ def linear_learner_estimator():
8282
ll_estimator = sagemaker.estimator.Estimator(
8383
LINEAR_LEARNER_IMAGE,
8484
SAGEMAKER_EXECUTION_ROLE,
85-
train_instance_count=1,
86-
train_instance_type='ml.c4.xlarge',
87-
train_volume_size=20,
88-
train_max_run=3600,
85+
instance_count=1,
86+
instance_type='ml.c4.xlarge',
87+
volume_size=20,
88+
max_run=3600,
8989
input_mode='File',
9090
output_path=s3_output_location,
9191
sagemaker_session=sagemaker_session

0 commit comments

Comments
 (0)