// com.amazonaws.services.elasticmapreduce.util.ResizeJobFlowStep (artifact: aws-java-sdk-emr)
/*
* Copyright 2010-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazonaws.services.elasticmapreduce.util;
import java.util.ArrayList;
import java.util.List;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
/**
 * This class provides some helper methods for creating a Resize Job Flow step
 * as part of your job flow. The resize step can be used to automatically
 * adjust the composition of your cluster while it is running. For example, if
 * you have a large workflow with different compute requirements, you can use
 * this step to automatically add a task instance group before your most compute
 * intensive step.
 *
 * <pre>
 * AWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey);
 * AmazonElasticMapReduce emr = new AmazonElasticMapReduceClient(credentials);
 *
 * HadoopJarStepConfig config = new ResizeJobFlowStep()
 *     .withResizeAction(new ModifyInstanceGroup()
 *         .withInstanceGroup("core")
 *         .withInstanceCount(10))
 *     .withResizeAction(new AddInstanceGroup()
 *         .withInstanceGroup("task")
 *         .withInstanceCount(10)
 *         .withInstanceType("m1.small"))
 *     .withOnArrested(OnArrested.Continue)
 *     .withOnFailure(OnFailure.Continue)
 *     .toHadoopJarStepConfig();
 *
 * StepConfig resizeJobFlow = new StepConfig()
 *     .withName("Resize job flow")
 *     .withActionOnFailure("TERMINATE_JOB_FLOW")
 *     .withHadoopJarStep(config);
 *
 * RunJobFlowRequest request = new RunJobFlowRequest()
 *     .withName("Resize job flow")
 *     .withSteps(resizeJobFlow)
 *     .withLogUri("s3://log-bucket/")
 *     .withInstances(new JobFlowInstancesConfig()
 *         .withEc2KeyName("keypair")
 *         .withHadoopVersion("0.20")
 *         .withInstanceCount(5)
 *         .withKeepJobFlowAliveWhenNoSteps(true)
 *         .withMasterInstanceType("m1.small")
 *         .withSlaveInstanceType("m1.small"));
 *
 * RunJobFlowResult result = emr.runJobFlow(request);
 * </pre>
 */
public class ResizeJobFlowStep {

    /**
     * The action to take if your step is waiting for the instance group to start
     * and it enters the Arrested state.
     * <ul>
     * <li>Fail - Fail the step.</li>
     * <li>Wait - Continue waiting until the instance group is no longer arrested
     * (requires manual intervention).</li>
     * <li>Continue - Proceed onto the next step.</li>
     * </ul>
     */
    public enum OnArrested {
        Fail,
        Wait,
        Continue
    }

    /**
     * Action to take if there is a failure modifying your cluster composition.
     * <ul>
     * <li>Fail - Fail the step.</li>
     * <li>Continue - Proceed on to the next step.</li>
     * </ul>
     */
    public enum OnFailure {
        Fail,
        Continue
    }

    /** Amazon S3 bucket the resize-job-flow jar is loaded from. */
    private final String bucket;

    /** Command-line arguments contributed by the resize actions, in the order they were added. */
    private final List<String> args = new ArrayList<String>();

    /** Whether the step waits for the modification to complete; {@code --no-wait} is emitted when false. */
    private boolean wait = true;

    /** Optional {@code --on-arrested} behavior; the flag is omitted while this is null. */
    private OnArrested onArrested = null;

    /** Optional {@code --on-failure} behavior; the flag is omitted while this is null. */
    private OnFailure onFailure = null;

    /**
     * Creates a new ResizeJobFlowStep using the default Elastic Map Reduce
     * bucket (us-east-1.elasticmapreduce) for the default (us-east-1) region.
     */
    public ResizeJobFlowStep() {
        this("us-east-1.elasticmapreduce");
    }

    /**
     * Creates a new ResizeJobFlowStep using the specified Amazon S3 bucket to
     * load resources.
     * <p>
     * The official bucket format is "&lt;region&gt;.elasticmapreduce", so if
     * you're using the us-east-1 region, you should use the bucket
     * "us-east-1.elasticmapreduce".
     *
     * @param bucket
     *            The Amazon S3 bucket from which to load resources.
     */
    public ResizeJobFlowStep(String bucket) {
        this.bucket = bucket;
    }

    /**
     * Add a new action for this step to perform. These actions can be to modify
     * or add instance groups. This step supports multiple actions, but requires
     * at least one be specified.
     * <p>
     * The action's arguments are read immediately via
     * {@link ResizeAction#getArgs()}, so later changes to the action object are
     * not reflected in this step.
     *
     * @param resizeAction An instance of ResizeAction defining the change.
     * @return A reference to this updated object so that method calls can be chained
     *         together.
     * @throws NullPointerException if {@code resizeAction} is null.
     */
    public ResizeJobFlowStep withResizeAction(ResizeAction resizeAction) {
        this.args.addAll(resizeAction.getArgs());
        return this;
    }

    /**
     * Specifies whether the step should wait for the modification to complete
     * or if it should just continue onto the next step once the modification
     * request is received. Defaults to true.
     *
     * @param wait Whether this step should wait for the modification to complete.
     * @return A reference to this updated object so that method calls can be chained
     *         together.
     */
    public ResizeJobFlowStep withWait(boolean wait) {
        this.wait = wait;
        return this;
    }

    /**
     * What action this step should take if any of the instance group modifications result
     * in the instance group entering Arrested state. This can happen when the bootstrap
     * actions on the newly launched instances are continuously failing.
     *
     * @param onArrested Enum specifying which action to take.
     * @return A reference to this updated object so that method calls can be chained
     *         together.
     */
    public ResizeJobFlowStep withOnArrested(OnArrested onArrested) {
        this.onArrested = onArrested;
        return this;
    }

    /**
     * What action this step should take if the modification fails. This can happen when
     * you request to perform an invalid action, such as shrink a core instance group.
     *
     * @param onFailure Enum specifying which action to take.
     * @return A reference to this updated object so that method calls can be chained
     *         together.
     */
    public ResizeJobFlowStep withOnFailure(OnFailure onFailure) {
        this.onFailure = onFailure;
        return this;
    }

    /**
     * Creates the final HadoopJarStepConfig once you are done configuring the step. You can use
     * this as you would any other HadoopJarStepConfig.
     * <p>
     * This method does not modify the step: it may be called repeatedly, and the step
     * may be configured further between calls, without flags from an earlier call
     * leaking into a later result.
     *
     * @return HadoopJarStepConfig configured to perform the specified actions.
     * @throws AmazonServiceException if no resize action has been added.
     */
    public HadoopJarStepConfig toHadoopJarStepConfig() {
        if (args.isEmpty()) {
            throw new AmazonServiceException("Cannot create a ResizeJobFlowStep with no resize actions.");
        }

        // Work on a copy: appending the option flags to the field itself would
        // duplicate them on every subsequent call and keep stale values around
        // after the builder is reconfigured.
        List<String> stepArgs = new ArrayList<String>(args);

        if (!wait) {
            stepArgs.add("--no-wait");
        }
        if (onArrested != null) {
            stepArgs.add("--on-arrested");
            stepArgs.add(onArrested.toString());
        }
        if (onFailure != null) {
            stepArgs.add("--on-failure");
            stepArgs.add(onFailure.toString());
        }

        return new HadoopJarStepConfig()
            .withJar("s3://" + bucket + "/libs/resize-job-flow/0.1/resize-job-flow.jar")
            .withArgs(stepArgs);
    }

    /**
     * A single change to the cluster composition, expressed as the command-line
     * arguments passed to the resize-job-flow jar.
     */
    public interface ResizeAction {
        /**
         * @return The command-line arguments describing this action.
         */
        List<String> getArgs();
    }

    /**
     * Class representing a change to an existing instance group.
     */
    public static class ModifyInstanceGroup implements ResizeAction {
        private String instanceGroup;
        private Integer instanceCount;

        /**
         * Set the identification for the instance group to modify. You can specify either
         * core or task if there is only one instance group of that role. Otherwise you can
         * specify the instance group ID (such as ig-ABABABABABAB).
         *
         * @param instanceGroup Identifier for the instance group.
         * @return A reference to this updated object so that method calls can be chained
         *         together.
         */
        public ModifyInstanceGroup withInstanceGroup(String instanceGroup) {
            this.instanceGroup = instanceGroup;
            return this;
        }

        /**
         * Specify the new size of the instance group.
         *
         * @param instanceCount Target size of the instance group.
         * @return A reference to this updated object so that method calls can be chained
         *         together.
         */
        public ModifyInstanceGroup withInstanceCount(Integer instanceCount) {
            this.instanceCount = instanceCount;
            return this;
        }

        /**
         * Builds the {@code --modify-instance-group} arguments for this action.
         *
         * @return A new list holding the arguments for this action.
         * @throws AmazonServiceException if the instance group or instance count
         *         has not been set.
         */
        @Override
        public List<String> getArgs() {
            if (instanceGroup == null) {
                throw new AmazonServiceException("InstanceGroup must not be null.");
            }
            if (instanceCount == null) {
                throw new AmazonServiceException("InstanceCount must not be null.");
            }

            List<String> args = new ArrayList<String>();
            args.add("--modify-instance-group");
            args.add(instanceGroup);
            args.add("--instance-count");
            args.add(instanceCount.toString());
            return args;
        }
    }

    /**
     * Class representing creating a new instance group.
     */
    public static class AddInstanceGroup implements ResizeAction {
        private String instanceGroup;
        private Integer instanceCount;
        private String instanceType;

        /**
         * Specify the role of the new instance group (core, task).
         *
         * @param instanceGroup New instance group's role.
         * @return A reference to this updated object so that method calls can be chained
         *         together.
         */
        public AddInstanceGroup withInstanceGroup(String instanceGroup) {
            this.instanceGroup = instanceGroup;
            return this;
        }

        /**
         * Specify the count of the new instance group.
         *
         * @param instanceCount Target instance group size.
         * @return A reference to this updated object so that method calls can be chained
         *         together.
         */
        public AddInstanceGroup withInstanceCount(int instanceCount) {
            this.instanceCount = instanceCount;
            return this;
        }

        /**
         * Specify the instance type to use for this instance group.
         *
         * @param instanceType Instance type.
         * @return A reference to this updated object so that method calls can be chained
         *         together.
         */
        public AddInstanceGroup withInstanceType(String instanceType) {
            this.instanceType = instanceType;
            return this;
        }

        /**
         * Builds the {@code --add-instance-group} arguments for this action.
         *
         * @return A new list holding the arguments for this action.
         * @throws AmazonServiceException if the instance group, instance count or
         *         instance type has not been set.
         */
        @Override
        public List<String> getArgs() {
            if (instanceGroup == null) {
                throw new AmazonServiceException("InstanceGroup must not be null.");
            }
            if (instanceCount == null) {
                throw new AmazonServiceException("InstanceCount must not be null.");
            }
            if (instanceType == null) {
                throw new AmazonServiceException("InstanceType must not be null.");
            }

            List<String> args = new ArrayList<String>();
            args.add("--add-instance-group");
            args.add(instanceGroup);
            args.add("--instance-count");
            args.add(instanceCount.toString());
            args.add("--instance-type");
            args.add(instanceType);
            return args;
        }
    }
}