All downloads are free. Search and download functionality uses the official Maven repository.

com.amazonaws.services.elasticmapreduce.util.StreamingStep Maven / Gradle / Ivy

Go to download

The AWS Java SDK for Amazon EMR module holds the client classes that are used for communicating with Amazon Elastic MapReduce Service

There is a newer version: 1.12.780
Show newest version
/*
 * Copyright 2010-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.amazonaws.services.elasticmapreduce.util;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;

/**
 * Class that makes it easy to define Hadoop Streaming steps.
 * <p>
 * See also: the Hadoop Streaming documentation.
 *
 * <pre>
 * AWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey);
 * AmazonElasticMapReduce emr = new AmazonElasticMapReduceClient(credentials);
 *
 * HadoopJarStepConfig config = new StreamingStep()
 *     .withInputs("s3://elasticmapreduce/samples/wordcount/input")
 *     .withOutput("s3://my-bucket/output/")
 *     .withMapper("s3://elasticmapreduce/samples/wordcount/wordSplitter.py")
 *     .withReducer("aggregate")
 *     .toHadoopJarStepConfig();
 *
 * StepConfig wordCount = new StepConfig()
 *     .withName("Word Count")
 *     .withActionOnFailure("TERMINATE_JOB_FLOW")
 *     .withHadoopJarStep(config);
 *
 * RunJobFlowRequest request = new RunJobFlowRequest()
 *     .withName("Word Count")
 *     .withSteps(wordCount)
 *     .withLogUri("s3://log-bucket/")
 *     .withInstances(new JobFlowInstancesConfig()
 *         .withEc2KeyName("keypair")
 *         .withHadoopVersion("0.20")
 *         .withInstanceCount(5)
 *         .withKeepJobFlowAliveWhenNoSteps(true)
 *         .withMasterInstanceType("m1.small")
 *         .withSlaveInstanceType("m1.small"));
 *
 * RunJobFlowResult result = emr.runJobFlow(request);
 * </pre>
 */
public class StreamingStep {

    /** Hadoop property forced to "0" when no reducer is configured. */
    private static final String REDUCE_TASKS_KEY = "mapred.reduce.tasks";

    /** Jar path passed to the generated step configuration. */
    private static final String STREAMING_JAR = "/home/hadoop/contrib/streaming/hadoop-streaming.jar";

    private List<String> inputs = new ArrayList<String>();
    private String output;
    private String mapper;
    private String reducer;
    private Map<String, String> hadoopConfig = new HashMap<String, String>();

    /**
     * Creates a new default StreamingStep.
     */
    public StreamingStep() {
    }

    /**
     * Get list of step input paths.
     *
     * @return List of step inputs. This is the live internal list, not a copy.
     */
    public List<String> getInputs() {
        return inputs;
    }

    /**
     * Set the list of step input paths. The given collection is copied; a
     * {@code null} argument results in an empty input list.
     *
     * @param inputs List of step inputs.
     */
    public void setInputs(Collection<String> inputs) {
        List<String> newInputs = new ArrayList<String>();
        if (inputs != null) {
            newInputs.addAll(inputs);
        }
        this.inputs = newInputs;
    }

    /**
     * Add more input paths to this step.
     *
     * @param inputs A list of inputs to this step. A {@code null} array is
     *        ignored.
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     */
    public StreamingStep withInputs(String... inputs) {
        // Guard against an explicit null array, e.g. withInputs((String[]) null).
        if (inputs != null) {
            for (String input : inputs) {
                this.inputs.add(input);
            }
        }
        return this;
    }

    /**
     * Get output path.
     *
     * @return Output path.
     */
    public String getOutput() {
        return output;
    }

    /**
     * Set the output path for this step.
     *
     * @param output Output path.
     */
    public void setOutput(String output) {
        this.output = output;
    }

    /**
     * Set the output path for this step.
     *
     * @param output Output path
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     */
    public StreamingStep withOutput(String output) {
        this.output = output;
        return this;
    }

    /**
     * Get the mapper.
     *
     * @return Mapper.
     */
    public String getMapper() {
        return mapper;
    }

    /**
     * Set the mapper.
     *
     * @param mapper Mapper
     */
    public void setMapper(String mapper) {
        this.mapper = mapper;
    }

    /**
     * Set the mapper.
     *
     * @param mapper Mapper
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     */
    public StreamingStep withMapper(String mapper) {
        this.mapper = mapper;
        return this;
    }

    /**
     * Get the reducer.
     *
     * @return Reducer
     */
    public String getReducer() {
        return reducer;
    }

    /**
     * Set the reducer.
     *
     * @param reducer Reducer
     */
    public void setReducer(String reducer) {
        this.reducer = reducer;
    }

    /**
     * Set the reducer.
     *
     * @param reducer Reducer
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     */
    public StreamingStep withReducer(String reducer) {
        this.reducer = reducer;
        return this;
    }

    /**
     * Get the Hadoop config overrides (-D values).
     *
     * @return Hadoop config. This is the live internal map, not a copy.
     */
    public Map<String, String> getHadoopConfig() {
        return hadoopConfig;
    }

    /**
     * Set the Hadoop config overrides (-D values). A non-null map is stored by
     * reference (not copied); {@code null} is replaced by a new empty map so
     * that later calls on this object do not fail.
     *
     * @param hadoopConfig Hadoop config.
     */
    public void setHadoopConfig(Map<String, String> hadoopConfig) {
        if (hadoopConfig == null) {
            this.hadoopConfig = new HashMap<String, String>();
        } else {
            this.hadoopConfig = hadoopConfig;
        }
    }

    /**
     * Add a Hadoop config override (-D value).
     *
     * @param key Hadoop configuration key.
     * @param value Configuration value.
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     */
    public StreamingStep withHadoopConfig(String key, String value) {
        hadoopConfig.put(key, value);
        return this;
    }

    /**
     * Creates the final HadoopJarStepConfig once you are done configuring the
     * step. You can use this as you would any other HadoopJarStepConfig.
     * <p>
     * The argument list is built in this order: one {@code -D key=value} pair
     * per Hadoop config entry, one {@code -input} per input path, then
     * {@code -output}, {@code -mapper} and {@code -reducer} for whichever of
     * those are set. When no reducer is set, {@code mapred.reduce.tasks=0} is
     * emitted, overriding any value configured for that key.
     * <p>
     * This method does not modify the state of this object, so the step can be
     * reconfigured (for example by adding a reducer) and converted again.
     *
     * @return HadoopJarStepConfig representing this streaming step.
     */
    public HadoopJarStepConfig toHadoopJarStepConfig() {
        // Work on a copy: the forced reduce-task override must not leak into
        // this object's config map (which may also be the caller's own map).
        Map<String, String> effectiveConfig = new HashMap<String, String>(hadoopConfig);
        if (reducer == null) {
            effectiveConfig.put(REDUCE_TASKS_KEY, "0");
        }

        List<String> args = new ArrayList<String>();
        for (Map.Entry<String, String> entry : effectiveConfig.entrySet()) {
            args.add("-D");
            args.add(entry.getKey() + "=" + entry.getValue());
        }
        for (String input : inputs) {
            args.add("-input");
            args.add(input);
        }
        if (output != null) {
            args.add("-output");
            args.add(output);
        }
        if (mapper != null) {
            args.add("-mapper");
            args.add(mapper);
        }
        if (reducer != null) {
            args.add("-reducer");
            args.add(reducer);
        }

        return new HadoopJarStepConfig()
            .withJar(STREAMING_JAR)
            .withArgs(args);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy