All downloads are free. Search and download functionality uses the official Maven repository.

com.amazonaws.services.elasticmapreduce.util.StreamingStep Maven / Gradle / Ivy

Go to download

The Amazon Web Services SDK for Java provides Java APIs for building software on AWS' cost-effective, scalable, and reliable infrastructure products. The AWS Java SDK allows developers to code against APIs for all of Amazon's infrastructure web services (Amazon S3, Amazon EC2, Amazon SQS, Amazon Relational Database Service, Amazon AutoScaling, etc).

The newest version!
/*
 * Copyright 2010-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.amazonaws.services.elasticmapreduce.util;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;

/**
 * Class that makes it easy to define Hadoop Streaming steps.
 *
 * <p>See also: the Apache Hadoop Streaming documentation.
 *
 * <p>Example usage:
 * <pre>
 * AWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey);
 * AmazonElasticMapReduce emr = new AmazonElasticMapReduceClient(credentials);
 *
 * HadoopJarStepConfig config = new StreamingStep()
 *     .withInputs("s3://elasticmapreduce/samples/wordcount/input")
 *     .withOutput("s3://my-bucket/output/")
 *     .withMapper("s3://elasticmapreduce/samples/wordcount/wordSplitter.py")
 *     .withReducer("aggregate")
 *     .toHadoopJarStepConfig();
 *
 * StepConfig wordCount = new StepConfig()
 *     .withName("Word Count")
 *     .withActionOnFailure("TERMINATE_JOB_FLOW")
 *     .withHadoopJarStep(config);
 *
 * RunJobFlowRequest request = new RunJobFlowRequest()
 *     .withName("Word Count")
 *     .withSteps(wordCount)
 *     .withLogUri("s3://log-bucket/")
 *     .withInstances(new JobFlowInstancesConfig()
 *         .withEc2KeyName("keypair")
 *         .withHadoopVersion("0.20")
 *         .withInstanceCount(5)
 *         .withKeepJobFlowAliveWhenNoSteps(true)
 *         .withMasterInstanceType("m1.small")
 *         .withSlaveInstanceType("m1.small"));
 *
 * RunJobFlowResult result = emr.runJobFlow(request);
 * </pre>
 */
public class StreamingStep {

    /** Jar that is passed to {@code HadoopJarStepConfig.withJar} for every streaming step. */
    private static final String STREAMING_JAR =
            "/home/hadoop/contrib/streaming/hadoop-streaming.jar";

    /** Hadoop property that is forced to {@code "0"} when no reducer is configured. */
    private static final String REDUCE_TASKS_KEY = "mapred.reduce.tasks";

    private List<String> inputs = new ArrayList<String>();
    private String output;
    private String mapper;
    private String reducer;
    private Map<String, String> hadoopConfig = new HashMap<String, String>();

    /**
     * Creates a new default StreamingStep.
     */
    public StreamingStep() {
    }

    /**
     * Get list of step input paths.
     *
     * @return List of step inputs. This is the internal list, not a copy.
     */
    public List<String> getInputs() {
        return inputs;
    }

    /**
     * Set the list of step input paths. The given collection is copied; a
     * {@code null} argument results in an empty input list.
     *
     * @param inputs List of step inputs.
     */
    public void setInputs(Collection<String> inputs) {
        List<String> newInputs = new ArrayList<String>();
        if (inputs != null) {
            newInputs.addAll(inputs);
        }
        this.inputs = newInputs;
    }

    /**
     * Add more input paths to this step.
     *
     * @param inputs A list of inputs to this step. A {@code null} array is
     *        treated as "nothing to add".
     * @return A reference to this updated object so that method calls can be chained
     *         together.
     */
    public StreamingStep withInputs(String... inputs) {
        if (inputs != null) {
            for (String input : inputs) {
                this.inputs.add(input);
            }
        }
        return this;
    }

    /**
     * Get output path.
     *
     * @return Output path.
     */
    public String getOutput() {
        return output;
    }

    /**
     * Set the output path for this step.
     *
     * @param output Output path.
     */
    public void setOutput(String output) {
        this.output = output;
    }

    /**
     * Set the output path for this step.
     *
     * @param output Output path
     * @return A reference to this updated object so that method calls can be chained
     *         together.
     */
    public StreamingStep withOutput(String output) {
        this.output = output;
        return this;
    }

    /**
     * Get the mapper.
     *
     * @return Mapper.
     */
    public String getMapper() {
        return mapper;
    }

    /**
     * Set the mapper.
     *
     * @param mapper Mapper
     */
    public void setMapper(String mapper) {
        this.mapper = mapper;
    }

    /**
     * Set the mapper.
     *
     * @param mapper Mapper
     * @return A reference to this updated object so that method calls can be chained
     *         together.
     */
    public StreamingStep withMapper(String mapper) {
        this.mapper = mapper;
        return this;
    }

    /**
     * Get the reducer.
     *
     * @return Reducer
     */
    public String getReducer() {
        return reducer;
    }

    /**
     * Set the reducer.
     *
     * @param reducer Reducer
     */
    public void setReducer(String reducer) {
        this.reducer = reducer;
    }

    /**
     * Set the reducer.
     *
     * @param reducer Reducer
     * @return A reference to this updated object so that method calls can be chained
     *         together.
     */
    public StreamingStep withReducer(String reducer) {
        this.reducer = reducer;
        return this;
    }

    /**
     * Get the Hadoop config overrides (-D values).
     *
     * @return Hadoop config. This is the internal map, not a copy.
     */
    public Map<String, String> getHadoopConfig() {
        return hadoopConfig;
    }

    /**
     * Set the Hadoop config overrides (-D values). A {@code null} argument
     * results in an empty configuration, mirroring how {@link #setInputs}
     * treats {@code null}, so later calls on this object do not fail with a
     * {@code NullPointerException}.
     *
     * @param hadoopConfig Hadoop config.
     */
    public void setHadoopConfig(Map<String, String> hadoopConfig) {
        if (hadoopConfig == null) {
            this.hadoopConfig = new HashMap<String, String>();
        } else {
            this.hadoopConfig = hadoopConfig;
        }
    }

    /**
     * Add a Hadoop config override (-D value).
     *
     * @param key Hadoop configuration key.
     * @param value Configuration value.
     * @return A reference to this updated object so that method calls can be chained
     *         together.
     */
    public StreamingStep withHadoopConfig(String key, String value) {
        hadoopConfig.put(key, value);
        return this;
    }

    /**
     * Creates the final HadoopJarStepConfig once you are done configuring the step. You can use
     * this as you would any other HadoopJarStepConfig.
     *
     * <p>The arguments are emitted in this order: one {@code -D key=value} pair per
     * Hadoop config entry, one {@code -input} per input path, then {@code -output},
     * {@code -mapper} and {@code -reducer} for each of those values that is set.
     * When no reducer is set, {@code mapred.reduce.tasks} is emitted as {@code 0},
     * replacing any value configured for that key.
     *
     * <p>Calling this method does not modify the state of this object, so it can be
     * called again after further configuration changes.
     *
     * @return HadoopJarStepConfig representing this streaming step.
     */
    public HadoopJarStepConfig toHadoopJarStepConfig() {
        // Work on a copy: writing the reduce-task override into the instance map
        // would leave a stale "0" behind if a reducer is configured later.
        Map<String, String> effectiveConfig = new HashMap<String, String>(hadoopConfig);
        if (reducer == null) {
            effectiveConfig.put(REDUCE_TASKS_KEY, "0");
        }

        List<String> args = new ArrayList<String>();

        for (Map.Entry<String, String> entry : effectiveConfig.entrySet()) {
            args.add("-D");
            args.add(entry.getKey() + "=" + entry.getValue());
        }

        for (String input : inputs) {
            args.add("-input");
            args.add(input);
        }

        if (output != null) {
            args.add("-output");
            args.add(output);
        }

        if (mapper != null) {
            args.add("-mapper");
            args.add(mapper);
        }

        if (reducer != null) {
            args.add("-reducer");
            args.add(reducer);
        }

        return new HadoopJarStepConfig()
                .withJar(STREAMING_JAR)
                .withArgs(args);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy