// co.cask.cdap.etl.api.batch.SparkCompute — Maven / Gradle / Ivy (extraction artifact, kept as comment)
/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.etl.api.batch;
import co.cask.cdap.api.annotation.Beta;
import co.cask.cdap.etl.api.PipelineConfigurable;
import co.cask.cdap.etl.api.PipelineConfigurer;
import org.apache.spark.api.java.JavaRDD;
import java.io.Serializable;
/**
 * Spark Compute stage. Takes an input {@link JavaRDD}, performs a transformation on it using Spark,
 * and returns an output {@link JavaRDD} to be sent to the next stage in the pipeline.
 *
 * @param <IN> Type of input object
 * @param <OUT> Type of output object
 */
@Beta
public abstract class SparkCompute<IN, OUT> implements PipelineConfigurable, Serializable {
  public static final String PLUGIN_TYPE = "sparkcompute";

  private static final long serialVersionUID = -8156450728774382658L;

  /**
   * Configure an ETL pipeline.
   *
   * @param pipelineConfigurer the configurer used to add required datasets and streams
   * @throws IllegalArgumentException if the given config is invalid
   */
  @Override
  public void configurePipeline(PipelineConfigurer pipelineConfigurer) throws IllegalArgumentException {
    // no-op by default; subclasses override to register required datasets/streams or validate config
  }

  /**
   * Transform the input and return the output to be sent to the next stage in the pipeline.
   *
   * @param context {@link SparkExecutionPluginContext} for this job
   * @param input input data to be transformed
   * @return the transformed data
   * @throws Exception if there's an error during this method invocation
   */
  public abstract JavaRDD<OUT> transform(SparkExecutionPluginContext context, JavaRDD<IN> input) throws Exception;
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (website footer artifact, kept as comment)