// com.streamsets.pipeline.spark.api.TransformResult (Maven / Gradle / Ivy)
// The newest version!
/*
* Copyright 2018 StreamSets Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.spark.api;
import com.streamsets.pipeline.api.Record;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.PairFlatMapFunction;
/**
 * Immutable holder for the outcome of a Spark transformation: the RDD of records that were
 * processed successfully, and the pair RDD of records that failed together with their error
 * messages.
 *
 * <p>Both RDDs are stored exactly as supplied to the constructor and are returned unchanged by
 * the accessors.
 */
public class TransformResult {

  // Parameterized (rather than raw) so the compiler checks the element types that the
  // documented contract describes: records, and (record, error message) pairs.
  private final JavaRDD<Record> result;
  private final JavaPairRDD<Record, String> errors;

  /**
   * Creates a result wrapping the given RDDs.
   *
   * @param result the RDD containing the records that were successfully processed
   * @param errors the pair RDD containing the error records and their corresponding error
   *     messages
   */
  public TransformResult(JavaRDD<Record> result, JavaPairRDD<Record, String> errors) {
    this.result = result;
    this.errors = errors;
  }

  /**
   * Return the result of the processing done via {@linkplain SparkTransformer}.
   * The {@linkplain Record}s from this RDD are passed to the next stage in the pipeline.
   *
   * <p>Ideally, records that are added to the errors should not appear here, but there is no
   * restriction imposed. Exceptions should not be thrown for bad records; they should instead be
   * returned via the {@linkplain #getErrors()} method.
   *
   * @return the RDD containing the records that were successfully processed
   */
  public JavaRDD<Record> getResult() {
    return result;
  }

  /**
   * Get the {@linkplain Record}s that were not processed successfully and their corresponding
   * error messages. These records will be written to the error stream for the pipeline and not
   * passed to the next stage.
   *
   * <p>A good way to calculate errors is to use
   * {@linkplain org.apache.spark.api.java.JavaRDD#mapPartitionsToPair(PairFlatMapFunction)} and
   * generate the error records and their messages into an RDD.
   *
   * @return a pair RDD containing the error records and corresponding error messages
   */
  public JavaPairRDD<Record, String> getErrors() {
    return errors;
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy