 
                        
        
                        
// com.streamsets.pipeline.spark.api.TransformResult Maven / Gradle / Ivy
// The newest version!
        
        /*
 * Copyright 2018 StreamSets Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.pipeline.spark.api;
import com.streamsets.pipeline.api.Record;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.PairFlatMapFunction;
/**
 * Holds the outcome of a {@linkplain SparkTransformer} run: the RDD of {@linkplain Record}s that
 * were processed successfully and the pair RDD of records that failed, each paired with its
 * error message.
 *
 * <p>The two RDD references are stored exactly as passed to the constructor; this class performs
 * no copying or validation.
 */
public class TransformResult {
  private final JavaRDD<Record> result;
  private final JavaPairRDD<Record, String> errors;

  /**
   * Create a result from the RDDs produced by a transformer.
   *
   * @param result The RDD containing the records that were successfully processed.
   * @param errors A Pair RDD containing the error records and corresponding error messages.
   */
  public TransformResult(JavaRDD<Record> result, JavaPairRDD<Record, String> errors) {
    this.result = result;
    this.errors = errors;
  }

  /**
   * Return the result of the processing done via {@linkplain SparkTransformer}.
   * The {@linkplain Record}s from this RDD are passed to the next stage in the pipeline.
   *
   * <p>Ideally, Records that are added to error should not appear here, but there is no restriction imposed.
   * Exceptions should not be thrown for bad records, they should instead be returned via the
   * {@linkplain #getErrors()} method.
   *
   * @return The RDD containing the records that were successfully processed.
   */
  public JavaRDD<Record> getResult() {
    return result;
  }

  /**
   * Get the {@linkplain Record}s that were not processed successfully and their corresponding error messages.
   * These records will be written to the error stream for the pipeline and not passed
   * to the next stage.
   *
   * <p>A good way to calculate errors is to use
   * {@linkplain org.apache.spark.api.java.JavaRDD#mapPartitionsToPair(PairFlatMapFunction)} and generate the
   * error records and their messages into an RDD.
   *
   * @return A Pair RDD containing the error records and corresponding error messages.
   */
  public JavaPairRDD<Record, String> getErrors() {
    return errors;
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy