All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.streamsets.pipeline.spark.api.TransformResult Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2018 StreamSets Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.pipeline.spark.api;

import com.streamsets.pipeline.api.Record;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.PairFlatMapFunction;

/**
 * Holder for the two outputs of a {@linkplain SparkTransformer} run: the RDD of {@linkplain Record}s
 * that were processed successfully, and the pair RDD of {@linkplain Record}s that failed together
 * with their error messages.
 *
 * Both RDD references are stored exactly as supplied to the constructor; this class does not copy,
 * validate or otherwise inspect them.
 */
public class TransformResult {

  // Records that were processed successfully and continue to the next stage.
  private final JavaRDD<Record> result;
  // Records that failed processing, each paired with its error message.
  private final JavaPairRDD<Record, String> errors;

  /**
   * Create a result from the processed records and the error records.
   *
   * @param result The RDD containing the records that were successfully processed.
   * @param errors A Pair RDD containing the error records and corresponding error messages.
   */
  public TransformResult(JavaRDD<Record> result, JavaPairRDD<Record, String> errors) {
    this.result = result;
    this.errors = errors;
  }

  /**
   * Return the result of the processing done via {@linkplain SparkTransformer}.
   * The {@linkplain Record}s from this RDD are passed to the next stage in the pipeline.
   *
   * Ideally, Records that are added to error should not appear here, but there is no restriction imposed.
   * Exceptions should not be thrown for bad records, they should instead be returned via the
   * {@linkplain #getErrors()} method.
   *
   * @return The RDD containing the records that were successfully processed.
   */
  public JavaRDD<Record> getResult() {
    return result;
  }

  /**
   * Get the {@linkplain Record}s that were not processed successfully and their corresponding error messages.
   * These records will be written to the error stream for the pipeline and not passed
   * to the next stage.
   *
   * A good way to calculate errors is to use
   * {@linkplain org.apache.spark.api.java.JavaRDD#mapPartitionsToPair(PairFlatMapFunction)} and generate the
   * error records and their messages into an RDD.
   *
   * @return A Pair RDD containing the error records and corresponding error messages.
   */
  public JavaPairRDD<Record, String> getErrors() {
    return errors;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy