All downloads are free. Search and download functionality uses the official Maven repository.

co.cask.cdap.etl.batch.KVTransformations Maven / Gradle / Ivy

/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.etl.batch;

import co.cask.cdap.api.dataset.lib.KeyValue;
import co.cask.cdap.etl.api.Emitter;
import co.cask.cdap.etl.api.InvalidEntry;
import co.cask.cdap.etl.api.Transformation;
import co.cask.cdap.etl.api.batch.BatchAggregator;
import co.cask.cdap.etl.api.batch.BatchJoiner;
import co.cask.cdap.etl.api.batch.BatchSink;
import co.cask.cdap.etl.api.batch.BatchSource;
import co.cask.cdap.etl.common.Constants;
import co.cask.cdap.etl.common.DefaultEmitter;

/**
 * Key-Value transformation which wraps transformation for each batch plugin to emit stageName along with each record
 * except for {@link BatchSink}
 */
public final class KVTransformations {

  private KVTransformations() {
  }

  /**
   * Creates {@link Transformation} which adds stageName to each record being emitted from current stage,
   * except for {@link BatchSink}. Each stage will strip off stageName, apply original transformation and then again
   * wrap record with stageName. For {@link co.cask.cdap.etl.api.Joiner} the record is passed with stageName.
   *
   * @param stageName      stageName which is emitting the record
   * @param pluginType     type of the stage
   * @param isMapPhase     if it is map phase
   * @param transformation transformation to be wrapped
   * @return {@link Transformation} to wrap/unwrap stageName
   */
  public static Transformation getKVTransformation(String stageName, String pluginType, boolean isMapPhase,
                                                   Transformation transformation) {
    if (BatchSink.PLUGIN_TYPE.equalsIgnoreCase(pluginType)) {
      return new KVSinkTransformation<>(transformation);
    } else if (BatchSource.PLUGIN_TYPE.equalsIgnoreCase(pluginType)) {
      return new KVSourceTransformation<>(stageName, transformation);
    } else if (Constants.CONNECTOR_TYPE.equalsIgnoreCase(pluginType)) {
      return transformation;
    } else if (BatchJoiner.PLUGIN_TYPE.equalsIgnoreCase(pluginType)) {
      if (isMapPhase) {
        return transformation;
      } else {
        return new KVSourceTransformation<>(stageName, transformation);
      }
    } else if (BatchAggregator.PLUGIN_TYPE.equalsIgnoreCase(pluginType)) {
      if (isMapPhase) {
        return new KVSinkTransformation<>(transformation);
      } else {
        return new KVSourceTransformation<>(stageName, transformation);
      }
    }
    return new KVWrappedTransformation(stageName, transformation);
  }

  /**
   * Converts input to (stageName, input)
   *
   * @param   type of input
   * @param  type of output
   */
  public static class KVSourceTransformation implements Transformation> {
    private final String stageName;
    private final Transformation transformation;
    private final DefaultEmitter singleEmitter;

    public KVSourceTransformation(String stageName, Transformation transformation) {
      this.stageName = stageName;
      this.transformation = transformation;
      this.singleEmitter = new DefaultEmitter<>();
    }

    @Override
    public void transform(IN input, Emitter> emitter) throws Exception {
      singleEmitter.reset();
      transformation.transform(input, singleEmitter);
      for (OUT out : singleEmitter.getEntries()) {
        emitter.emit(new KeyValue<>(stageName, out));
      }
      for (InvalidEntry error : singleEmitter.getErrors()) {
        emitter.emitError(new InvalidEntry<>(error.getErrorCode(), error.getErrorMsg(),
                                             new KeyValue<>(stageName, error.getInvalidRecord())));
      }
    }
  }

  /**
   * Converts (stageName, input) to input
   *
   * @param   type of input
   * @param  type of output
   */
  public static class KVSinkTransformation implements Transformation, OUT> {
    private final Transformation transformation;
    private final DefaultEmitter singleEmitter;

    public KVSinkTransformation(Transformation transformation) {
      this.transformation = transformation;
      this.singleEmitter = new DefaultEmitter<>();
    }

    @Override
    public void transform(KeyValue input, Emitter emitter) throws Exception {
      singleEmitter.reset();
      transformation.transform(input.getValue(), singleEmitter);
      for (OUT out : singleEmitter.getEntries()) {
        emitter.emit(out);
      }
      for (InvalidEntry error : singleEmitter.getErrors()) {
        emitter.emitError(new InvalidEntry<>(error.getErrorCode(), error.getErrorMsg(), error.getInvalidRecord()));
      }
    }
  }

  /**
   * Unwraps (stageName, input) to input, applies transformation and wraps output to (stageName, output)
   *
   * @param   type of input
   * @param  type of output
   */
  public static class KVWrappedTransformation implements Transformation, KeyValue> {
    private final String stageName;
    private final Transformation transformation;
    private final DefaultEmitter singleEmitter;

    public KVWrappedTransformation(String stageName, Transformation transformation) {
      this.stageName = stageName;
      this.transformation = transformation;
      this.singleEmitter = new DefaultEmitter<>();
    }

    @Override
    public void transform(KeyValue input, Emitter> emitter) throws Exception {
      singleEmitter.reset();
      transformation.transform(input.getValue(), singleEmitter);
      for (OUT out : singleEmitter.getEntries()) {
        emitter.emit(new KeyValue<>(stageName, out));
      }
      for (InvalidEntry error : singleEmitter.getErrors()) {
        emitter.emitError(new InvalidEntry<>(error.getErrorCode(), error.getErrorMsg(),
                                             new KeyValue<>(stageName, error.getInvalidRecord())));
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy