// co.cask.cdap.etl.batch.KVTransformations (Maven / Gradle / Ivy)
/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.etl.batch;
import co.cask.cdap.api.dataset.lib.KeyValue;
import co.cask.cdap.etl.api.Emitter;
import co.cask.cdap.etl.api.InvalidEntry;
import co.cask.cdap.etl.api.Transformation;
import co.cask.cdap.etl.api.batch.BatchAggregator;
import co.cask.cdap.etl.api.batch.BatchJoiner;
import co.cask.cdap.etl.api.batch.BatchSink;
import co.cask.cdap.etl.api.batch.BatchSource;
import co.cask.cdap.etl.common.Constants;
import co.cask.cdap.etl.common.DefaultEmitter;
/**
* Factory for key-value transformations that wrap the transformation of each batch plugin so that the stageName
* is emitted along with each record, except for {@link BatchSink}.
*/
public final class KVTransformations {
// Private constructor: this final class only exposes static members, so it is never instantiated.
private KVTransformations() {
}
/**
 * Creates a {@link Transformation} which adds stageName to each record being emitted from the current stage,
 * except for {@link BatchSink}. Each stage will strip off stageName, apply the original transformation and then
 * wrap the record with stageName again. For {@link co.cask.cdap.etl.api.Joiner} the record is passed with stageName.
 *
 * <p>The wrapper is chosen from the plugin type (compared case-insensitively):
 * <ul>
 *   <li>sink: unwrap only ({@link KVSinkTransformation});</li>
 *   <li>source: wrap only ({@link KVSourceTransformation});</li>
 *   <li>connector: the given transformation is returned untouched;</li>
 *   <li>joiner: untouched in the map phase, wrap only otherwise;</li>
 *   <li>aggregator: unwrap only in the map phase, wrap only otherwise;</li>
 *   <li>anything else: unwrap, transform, wrap ({@link KVWrappedTransformation}).</li>
 * </ul>
 *
 * @param stageName stageName which is emitting the record
 * @param pluginType type of the stage
 * @param isMapPhase if it is map phase
 * @param transformation transformation to be wrapped
 * @return {@link Transformation} to wrap/unwrap stageName
 */
public static Transformation getKVTransformation(String stageName, String pluginType, boolean isMapPhase,
                                                 Transformation transformation) {
  // The raw Transformation in the signature is kept as-is so existing callers keep compiling.
  if (BatchSink.PLUGIN_TYPE.equalsIgnoreCase(pluginType)) {
    // Sinks consume (stageName, record) pairs and emit plain records.
    return new KVSinkTransformation<>(transformation);
  }
  if (BatchSource.PLUGIN_TYPE.equalsIgnoreCase(pluginType)) {
    // Sources consume plain records and emit (stageName, record) pairs.
    return new KVSourceTransformation<>(stageName, transformation);
  }
  if (Constants.CONNECTOR_TYPE.equalsIgnoreCase(pluginType)) {
    // Connectors pass records through without touching the stageName wrapping.
    return transformation;
  }
  if (BatchJoiner.PLUGIN_TYPE.equalsIgnoreCase(pluginType)) {
    // Map phase: the joiner receives the record together with its stageName, so nothing is stripped.
    // Otherwise: only the output is wrapped.
    return isMapPhase ? transformation : new KVSourceTransformation<>(stageName, transformation);
  }
  if (BatchAggregator.PLUGIN_TYPE.equalsIgnoreCase(pluginType)) {
    if (isMapPhase) {
      // Map phase: strip the stageName before handing the record to the aggregator.
      return new KVSinkTransformation<>(transformation);
    }
    // Otherwise: wrap the aggregator output with this stage's name.
    return new KVSourceTransformation<>(stageName, transformation);
  }
  // Every other plugin type: unwrap the input, transform it, wrap the output.
  return new KVWrappedTransformation<>(stageName, transformation);
}
/**
* Converts input to (stageName, input)
*
* @param type of input
* @param type of output
*/
public static class KVSourceTransformation implements Transformation> {
private final String stageName;
private final Transformation transformation;
private final DefaultEmitter singleEmitter;
public KVSourceTransformation(String stageName, Transformation transformation) {
this.stageName = stageName;
this.transformation = transformation;
this.singleEmitter = new DefaultEmitter<>();
}
@Override
public void transform(IN input, Emitter> emitter) throws Exception {
singleEmitter.reset();
transformation.transform(input, singleEmitter);
for (OUT out : singleEmitter.getEntries()) {
emitter.emit(new KeyValue<>(stageName, out));
}
for (InvalidEntry error : singleEmitter.getErrors()) {
emitter.emitError(new InvalidEntry<>(error.getErrorCode(), error.getErrorMsg(),
new KeyValue<>(stageName, error.getInvalidRecord())));
}
}
}
/**
* Converts (stageName, input) to input
*
* @param type of input
* @param type of output
*/
public static class KVSinkTransformation implements Transformation, OUT> {
private final Transformation transformation;
private final DefaultEmitter singleEmitter;
public KVSinkTransformation(Transformation transformation) {
this.transformation = transformation;
this.singleEmitter = new DefaultEmitter<>();
}
@Override
public void transform(KeyValue input, Emitter emitter) throws Exception {
singleEmitter.reset();
transformation.transform(input.getValue(), singleEmitter);
for (OUT out : singleEmitter.getEntries()) {
emitter.emit(out);
}
for (InvalidEntry error : singleEmitter.getErrors()) {
emitter.emitError(new InvalidEntry<>(error.getErrorCode(), error.getErrorMsg(), error.getInvalidRecord()));
}
}
}
/**
* Unwraps (stageName, input) to input, applies transformation and wraps output to (stageName, output)
*
* @param type of input
* @param type of output
*/
public static class KVWrappedTransformation implements Transformation, KeyValue> {
private final String stageName;
private final Transformation transformation;
private final DefaultEmitter singleEmitter;
public KVWrappedTransformation(String stageName, Transformation transformation) {
this.stageName = stageName;
this.transformation = transformation;
this.singleEmitter = new DefaultEmitter<>();
}
@Override
public void transform(KeyValue input, Emitter> emitter) throws Exception {
singleEmitter.reset();
transformation.transform(input.getValue(), singleEmitter);
for (OUT out : singleEmitter.getEntries()) {
emitter.emit(new KeyValue<>(stageName, out));
}
for (InvalidEntry error : singleEmitter.getErrors()) {
emitter.emitError(new InvalidEntry<>(error.getErrorCode(), error.getErrorMsg(),
new KeyValue<>(stageName, error.getInvalidRecord())));
}
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy