// eu.stratosphere.api.java.operators.OperatorTranslation (Maven / Gradle / Ivy)
/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.java.operators;
import eu.stratosphere.api.common.operators.AbstractUdfOperator;
import eu.stratosphere.api.common.operators.BinaryOperatorInformation;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.BulkIterationBase;
import eu.stratosphere.api.common.operators.base.DeltaIterationBase;
import eu.stratosphere.api.common.operators.base.GenericDataSinkBase;
import eu.stratosphere.api.java.BulkIterationResultSet;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.DeltaIteration;
import eu.stratosphere.api.java.DeltaIterationResultSet;
import eu.stratosphere.api.java.IterativeDataSet;
import eu.stratosphere.api.java.operators.translation.JavaPlan;
import eu.stratosphere.configuration.Configuration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class OperatorTranslation {
/** The already translated operations */
private Map, Operator>> translated = new HashMap, Operator>>();
public JavaPlan translateToPlan(List> sinks, String jobName) {
List> planSinks = new ArrayList>();
for (DataSink> sink : sinks) {
planSinks.add(translate(sink));
}
return new JavaPlan(planSinks);
}
private GenericDataSinkBase translate(DataSink sink) {
// translate the input recursively
Operator input = translate(sink.getDataSet());
// translate the sink itself and connect it to the input
GenericDataSinkBase translatedSink = sink.translateToDataFlow(input);
return translatedSink;
}
private Operator translate(DataSet dataSet) {
// check if we have already translated that data set (operation or source)
Operator> previous = (Operator>) this.translated.get(dataSet);
if (previous != null) {
@SuppressWarnings("unchecked")
Operator typedPrevious = (Operator) previous;
return typedPrevious;
}
Operator dataFlowOp;
if (dataSet instanceof DataSource) {
dataFlowOp = ((DataSource) dataSet).translateToDataFlow();
}
else if (dataSet instanceof SingleInputOperator) {
dataFlowOp = translateSingleInputOperator((SingleInputOperator, ?, ?>) dataSet);
}
else if (dataSet instanceof TwoInputOperator) {
dataFlowOp = translateTwoInputOperator((TwoInputOperator, ?, ?, ?>) dataSet);
}
else if (dataSet instanceof BulkIterationResultSet) {
dataFlowOp = translateBulkIteration((BulkIterationResultSet>) dataSet);
}
else if (dataSet instanceof DeltaIterationResultSet) {
dataFlowOp = translateDeltaIteration((DeltaIterationResultSet, ?>) dataSet);
}
else {
throw new RuntimeException("Error while creating the data flow plan for the program: Unknown operator or data set type: " + dataSet);
}
this.translated.put(dataSet, dataFlowOp);
// take care of broadcast variables
translateBcVariables(dataSet, dataFlowOp);
return dataFlowOp;
}
private eu.stratosphere.api.common.operators.SingleInputOperator, O, ?> translateSingleInputOperator(SingleInputOperator, ?, ?> op) {
@SuppressWarnings("unchecked")
SingleInputOperator typedOp = (SingleInputOperator) op;
@SuppressWarnings("unchecked")
DataSet typedInput = (DataSet) op.getInput();
Operator input = translate(typedInput);
eu.stratosphere.api.common.operators.SingleInputOperator, O, ?> dataFlowOp = typedOp.translateToDataFlow(input);
if (op instanceof UdfOperator> ) {
@SuppressWarnings("unchecked")
SingleInputUdfOperator udfOp = (SingleInputUdfOperator) op;
// set configuration parameters
Configuration opParams = udfOp.getParameters();
if (opParams != null) {
dataFlowOp.getParameters().addAll(opParams);
}
// set the semantic properties
dataFlowOp.setSemanticProperties(udfOp.getSematicProperties());
}
return dataFlowOp;
}
private eu.stratosphere.api.common.operators.DualInputOperator, ?, O, ?> translateTwoInputOperator(TwoInputOperator, ?, ?, ?> op) {
@SuppressWarnings("unchecked")
TwoInputOperator typedOp = (TwoInputOperator) op;
@SuppressWarnings("unchecked")
DataSet typedInput1 = (DataSet) op.getInput1();
@SuppressWarnings("unchecked")
DataSet typedInput2 = (DataSet) op.getInput2();
Operator input1 = translate(typedInput1);
Operator input2 = translate(typedInput2);
eu.stratosphere.api.common.operators.DualInputOperator, ?, O, ?> dataFlowOp = typedOp.translateToDataFlow(input1, input2);
if (op instanceof UdfOperator> ) {
@SuppressWarnings("unchecked")
TwoInputUdfOperator udfOp = (TwoInputUdfOperator) op;
// set configuration parameters
Configuration opParams = udfOp.getParameters();
if (opParams != null) {
dataFlowOp.getParameters().addAll(opParams);
}
// set the semantic properties
dataFlowOp.setSemanticProperties(udfOp.getSematicProperties());
}
return dataFlowOp;
}
private BulkIterationBase translateBulkIteration(BulkIterationResultSet> untypedIterationEnd) {
@SuppressWarnings("unchecked")
BulkIterationResultSet iterationEnd = (BulkIterationResultSet) untypedIterationEnd;
BulkIterationBase iterationOperator =
new BulkIterationBase(new UnaryOperatorInformation(iterationEnd.getType(), iterationEnd.getType()), "Bulk Iteration");
IterativeDataSet iterationHead = iterationEnd.getIterationHead();
translated.put(iterationHead, iterationOperator.getPartialSolution());
Operator translatedBody = translate(iterationEnd.getNextPartialSolution());
iterationOperator.setNextPartialSolution(translatedBody);
iterationOperator.setMaximumNumberOfIterations(iterationHead.getMaxIterations());
iterationOperator.setInput(translate(iterationHead.getInput()));
iterationOperator.getAggregators().addAll(iterationHead.getAggregators());
if(iterationEnd.getTerminationCriterion() != null) {
iterationOperator.setTerminationCriterion(translate(iterationEnd.getTerminationCriterion()));
}
return iterationOperator;
}
private DeltaIterationBase translateDeltaIteration(DeltaIterationResultSet, ?> untypedIterationEnd) {
@SuppressWarnings("unchecked")
DeltaIterationResultSet iterationEnd = (DeltaIterationResultSet) untypedIterationEnd;
DeltaIterationBase iterationOperator = new DeltaIterationBase(new BinaryOperatorInformation(iterationEnd.getType(), iterationEnd.getWorksetType(), iterationEnd.getType()),
iterationEnd.getKeyPositions(), "Unnamed Java Delta Iteration");
iterationOperator.setMaximumNumberOfIterations(iterationEnd.getMaxIterations());
DeltaIteration iterationHead = iterationEnd.getIterationHead();
DeltaIteration.SolutionSetPlaceHolder solutionSetPlaceHolder = iterationHead.getSolutionSet();
DeltaIteration.WorksetPlaceHolder worksetPlaceHolder = iterationHead.getWorkset();
translated.put(solutionSetPlaceHolder, iterationOperator.getSolutionSet());
translated.put(worksetPlaceHolder, iterationOperator.getWorkset());
Operator translatedSolutionSet = translate(iterationEnd.getNextSolutionSet());
Operator translatedWorkset = translate(iterationEnd.getNextWorkset());
iterationOperator.setNextWorkset(translatedWorkset);
iterationOperator.setSolutionSetDelta(translatedSolutionSet);
iterationOperator.setInitialSolutionSet(translate(iterationHead.getInitialSolutionSet()));
iterationOperator.setInitialWorkset(translate(iterationHead.getInitialWorkset()));
return iterationOperator;
}
private void translateBcVariables(DataSet> setOrOp, Operator> dataFlowOp) {
// check if this is actually an operator that could have broadcast variables
if (setOrOp instanceof UdfOperator) {
if (!(dataFlowOp instanceof AbstractUdfOperator, ?>)) {
throw new RuntimeException("Error while creating the data flow plan for the program: A UDF operation was not translated to a UDF operator.");
}
UdfOperator> udfOp = (UdfOperator>) setOrOp;
AbstractUdfOperator, ?> udfDataFlowOp = (AbstractUdfOperator, ?>) dataFlowOp;
for (Map.Entry> bcVariable : udfOp.getBroadcastSets().entrySet()) {
Operator> bcInput = translate(bcVariable.getValue());
udfDataFlowOp.setBroadcastVariable(bcVariable.getKey(), bcInput);
}
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy