All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.api.java.sca.UdfAnalyzer Maven / Gradle / Ivy

There is a newer version: 1.20.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.java.sca;

import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.common.functions.CrossFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.operators.DualInputSemanticProperties;
import org.apache.flink.api.common.operators.Keys;
import org.apache.flink.api.common.operators.Keys.ExpressionKeys;
import org.apache.flink.api.common.operators.SemanticProperties;
import org.apache.flink.api.common.operators.SingleInputSemanticProperties;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.SemanticPropUtil;
import org.apache.flink.api.java.sca.TaggedValue.Input;

import org.apache.flink.shaded.asm5.org.objectweb.asm.Type;
import org.apache.flink.shaded.asm5.org.objectweb.asm.tree.MethodNode;

import org.slf4j.Logger;

import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;

import static org.apache.flink.api.java.sca.UdfAnalyzerUtils.convertTypeInfoToTaggedValue;
import static org.apache.flink.api.java.sca.UdfAnalyzerUtils.findMethodNode;
import static org.apache.flink.api.java.sca.UdfAnalyzerUtils.mergeReturnValues;
import static org.apache.flink.api.java.sca.UdfAnalyzerUtils.removeUngroupedInputsFromContainer;

/**
 * Implements a Static Code Analyzer (SCA) that uses the ASM framework
 * for interpreting Java bytecode of Flink UDFs. The analyzer is build on
 * top of ASM's BasicInterpreter. Instead of ASM's BasicValues, it introduces
 * TaggedValues which extend BasicValue and allows for appending interesting
 * information to values. Interesting values such as inputs, collectors, or
 * constants are tagged such that a tracking of atomic input fields through the
 * entire UDF (until the function returns or calls collect()) is possible.
 *
 * 

The implementation is as conservative as possible meaning that for cases * or bytecode instructions that haven't been considered the analyzer * will fallback to the ASM library (which removes TaggedValues). */ @Internal public class UdfAnalyzer { // necessary to prevent endless loops and stack overflows private static final int MAX_NESTING = 20; // general information about the UDF that is available before analysis takes place private final Method baseClassMethod; private final boolean hasCollector; private final boolean isBinary; private final boolean isIterableInput; private final boolean isReduceFunction; private final boolean isFilterFunction; private final Class udfClass; private final String externalUdfName; private final String internalUdfClassName; private final TypeInformation in1Type; private final TypeInformation in2Type; private final TypeInformation outType; private final Keys keys1; private final Keys keys2; // flag if code errors should throw an CodeErrorException private final boolean throwErrorExceptions; // list of all values added with a "collect()" call private final List collectorValues; // list of all hints that can be returned after analysis private final List hints; private boolean warning = false; // stages for capturing and tagging the initial BasicValues private int state = STATE_CAPTURE_RETURN; static final int STATE_CAPTURE_RETURN = 0; static final int STATE_CAPTURE_THIS = 1; static final int STATE_CAPTURE_INPUT1 = 2; static final int STATE_CAPTURE_INPUT2 = 3; static final int STATE_CAPTURE_COLLECTOR = 4; static final int STATE_END_OF_CAPTURING = 5; static final int STATE_END_OF_ANALYZING = 6; // flag that indicates if the "hasNext()" call of an input iterator has returned "TRUE" // and can now return "FALSE" if assumption has already been used private boolean iteratorTrueAssumptionApplied; // merged return value of all return statements in the UDF private TaggedValue returnValue; // statistics for object creation hinting private int newOperationCounterOverall; private int newOperationCounterTopLevel; // stored FilterFunction input for later modification checking private TaggedValue filterInputCopy; private TaggedValue filterInputRef; public UdfAnalyzer(Class baseClass, Class udfClass, String externalUdfName, TypeInformation in1Type, TypeInformation in2Type, TypeInformation outType, Keys keys1, Keys keys2, boolean throwErrorExceptions) { baseClassMethod = baseClass.getDeclaredMethods()[0]; this.udfClass = udfClass; this.externalUdfName = externalUdfName; this.internalUdfClassName = Type.getInternalName(udfClass); this.in1Type = in1Type; this.in2Type = in2Type; this.outType = outType; this.keys1 = keys1; this.keys2 = keys2; this.throwErrorExceptions = throwErrorExceptions; if (baseClass == CoGroupFunction.class) { hasCollector = true; isBinary = true; isIterableInput = true; isReduceFunction = false; isFilterFunction = false; iteratorTrueAssumptionApplied = true; } else if (baseClass == CrossFunction.class) { hasCollector = false; isBinary = true; isIterableInput = false; isReduceFunction = false; isFilterFunction = false; iteratorTrueAssumptionApplied = true; } else if (baseClass == FlatJoinFunction.class) { hasCollector = true; isBinary = true; isIterableInput = false; isReduceFunction = false; isFilterFunction = false; iteratorTrueAssumptionApplied = true; } else if (baseClass == FlatMapFunction.class) { hasCollector = true; isBinary = false; isIterableInput = false; isReduceFunction = false; isFilterFunction = false; iteratorTrueAssumptionApplied = true; } else if (baseClass == GroupReduceFunction.class) { hasCollector = true; isBinary = false; isIterableInput = true; isReduceFunction = false; isFilterFunction = false; iteratorTrueAssumptionApplied = false; } else if (baseClass == JoinFunction.class) { hasCollector = false; isBinary = true; isIterableInput = false; isReduceFunction = false; isFilterFunction = false; iteratorTrueAssumptionApplied = true; } else if (baseClass == MapFunction.class) { hasCollector = false; isBinary = false; isIterableInput = false; isReduceFunction = false; isFilterFunction = false; iteratorTrueAssumptionApplied = true; } else if (baseClass == ReduceFunction.class) { hasCollector = false; isBinary = false; isIterableInput = false; isReduceFunction = true; isFilterFunction = false; iteratorTrueAssumptionApplied = true; } else if (baseClass == FilterFunction.class) { hasCollector = false; isBinary = false; isIterableInput = false; isReduceFunction = false; isFilterFunction = true; iteratorTrueAssumptionApplied = true; } // TODO MapPartitionFunction, GroupCombineFunction and CombineFunction not implemented yet else { throw new UnsupportedOperationException("Unsupported operator."); } if (hasCollector) { collectorValues = new ArrayList(); } else { collectorValues = null; } hints = new ArrayList(); } public int getState() { return state; } public void setState(int state) { this.state = state; } public boolean isUdfBinary() { return isBinary; } public boolean isIteratorTrueAssumptionApplied() { return iteratorTrueAssumptionApplied; } public void applyIteratorTrueAssumption() { iteratorTrueAssumptionApplied = true; } public void incrNewOperationCounters(boolean topLevel) { newOperationCounterOverall++; if (topLevel) { newOperationCounterTopLevel++; } } public boolean hasUdfCollector() { return hasCollector; } public boolean hasUdfIterableInput() { return isIterableInput; } public boolean isUdfReduceFunction() { return isReduceFunction; } public String getInternalUdfClassName() { return internalUdfClassName; } public List getCollectorValues() { return collectorValues; } public boolean analyze() throws CodeAnalyzerException { if (state == STATE_END_OF_ANALYZING) { throw new IllegalStateException("Analyzing is already done."); } boolean discardReturnValues = false; if (isIterableInput) { if (keys1 == null || (keys2 == null && isBinary)) { throw new IllegalArgumentException("This type of function requires key information for analysis."); } else if (!(keys1 instanceof ExpressionKeys) || (!(keys2 instanceof ExpressionKeys) && isBinary)) { // TODO currently only ExpressionKeys are supported as keys discardReturnValues = true; } } try { final Object[] mn = findMethodNode(internalUdfClassName, baseClassMethod); final NestedMethodAnalyzer nma = new NestedMethodAnalyzer(this, (String) mn[1], (MethodNode) mn[0], null, MAX_NESTING, true); final TaggedValue result = nma.analyze(); setState(STATE_END_OF_ANALYZING); // special case: FilterFunction if (isFilterFunction) { discardReturnValues = true; // check for input modification if (!filterInputCopy.equals(filterInputRef)) { addHintOrThrowException("Function modifies the input. This can lead to unexpected behaviour during runtime."); } } if (!discardReturnValues) { // merge return values of a collector if (hasCollector) { returnValue = mergeReturnValues(collectorValues); } else { returnValue = result; } // remove ungrouped inputs from result if UDF has iterators // or is a reduce function if ((isIterableInput || isReduceFunction) && returnValue != null) { if (returnValue.canContainFields()) { removeUngroupedInputsFromContainer(returnValue); } else if (returnValue.isInput() && !returnValue.isGrouped()) { returnValue = null; } } } // any return value is invalid else { returnValue = null; } } catch (Exception e) { Throwable cause = e.getCause(); while (cause != null && !(cause instanceof CodeErrorException)) { cause = cause.getCause(); } if (cause instanceof CodeErrorException || e instanceof CodeErrorException) { throw new CodeErrorException("Function code contains obvious errors. " + "If you think the code analysis is wrong at this point you can " + "disable the entire code analyzer in ExecutionConfig or add" + " @SkipCodeAnalysis to your function to disable the analysis.", (cause != null) ? cause : e); } throw new CodeAnalyzerException("Exception occurred during code analysis.", e); } return true; } public SemanticProperties getSemanticProperties() { final SemanticProperties sp; if (isBinary) { sp = new DualInputSemanticProperties(); if (returnValue != null) { String[] ff1Array = null; final String ff1 = returnValue.toForwardedFieldsExpression(Input.INPUT_1); if (ff1 != null && ff1.length() > 0) { ff1Array = new String[]{ff1}; } String[] ff2Array = null; final String ff2 = returnValue.toForwardedFieldsExpression(Input.INPUT_2); if (ff2 != null && ff2.length() > 0) { ff2Array = new String[]{ff2}; } SemanticPropUtil.getSemanticPropsDualFromString((DualInputSemanticProperties) sp, ff1Array, ff2Array, null, null, null, null, in1Type, in2Type, outType, true); } } else { sp = new SingleInputSemanticProperties(); if (returnValue != null) { String[] ffArray = null; final String ff = returnValue.toForwardedFieldsExpression(Input.INPUT_1); if (ff != null && ff.length() > 0) { ffArray = new String[]{ff}; } SemanticPropUtil.getSemanticPropsSingleFromString((SingleInputSemanticProperties) sp, ffArray, null, null, in1Type, outType, true); } } return sp; } public void addSemanticPropertiesHints() { boolean added = false; if (returnValue != null) { if (isBinary) { final String ff1 = returnValue.toForwardedFieldsExpression(Input.INPUT_1); if (ff1 != null && ff1.length() > 0) { added = true; hints.add("Possible annotation: " + "@ForwardedFieldsFirst(\"" + ff1 + "\")"); } final String ff2 = returnValue.toForwardedFieldsExpression(Input.INPUT_2); if (ff2 != null && ff2.length() > 0) { added = true; hints.add("Possible annotation: " + "@ForwardedFieldsSecond(\"" + ff2 + "\")"); } } else { final String ff = returnValue.toForwardedFieldsExpression(Input.INPUT_1); if (ff != null && ff.length() > 0) { added = true; hints.add("Possible annotation: " + "@ForwardedFields(\"" + ff + "\")"); } } } if (!added) { hints.add("Possible annotations: none."); } } public void printToLogger(Logger log) { StringBuilder sb = new StringBuilder(); sb.append("Code analysis result for '" + externalUdfName + " (" + udfClass.getName() + ")':"); sb.append("\nNumber of object creations: " + newOperationCounterTopLevel + " in method / " + newOperationCounterOverall + " transitively"); for (String hint : hints) { sb.append('\n'); sb.append(hint); } if (warning) { log.warn(sb.toString()); } else { log.info(sb.toString()); } } public TaggedValue getInput1AsTaggedValue() { final int[] groupedKeys; if (keys1 != null) { groupedKeys = keys1.computeLogicalKeyPositions(); } else { groupedKeys = null; } final TaggedValue input1 = convertTypeInfoToTaggedValue(Input.INPUT_1, in1Type, "", null, groupedKeys); // store the input and a copy of it to check for modification afterwards if (isFilterFunction) { filterInputRef = input1; filterInputCopy = input1.copy(); } return input1; } public TaggedValue getInput2AsTaggedValue() { final int[] groupedKeys; if (keys2 != null) { groupedKeys = keys2.computeLogicalKeyPositions(); } else { groupedKeys = null; } return convertTypeInfoToTaggedValue(Input.INPUT_2, in2Type, "", null, groupedKeys); } private void addHintOrThrowException(String msg) { if (throwErrorExceptions) { throw new CodeErrorException(externalUdfName + ": " + msg); } else { warning = true; hints.add(msg); } } public void handleNullReturn() { addHintOrThrowException("Function returns 'null' values. This can lead to errors during runtime."); } public void handlePutStatic() { addHintOrThrowException("Function modifies static fields. This can lead to unexpected behaviour during runtime."); } public void handleInvalidTupleAccess() { addHintOrThrowException("Function contains tuple accesses with invalid indexes. This can lead to errors during runtime."); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy