All Downloads are FREE. Search and download functionalities are using the official Maven repository.

proguard.analysis.CallResolver Maven / Gradle / Ivy

Go to download

ProGuardCORE is a free library to read, analyze, modify, and write Java class files.

There is a newer version: 9.1.7
Show newest version
/*
 * ProGuardCORE -- library to process Java bytecode.
 *
 * Copyright (c) 2002-2021 Guardsquare NV
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package proguard.analysis;

import java.lang.reflect.Modifier;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import proguard.analysis.CallResolver.Metrics.MetricType;
import proguard.analysis.datastructure.CodeLocation;
import proguard.analysis.datastructure.Location;
import proguard.analysis.datastructure.callgraph.Call;
import proguard.analysis.datastructure.callgraph.CallGraph;
import proguard.analysis.datastructure.callgraph.ConcreteCall;
import proguard.analysis.datastructure.callgraph.SymbolicCall;
import proguard.backport.LambdaExpression;
import proguard.backport.LambdaExpressionCollector;
import proguard.classfile.*;
import proguard.classfile.attribute.Attribute;
import proguard.classfile.attribute.CodeAttribute;
import proguard.classfile.attribute.visitor.AllAttributeVisitor;
import proguard.classfile.attribute.visitor.AttributeVisitor;
import proguard.classfile.constant.AnyMethodrefConstant;
import proguard.classfile.constant.Constant;
import proguard.classfile.constant.InvokeDynamicConstant;
import proguard.classfile.instruction.ConstantInstruction;
import proguard.classfile.instruction.Instruction;
import proguard.classfile.instruction.visitor.InstructionVisitor;
import proguard.classfile.util.ClassUtil;
import proguard.classfile.visitor.ClassVisitor;
import proguard.classfile.visitor.LineNumberFinder;
import proguard.classfile.visitor.MultiClassVisitor;
import proguard.evaluation.BasicInvocationUnit;
import proguard.evaluation.ExecutingInvocationUnit;
import proguard.evaluation.InvocationUnit;
import proguard.evaluation.PartialEvaluator;
import proguard.evaluation.value.*;
import proguard.evaluation.value.ParticularValueFactory.ReferenceValueFactory;
import proguard.util.PartialEvaluatorUtils;

/**
 * Collects all method invocations inside the analyzed methods.
 *
 * 

All method invocation instructions that appear in the bytecode are inspected, and their * actual target method is calculated. Java has several invocation instructions, * performing virtual, static, dynamic, interface and special calls. While most of these * instructions have a constant operand specifying a method name, the actual method that * will be called at runtime depends on multiple factors. Sometimes, e.g. when using * virtual calls, the invocation target depends on the specific type of the first parameter * on the stack, the so-called this pointer.

* *

This call analyzer performs a lookup process that adheres to the Java Virtual Machine * specification. Being a static analysis, 100% precision cannot be guaranteed, as the specific * type of variables at a specific program point is not always known in advance. But using * the {@link PartialEvaluator} in combination with intraprocedural possible type analysis of * {@link MultiTypedReferenceValue} objects, the resulting call graph should be a superset of * the actual calls happening at runtime. This makes it a complete but potentially unsound analysis.

* *

In addition to resolving the call target, this analyzer also reconstructs the corresponding * arguments and the return value. All of the collected information is wrapped in a {@link Call} * object and passed to subscribed {@link CallVisitor}s.

* * @author Samuel Hopstock */ public class CallResolver implements AttributeVisitor, ClassVisitor, InstructionVisitor { private static final Logger log = LogManager.getLogger(CallResolver.class); /** * Used in {@link #handleInvokeDynamic(CodeLocation, InvokeDynamicConstant)} to * resolve lambda expression invocations. */ private final Map lambdaExpressionMap = new HashMap<>(); private final LambdaExpressionCollector lambdaExpressionCollector = new LambdaExpressionCollector(lambdaExpressionMap); /** * Used to fill the {@link Call#controlFlowDependent} flag. */ private final DominatorCalculator dominatorCalculator; private final ClassPool programClassPool; private final ClassPool libraryClassPool; private final CallGraph callGraph; private final boolean clearCallValuesAfterVisit; private final boolean useDominatorAnalysis; private final List visitors; /** * Calculates concrete values that are created by the bytecode and stored in * variables or on the stack. Needed to reconstruct the actual arguments and * return value of method calls. */ private final PartialEvaluator particularValueEvaluator; private boolean particularValueEvaluationSuccessful; /** * Only calculates the type of values on the stack or in variables, but is * capable of handling cases where this type may be different depending * on the actual control flow path taken during runtime. Needed for * resolving all possible call targets of virtual calls that depend * on the type of the this pointer during runtime. */ private final PartialEvaluator multiTypeValueEvaluator; private boolean multiTypeEvaluationSuccessful; /** * Create a new call resolver. * * @param programClassPool {@link ClassPool} containing the classes whose * calls should be analyzed. * @param libraryClassPool Auxiliary {@link ClassPool} containing framework classes. * Their calls are not resolved, but the class structure information * (i.e. contained methods) is needed when resolving calls whose * target lies in such a library class. * @param callGraph The {@link CallGraph} to fill with all discovered {@link Call}s. * @param clearCallValuesAfterVisit If true, {@link Call#clearValues()} will be called after * {@link CallVisitor#visitCall(Call)}. This makes it possible * to analyze arguments and the return value of calls while still * adding them to a {@link CallGraph} afterwards, as call graph analysis * itself usually only requires the call locations and their targets, * not the arguments or return value. * @param useDominatorAnalysis If true, a dominator analysis is carried out * using the {@link DominatorCalculator} for each * method, in order to be able to fill the * {@link Call#controlFlowDependent} flag. * @param evaluateAllCode See {@link PartialEvaluator.Builder#setEvaluateAllCode(boolean)}. * @param includeSubClasses If true, virtual calls on class fields, parameters and return values of other methods * will take all possible subclasses into account. * This is necessary for a more complete * call graph, because the runtime type of these objects is not controlled by the current * method. E.g. a method that declares its return type to be of type A might also return * an object of type B in case B extends A. The same is true for class fields and parameters, * so in order to really find all potential calls, this circumstance needs to be modeled. * For objects of declared type {@link java.lang.Object} this will be skipped, as the fact * that every single Java class is a subclass of object would lead to an immense blow-up * of the call graph. * @param maxPartialEvaluations See {@link PartialEvaluator.Builder#stopAnalysisAfterNEvaluations(int)}. * @param visitors {@link CallVisitor}s that are interested in the * results of this analysis. */ public CallResolver(ClassPool programClassPool, ClassPool libraryClassPool, CallGraph callGraph, boolean clearCallValuesAfterVisit, boolean useDominatorAnalysis, boolean evaluateAllCode, boolean includeSubClasses, int maxPartialEvaluations, CallVisitor... visitors) { this.programClassPool = programClassPool; this.libraryClassPool = libraryClassPool; this.callGraph = callGraph; this.clearCallValuesAfterVisit = clearCallValuesAfterVisit; this.useDominatorAnalysis = useDominatorAnalysis; this.visitors = Arrays.asList(visitors); dominatorCalculator = new DominatorCalculator(); // Initialize the multitype evaluator. ValueFactory multiTypeValueFactory = includeSubClasses ? new MultiTypedReferenceValueFactory(true, this.programClassPool, this.libraryClassPool) : new MultiTypedReferenceValueFactory(); InvocationUnit multiTypeValueInvocationUnit = new BasicInvocationUnit(multiTypeValueFactory); multiTypeValueEvaluator = PartialEvaluator.Builder.create() .setValueFactory(multiTypeValueFactory) .setInvocationUnit(multiTypeValueInvocationUnit) .setEvaluateAllCode(evaluateAllCode) .stopAnalysisAfterNEvaluations(maxPartialEvaluations) .build(); // Initialize the particular value evaluator ValueFactory particularValueFactory = new ParticularValueFactory(new ArrayReferenceValueFactory(), new ReferenceValueFactory()); InvocationUnit particularValueInvocationUnit = new ExecutingInvocationUnit(particularValueFactory); particularValueEvaluator = PartialEvaluator.Builder.create() .setValueFactory(particularValueFactory) .setInvocationUnit(particularValueInvocationUnit) .setEvaluateAllCode(evaluateAllCode) .stopAnalysisAfterNEvaluations(maxPartialEvaluations) .build(); } @Override public void visitAnyClass(Clazz clazz) { // Only interested in program classes. } @Override public void visitProgramClass(ProgramClass programClass) { lambdaExpressionMap.clear(); programClass.accept(new MultiClassVisitor(lambdaExpressionCollector, new AllAttributeVisitor(true, this))); } @Override public void visitAnyAttribute(Clazz clazz, Attribute attribute) { // Only interested in code attributes. } @Override public void visitCodeAttribute(Clazz clazz, Method method, CodeAttribute codeAttribute) { // Exceptions while executing the partial evaluators are fine, the virtual // call resolving and argument/return value reconstruction handle these // cases gracefully. try { // Evaluate the code. multiTypeEvaluationSuccessful = false; multiTypeValueEvaluator.visitCodeAttribute0(clazz, method, codeAttribute); multiTypeEvaluationSuccessful = true; } catch (Exception e) { Metrics.increaseCount(MetricType.PARTIAL_EVALUATOR_EXCEPTION); log.debug("Exception during evaluating types", e); } try { // Evaluate the code. particularValueEvaluationSuccessful = false; particularValueEvaluator.visitCodeAttribute0(clazz, method, codeAttribute); particularValueEvaluationSuccessful = true; } catch (Exception e) { Metrics.increaseCount(MetricType.PARTIAL_EVALUATOR_EXCEPTION); log.debug("Exception during evaluating values", e); } if (useDominatorAnalysis) { dominatorCalculator.visitCodeAttribute(clazz, method, codeAttribute); } codeAttribute.instructionsAccept(clazz, method, this); } @Override public void visitAnyInstruction(Clazz clazz, Method method, CodeAttribute codeAttribute, int offset, Instruction instruction) { // Only interested in ConstantInstructions } @Override public void visitConstantInstruction(Clazz clazz, Method method, CodeAttribute codeAttribute, int offset, ConstantInstruction constantInstruction) { // Get the line number. LineNumberFinder lineNumberFinder = new LineNumberFinder(offset); codeAttribute.attributesAccept(clazz, method, lineNumberFinder); CodeLocation location = new CodeLocation(clazz, method, offset, lineNumberFinder.lineNumber); Constant constant = ((ProgramClass) clazz).getConstant(constantInstruction.constantIndex); if (constantInstruction.opcode == Instruction.OP_INVOKEDYNAMIC && constant instanceof InvokeDynamicConstant) { handleInvokeDynamic(location, (InvokeDynamicConstant) constant); } else if (constant instanceof AnyMethodrefConstant) { AnyMethodrefConstant ref = (AnyMethodrefConstant) constant; switch (constantInstruction.opcode) { case Instruction.OP_INVOKESTATIC: handleInvokeStatic(location, (AnyMethodrefConstant) constant); break; case Instruction.OP_INVOKEVIRTUAL: case Instruction.OP_INVOKEINTERFACE: handleVirtualMethods(location, ref, constantInstruction.opcode); break; case Instruction.OP_INVOKESPECIAL: handleInvokeSpecial(location, ref); break; default: Metrics.increaseCount(MetricType.UNSUPPORTED_OPCODE); log.warn("Unsupported invocation opcode " + constantInstruction.opcode + " at " + location); } } } private void addCall(CodeLocation location, String targetClass, String targetMethod, String targetDescriptor, int throwsNullptr, byte invocationOpcode, boolean runtimeTypeDependent) { boolean alwaysInvoked = true; if (useDominatorAnalysis) { alwaysInvoked = dominatorCalculator.dominates(location.offset, DominatorCalculator.EXIT_NODE_OFFSET); } Call call = instantiateCall(location, targetClass, targetMethod, targetDescriptor, throwsNullptr, invocationOpcode, !alwaysInvoked, runtimeTypeDependent); initArgumentsAndReturnValue(call); visitors.forEach(d -> d.visitCall(call)); if (clearCallValuesAfterVisit) { call.clearValues(); } if (callGraph != null) { callGraph.addCall(call); } } /** * Creates the appropriate object for the requested call * ({@link ConcreteCall} in case the target method is already * present in the class pool, otherwise a {@link SymbolicCall}). */ private Call instantiateCall(CodeLocation location, String targetClass, String targetMethod, String targetDescriptor, int throwsNullptr, byte invocationOpcode, boolean controlFlowDependent, boolean runtimeTypeDependent) { Call call; Clazz containingClass = programClassPool.getClass(targetClass); if (containingClass == null) { containingClass = libraryClassPool.getClass(targetClass); } if (containingClass == null) { call = new SymbolicCall(location, new MethodSignature(targetClass, targetMethod, targetDescriptor), throwsNullptr, invocationOpcode, controlFlowDependent, runtimeTypeDependent); Metrics.increaseCount(MetricType.SYMBOLIC_CALL); } else { Method method = containingClass.findMethod(targetMethod, targetDescriptor); if (method == null) { call = new SymbolicCall(location, new MethodSignature(targetClass, targetMethod, targetDescriptor), throwsNullptr, invocationOpcode, controlFlowDependent, runtimeTypeDependent); Metrics.increaseCount(MetricType.SYMBOLIC_CALL); } else { call = new ConcreteCall(location, containingClass, method, throwsNullptr, invocationOpcode, controlFlowDependent, runtimeTypeDependent); Metrics.increaseCount(MetricType.CONCRETE_CALL); if ((method.getAccessFlags() & AccessConstants.ABSTRACT) != 0) { Metrics.increaseCount(MetricType.CALL_TO_ABSTRACT_METHOD); } } } return call; } private void initArgumentsAndReturnValue(Call call) { boolean isStaticCall = call.invocationOpcode == Instruction.OP_INVOKESTATIC || call.invocationOpcode == Instruction.OP_INVOKEDYNAMIC; MethodSignature target = call.getTarget(); List arguments = getArguments(call.caller, target, isStaticCall); if (!isStaticCall && !arguments.isEmpty()) { // Handle the instance pointer separately. call.setInstance(arguments.remove(0)); } call.setArguments(arguments); if (!target.descriptor.getPrettyReturnType().equals("void") && particularValueEvaluationSuccessful) { call.setReturnValue(PartialEvaluatorUtils.getStackValue(particularValueEvaluator.getStackAfter(call.caller.offset), 0)); } } private List getArguments(CodeLocation location, MethodSignature invokedMethodSig, boolean isStaticCall) { if (invokedMethodSig.descriptor.argumentTypes == null) { log.error("Argument types list of {} is null!", invokedMethodSig); return Collections.emptyList(); } if (!(multiTypeEvaluationSuccessful && particularValueEvaluationSuccessful)) { int numArguments = invokedMethodSig.descriptor.argumentTypes.size(); if (!isStaticCall) { // For virtual calls we have the instance pointer as a first argument. numArguments++; } return new ArrayList<>(Collections.nCopies(numArguments, null)); } List args = new ArrayList<>(); int stackOffset = 0; for (int argNumber = invokedMethodSig.descriptor.argumentTypes.size() - 1; argNumber >= 0; argNumber--) { String argType = invokedMethodSig.descriptor.argumentTypes.get(argNumber); // Usually we are interested in concrete values for the arguments, i.e. we take them // from the particular value evaluator. But it can happen that this evaluator doesn't // know the argument value because it depends on some control flow specifics. Still, // we might at least know what type(s) the argument can have, which is better than nothing. // In that case the multitype evaluator needs to be consulted. Thus, we first ask the // multitype evaluator if the argument is known to have more than one possible type. // If this is the case, we can already assume that there is no known particular value // for it. Otherwise, we get the particular value as initially planned. Value stackTop = PartialEvaluatorUtils.getStackBefore(multiTypeValueEvaluator, location.offset, stackOffset); if (!(stackTop instanceof MultiTypedReferenceValue && ((MultiTypedReferenceValue) stackTop).getPotentialTypes().size() > 1)) { stackTop = PartialEvaluatorUtils.getStackBefore(particularValueEvaluator, location.offset, stackOffset); } // Make sure to reverse the parameter ordering. args.add(0, stackTop); stackOffset += ClassUtil.internalTypeSize(argType); } if (!isStaticCall) { // For virtual calls we have the instance pointer as a first argument. Value instance = PartialEvaluatorUtils.getStackBefore(multiTypeValueEvaluator, location.offset, stackOffset); if (!(instance instanceof MultiTypedReferenceValue && ((MultiTypedReferenceValue) instance).getPotentialTypes().size() > 1)) { instance = PartialEvaluatorUtils.getStackBefore(particularValueEvaluator, location.offset, stackOffset); } args.add(0, instance); } return args; } /** * Resolve invokedynamic instructions. See * JVM spec §6.5.invokedynamic. * In Java, this kind of invocation is used for lambda expressions, thus this handler depends on {@link * LambdaExpressionCollector} having run beforehand. * * @param location The {@link Location} of the instruction. * @param constant The {@link InvokeDynamicConstant} (JVM spec: "symbolic reference R") * containing the index to the corresponding bootstrap method that * identifies the actual call target. */ private void handleInvokeDynamic(CodeLocation location, InvokeDynamicConstant constant) { if (lambdaExpressionMap.containsKey(constant)) { LambdaExpression target = lambdaExpressionMap.get(constant); addCall(location, target.invokedClassName, target.invokedMethodName, target.invokedMethodDesc, Value.NEVER, Instruction.OP_INVOKEDYNAMIC, false ); } else { log.debug("invokedynamic without matching lambda expression at {}", location); } } /** * Resolve invokestatic instructions. See JVM spec §6.5.invokestatic * * @param location The {@link Location} of the instruction. * @param constant The {@link AnyMethodrefConstant} specifying the exact method to be invoked. */ private void handleInvokeStatic(CodeLocation location, AnyMethodrefConstant constant) { addCall(location, constant.getClassName(location.clazz), constant.getName(location.clazz), constant.getType(location.clazz), Value.NEVER, Instruction.OP_INVOKESTATIC, false ); } /** * Resolve invokespecial instructions. They are always used for super.x() calls * and constructor invocations. According to the * JVM spec §6.5.invokespecial, * this opcode is also sometimes used for private method calls, but so far I haven't seen that in the wild. * * @param location The {@link Location} of the instruction. * @param ref The {@link AnyMethodrefConstant} specifying name and descriptor * of the method to be invoked. */ private void handleInvokeSpecial(CodeLocation location, AnyMethodrefConstant ref) { Set targets = resolveInvokeSpecial(location.clazz, ref); if (targets.isEmpty()) { Metrics.increaseCount(MetricType.MISSING_METHODS); log.debug("Missing method {}", ref.getClassName(location.clazz)); } else { String name = ref.getName(location.clazz); String descriptor = ref.getType(location.clazz); for (String target : targets) { addCall(location, target, name, descriptor, Value.NEVER, Instruction.OP_INVOKESPECIAL, false ); } } } /** * The invokespecial resolution algorithm, annotated with JVM spec citations where appropriate, * so that the specified lookup process can easily be compared to this * implementation. * * @param callingClass JVM spec: "current class". * @param ref The {@link AnyMethodrefConstant} specifying name * and descriptor of the method to be invoked. * @return The fully qualified names of potential call target classes * (usually just one, but see {@link #resolveFromSuperinterfaces(Clazz, String, String)} * for details on when there might be multiple). */ private Set resolveInvokeSpecial(Clazz callingClass, AnyMethodrefConstant ref) { String name = ref.getName(callingClass); String descriptor = ref.getType(callingClass); // "If all of the following are true, let C be the direct superclass of the current class:" Clazz c; // ACC_SUPER flag is set (should always be the case, legacy flag). See JVM Spec §4.1. if ((callingClass.getAccessFlags() & AccessConstants.SUPER) != 0 && !name.equals(ClassConstants.METHOD_NAME_INIT) // Not an instance initialization method. && ref.referencedClass != null // Referenced class available. && (ref.referencedClass.getAccessFlags() & AccessConstants.INTERFACE) == 0 // Not an interface reference. && callingClass.extends_(ref.referencedClass) && !callingClass.getName().equals(ref.referencedClass.getName())) // Referenced class is strictly a superclass of the current class. { c = callingClass.getSuperClass(); } else { // "Otherwise, let C be the class or interface named by the symbolic reference". c = ref.referencedClass; } if (c == null) { // In this case, we don't have the referenced class in our class pool, so we can't be more specific here. String className = ref.getClassName(callingClass); Metrics.increaseCount(MetricType.MISSING_CLASS); log.debug("Missing class {}", className); return Collections.singleton(className); } // 1. (lookup in C directly) if (c.findMethod(name, descriptor) != null) { return Collections.singleton(c.getName()); } Optional target = Optional.empty(); if ((c.getAccessFlags() & AccessConstants.INTERFACE) == 0) { // 2. (C is a class -> check superclasses transitively) target = resolveFromSuperclasses(c, name, descriptor); } else { // 3. (C is an interface -> Check if java.lang.Object has a suitable method) for (java.lang.reflect.Method m : Object.class.getMethods()) { if ((m.getModifiers() & Modifier.PUBLIC) != 0 && m.getName().equals(name) && getDescriptor(m).equals(descriptor)) { target = Optional.of(ClassConstants.NAME_JAVA_LANG_OBJECT); break; } } } // 4. (Otherwise find maximally specific method from superinterfaces) return target.map(Collections::singleton) .orElseGet(() -> resolveFromSuperinterfaces(c, name, descriptor)); } /** * Get the Descriptor of a {@link java.lang.reflect.Method}. */ public static String getDescriptor(java.lang.reflect.Method m) { List parameters = Arrays.stream(m.getParameterTypes()) .map(Class::getName) .collect(Collectors.toList()); return ClassUtil.internalMethodDescriptor(m.getReturnType().getName(), parameters); } /** * Handle invokevirtual and invokeinterface instructions, * as they use more or less the same lookup process. * They are used for "normal" method calls, i.e. any instance method. * The actual invocation target depends on the type of the this pointer during runtime (first/bottom stack parameter). * In order to get a good estimate of this type, the lookup process depends on the analysis by a * {@link PartialEvaluator} that yields {@link MultiTypedReferenceValue} elements. * * @param location The {@link Location} of this instruction. * @param ref The {@link AnyMethodrefConstant} specifying name * and descriptor of the method to be invoked. */ private void handleVirtualMethods(CodeLocation location, AnyMethodrefConstant ref, byte invocationOpcode) { String name = ref.getName(location.clazz); String descriptor = ref.getType(location.clazz); // Estimate the runtime type of the this pointer: Intraprocedural analysis performed by the partial evaluator. int argumentCount = ClassUtil.internalMethodParameterSize(descriptor, false); Value thisPtr = PartialEvaluatorUtils.getStackBefore(multiTypeValueEvaluator, location.offset, argumentCount - 1); if (!(thisPtr instanceof MultiTypedReferenceValue)) { // If the partial evaluation has not finished, this is to be expected and does not warrant an error message. if (multiTypeEvaluationSuccessful) { String classInfo = (thisPtr == null ? "null" : thisPtr.getClass().toString()); Metrics.increaseCount(MetricType.PARTIAL_EVALUATOR_VALUE_IMPRECISE); log.debug("Virtual call at {}: this-pointer is not a multi typed reference value but {}", location, classInfo); } } else { MultiTypedReferenceValue multiTypeThisPtr = (MultiTypedReferenceValue) thisPtr; for (TypedReferenceValue possibleType : multiTypeThisPtr.getPotentialTypes()) { if (possibleType.isNull() == Value.ALWAYS) { // This will always throw a NullPointerException, but we still want this info in the call graph. addCall(location, ref.getClassName(location.clazz), ref.getName(location.clazz), ref.getType(location.clazz), Value.ALWAYS, invocationOpcode, multiTypeThisPtr.getPotentialTypes().size() > 1 ); continue; } Clazz referencedClass; if (ClassUtil.isInternalArrayType(possibleType.getType())) { // If anybody wants to call methods on arrays, we need to check java.lang.Object. referencedClass = libraryClassPool.getClass(ClassConstants.NAME_JAVA_LANG_OBJECT); } else { referencedClass = possibleType.getReferencedClass(); // Sometimes the type doesn't have a reference to the class yet. // In this case we should try to look it up manually in both class pools. if (referencedClass == null) { referencedClass = programClassPool.getClass(possibleType.getType()); } if (referencedClass == null) { referencedClass = libraryClassPool.getClass(possibleType.getType()); } } if (referencedClass == null) { // Class still wasn't found, add it to the missing classes. Metrics.increaseCount(MetricType.MISSING_CLASS); log.info("Missing class {}", possibleType.getType()); } Set targetClasses = resolveVirtual(location.clazz, referencedClass, ref); if (targetClasses.isEmpty()) { if (referencedClass != null) { Metrics.increaseCount(MetricType.MISSING_METHODS); log.debug("Missing method {}", ref.getClassName(location.clazz)); } targetClasses = Collections.singleton(possibleType.getType()); } for (String targetClass : targetClasses) { addCall(location, targetClass, name, descriptor, multiTypeThisPtr.isNull(), invocationOpcode, multiTypeThisPtr.getPotentialTypes().size() > 1 ); } } } } /** * The invokevirtual and invokeinterface resolution algorithm, annotated with * JVM * spec §6.5.invokevirtual citations where appropriate, so that the specified lookup * process can easily be compared to this implementation. * * @param callingClass JVM spec: "current class". * @param thisPtrType The type of the this pointer of the * call (JVM spec: "objectref"). * @param ref The {@link AnyMethodrefConstant} specifying name * and descriptor of the method to be invoked. * @return The fully qualified names of potential call target clases * (usually just one, but see {@link #resolveFromSuperinterfaces(Clazz, String, String)} * for details on when there might be multiple). */ private Set resolveVirtual(Clazz callingClass, Clazz thisPtrType, AnyMethodrefConstant ref) { if (thisPtrType == null) { return Collections.emptySet(); } String name = ref.getName(callingClass); String descriptor = ref.getType(callingClass); // 1. + 2. (Search the class belonging to the this pointer type and all its transitive superclasses) return resolveFromSuperclasses(thisPtrType, name, descriptor) .map(Collections::singleton) // 3. (Otherwise find maximally specific method from superinterfaces) .orElseGet(() -> resolveFromSuperinterfaces(thisPtrType, name, descriptor)); } /** * Search for the invocation target in a specific class and recursively in all superclasses. * * @param start The {@link Clazz} where the lookup is to be started. * @param name The name of the method. * @param descriptor The method descriptor. * @return An {@link Optional} with the fully qualified name of the class * containing the target method, empty if it couldn't be found. */ private Optional resolveFromSuperclasses(Clazz start, String name, String descriptor) { Clazz curr = start; while (curr != null) { Method targetMethod = curr.findMethod(name, descriptor); if (targetMethod != null && (targetMethod.getAccessFlags() & AccessConstants.ABSTRACT) == 0) { return Optional.of(curr.getName()); } curr = curr.getSuperClass(); } return Optional.empty(); } /** * Search for a maximally specific default implementation in all superinterfaces of a class. * This step is potentially unintuitive and difficult to grasp, see * JVM spec §5.4.3.3 * for more information, as well as this great * blog post * concerning the resolution pitfalls. The following is based on the information on those * websites. * * @param start The {@link Clazz} whose superinterfaces are to be searched. * @param name The target method name. * @param descriptor The target method descriptor. * @return The fully qualified name of the class(es) that contain the method to be invoked. * Be aware that purely from a JVM point of view, this choice can be ambiguous, * in which case it just chooses the candidate randomly. * Here, we don't want to gamble, but rather want to add call graph edges for every possibility, * if this ever happens. Javac ensures that such a case never occurs, * but who knows how the bytecode has been generated, so this possibility is implemented just in case. */ private Set resolveFromSuperinterfaces(Clazz start, String name, String descriptor) { Set superInterfaces = new HashSet<>(); getSuperinterfaces(start, superInterfaces); // Get all transitive superinterfaces that have a matching method. Set applicableInterfaces = superInterfaces.stream() .filter(i -> { Method m = i.findMethod(name, descriptor); return m != null && (m.getAccessFlags() & (AccessConstants.PRIVATE | AccessConstants.STATIC | AccessConstants.ABSTRACT)) == 0; }) .collect(Collectors.toSet()); // Tricky part: Find the "maximally specific" implementation, // i.e. the lowest applicable interface in the type hierarchy. for (Clazz iface : new HashSet<>(applicableInterfaces)) { superInterfaces.clear(); getSuperinterfaces(iface, superInterfaces); // If an applicable interface overrides another applicable interface, it is more specific than the // one being overridden -> the overridden interface is no longer applicable. superInterfaces.forEach(applicableInterfaces::remove); } return applicableInterfaces.stream().map(Clazz::getName).collect(Collectors.toSet()); } /** * Get the transitive superinterfaces of a class/interface recursively. * * @param start The {@link Clazz} where the collection process is to be started. * @param accumulator The current set of superinterfaces, so that only one set is constructed at runtime. */ private void getSuperinterfaces(Clazz start, Set accumulator) { for (int i = 0; i < start.getInterfaceCount(); i++) { Clazz iface = start.getInterface(i); if (iface == null) { Metrics.increaseCount(MetricType.MISSING_CLASS); log.info("Missing class {}", start.getName()); continue; } accumulator.add(iface); getSuperinterfaces(iface, accumulator); } if (start.getSuperClass() != null) { getSuperinterfaces(start.getSuperClass(), accumulator); } } public static class Builder { private final ClassPool programClassPool; private final ClassPool libraryClassPool; private final CallGraph callGraph; private final CallVisitor[] visitors; private boolean clearCallValuesAfterVisit = true; private boolean useDominatorAnalysis = false; private boolean evaluateAllCode = false; private boolean includeSubClasses = false; private int maxPartialEvaluations = 50; public Builder(ClassPool programClassPool, ClassPool libraryClassPool, CallGraph callGraph, CallVisitor... visitors) { this.programClassPool = programClassPool; this.libraryClassPool = libraryClassPool; this.callGraph = callGraph; this.visitors = visitors; } /** * If true, {@link Call#clearValues()} will be called after * {@link CallVisitor#visitCall(Call)}. This makes it possible * to analyze arguments and the return value of calls while still * adding them to a {@link CallGraph} afterwards, as call graph analysis * itself usually only requires the call locations and their targets, * not the arguments or return value. */ public Builder setClearCallValuesAfterVisit(boolean clearCallValuesAfterVisit) { this.clearCallValuesAfterVisit = clearCallValuesAfterVisit; return this; } /** * If true, a dominator analysis is carried out using the {@link DominatorCalculator} * for each method, in order to be able to fill the {@link Call#controlFlowDependent} flag. */ public Builder setUseDominatorAnalysis(boolean useDominatorAnalysis) { this.useDominatorAnalysis = useDominatorAnalysis; return this; } /** * See {@link PartialEvaluator.Builder#setEvaluateAllCode(boolean)}. */ public Builder setEvaluateAllCode(boolean evaluateAllCode) { this.evaluateAllCode = evaluateAllCode; return this; } /** * If true, virtual calls on class fields, parameters and return values of other methods * will take all possible subclasses into account. * *

This is necessary for a more complete * call graph, because the runtime type of these objects is not controlled by the current * method. E.g. a method that declares its return type to be of type A might also return * an object of type B in case B extends A. The same is true for class fields and parameters, * so in order to really find all potential calls, this circumstance needs to be modeled. * For objects of declared type {@link java.lang.Object} this will be skipped, as the fact * that every single Java class is a subclass of object would lead to an immense blow-up * of the call graph.

*/ public Builder setIncludeSubClasses(boolean includeSubClasses) { this.includeSubClasses = includeSubClasses; return this; } /** * See {@link PartialEvaluator.Builder#stopAnalysisAfterNEvaluations(int)}. */ public Builder setMaxPartialEvaluations(int maxPartialEvaluations) { this.maxPartialEvaluations = maxPartialEvaluations; return this; } public CallResolver build() { return new CallResolver(programClassPool, libraryClassPool, callGraph, clearCallValuesAfterVisit, useDominatorAnalysis, evaluateAllCode, includeSubClasses, maxPartialEvaluations, visitors); } } /** * Utility to collect statistical information about the call resolution process. */ public static class Metrics { /** * Constants which are used as metric types. */ public enum MetricType { MISSING_CLASS, MISSING_METHODS, UNSUPPORTED_OPCODE, PARTIAL_EVALUATOR_EXCEPTION, PARTIAL_EVALUATOR_VALUE_IMPRECISE, SYMBOLIC_CALL, CONCRETE_CALL, CALL_TO_ABSTRACT_METHOD } public static final Map counts = new TreeMap<>(); public static void increaseCount(MetricType type) { counts.merge(type, 1, Integer::sum); } /** * Get all collected data as a string and clear it afterwards. */ public static String flush() { StringBuilder result = new StringBuilder("Call resolver Metrics:\n"); counts.forEach((type, count) -> result.append(type.name()) .append(": ") .append(count) .append("\n")); counts.clear(); return result.toString(); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy