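// proguard.util.CallGraphWalker -- part of the proguard-core artifact (Maven / Gradle / Ivy).
// Page navigation text from the artifact listing: "Show all versions of proguard-core", "Show documentation".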
/*
* ProGuardCORE -- library to process Java bytecode.
*
* Copyright (c) 2002-2021 Guardsquare NV
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package proguard.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Predicate;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import proguard.analysis.Metrics;
import proguard.analysis.Metrics.MetricType;
import proguard.analysis.datastructure.callgraph.Call;
import proguard.analysis.datastructure.callgraph.CallGraph;
import proguard.analysis.datastructure.callgraph.Node;
import proguard.classfile.MethodSignature;
/**
* Generic utilities to traverse the call graph.
*
* @author Samuel Hopstock
*/
public class CallGraphWalker {
private static final Logger log = LogManager.getLogger(CallGraphWalker.class);
/** Call graph strands are no longer explored after a maximum distance from the original root. */
public static final int MAX_DEPTH_DEFAULT = 100;
/**
* Once the call graph reaches a maximum width, no more nodes are added to the worklist of the
* next level. E.g. suppose this limit is 5 and we have already discovered the following call
* graph:
*
*
*
* level2_0 <-- level1_0 <-- root
* level2_1 <------| |
* level2_2 <------| |
* |
* level2_3 <-- level1_1 <----|
* level2_4 <------|
*
*
*
* If level1_1
has any more known predecessors, level 2 of the call graph would have
* width 6, which is more than the 5 allowed nodes. Thus, level1_1
is marked as
* truncated and its other predecessors are discarded.
*/
public static final int MAX_WIDTH_DEFAULT = 100;
/**
* Analogous to Soot's getReachableMethods()
: Starting from one particular method,
* all methods that are transitively reachable are collected in a single set. The exploration
* stops after no more reachable methods have been found, or the reachable call graph exceeds
* {@link #MAX_DEPTH_DEFAULT} and {@link #MAX_WIDTH_DEFAULT}.
*
* @param callGraph The {@link CallGraph} to use as the basis for this exploration
* @param start The method that is to be used as the exploration root
* @param maxDepth See {@link #MAX_DEPTH_DEFAULT}
* @param maxWidth See {@link #MAX_WIDTH_DEFAULT}
* @return A set of all transitively reachable methods
*/
public static Set getSuccessors(
CallGraph callGraph, MethodSignature start, int maxDepth, int maxWidth) {
Set visited = new LinkedHashSet<>();
explore(
callGraph,
start,
CallGraphWalker::calculateSuccessors,
n -> visited.add(n.signature),
maxDepth,
maxWidth);
return visited;
}
/**
* Like {@link #getSuccessors(CallGraph, MethodSignature, int, int)} but using default values for
* max depth and max width.
*
* @param callGraph The {@link CallGraph} to use as the basis for this exploration
* @param start The method that is to be used as the exploration root
* @return A set of all transitively reachable methods
*/
public static Set getSuccessors(CallGraph callGraph, MethodSignature start) {
return getSuccessors(callGraph, start, MAX_DEPTH_DEFAULT, MAX_WIDTH_DEFAULT);
}
/**
* Inverse of {@link #getSuccessors(CallGraph, MethodSignature)}: Starting from one particular
* method, all methods that can transitively reach it are collected in a single set. The
* exploration stops after no more incoming methods have been found, or the inversely-reachable
* call graph exceeds {@link #MAX_DEPTH_DEFAULT} and {@link #MAX_WIDTH_DEFAULT}.
*
* @param callGraph The {@link CallGraph} to use as the basis for this exploration
* @param start The method that is to be used as the exploration root
* @param maxDepth See {@link #MAX_DEPTH_DEFAULT}
* @param maxWidth See {@link #MAX_WIDTH_DEFAULT}
* @return A set of all methods that can transitively reach the root
*/
public static Set getPredecessors(
CallGraph callGraph, MethodSignature start, int maxDepth, int maxWidth) {
Set visited = new LinkedHashSet<>();
explore(
callGraph,
start,
CallGraphWalker::calculatePredecessors,
n -> visited.add(n.signature),
maxDepth,
maxWidth);
return visited;
}
/**
* Like {@link #getPredecessors(CallGraph, MethodSignature, int, int)} but using default values
* for max depth and max width.
*
* @param callGraph The {@link CallGraph} to use as the basis for this exploration
* @param start The method that is to be used as the exploration root
* @return A set of all methods that can transitively reach the root
*/
public static Set getPredecessors(CallGraph callGraph, MethodSignature start) {
return getPredecessors(callGraph, start, MAX_DEPTH_DEFAULT, MAX_WIDTH_DEFAULT);
}
/**
* Interactively explore the outgoing call graph (breadth-first) of a specific method.
*
* Outgoing call graph edges are transitively visited, one level of the call graph at a time.
* E.g. if we have the following graph:
*
*
*
* level2_0 <-- level1_0 <-- root
* level2_1 <------| |
* level2_2 <------| |
* |
* level2_3 <-- level1_1 <----|
* level2_4 <------|
*
*
*
* In this case, level1_0
and level1_1
are visited first, then
* level2_*
and so on. The user of this method provides a callback that will be executed
* for every newly visited path. This handler receives a {@link Node} that represents e.g.
* level2_1
and contains references to all its predecessors that have been visited in this
* particular path. Like this, the user can evaluate the whole call chain that led to any specific
* method being reachable from the starting point. Graph limits {@link #MAX_DEPTH_DEFAULT} and
* {@link #MAX_WIDTH_DEFAULT} are applicable. Any paths that are truncated due to any limit being
* reached, are marked with {@link Node#isTruncated}
*
* If you are only interested in which methods are reachable from a start method, but do not
* care about the individual paths that make this possible, you should use {@link
* #getSuccessors(CallGraph, MethodSignature)} instead.
*
* @param callGraph The {@link CallGraph} to use as the basis of this exploration
* @param start The method that is to be used as the exploration root
* @param handler The callback function that is invoked for newly visited paths. If this returns
* false, this specific path is not explored any further, without marking it as truncated.
* @param maxDepth See {@link #MAX_DEPTH_DEFAULT}
* @param maxWidth See {@link #MAX_WIDTH_DEFAULT}
* @return The {@link Node} representing the start method and all its successors
*/
public static Node successorPathsAccept(
CallGraph callGraph,
MethodSignature start,
Predicate handler,
int maxDepth,
int maxWidth) {
return explore(
callGraph, start, CallGraphWalker::calculateSuccessors, handler, maxDepth, maxWidth);
}
/**
* Like {@link #successorPathsAccept(CallGraph, MethodSignature, Predicate, int, int)} but using
* default values for max depth and max width.
*
* @param callGraph The {@link CallGraph} to use as the basis of this exploration
* @param start The method that is to be used as the exploration root
* @param handler The callback function that is invoked for newly visited paths. If this returns
* false, this specific path is not explored any further, without marking it as truncated.
* @return The {@link Node} representing the start method and all its successors
*/
public static Node successorPathsAccept(
CallGraph callGraph, MethodSignature start, Predicate handler) {
return successorPathsAccept(callGraph, start, handler, MAX_DEPTH_DEFAULT, MAX_WIDTH_DEFAULT);
}
/**
* Interactively explore the incoming call graph (breadth-first) of a specific method.
*
* Inverse of {@link #successorPathsAccept(CallGraph, MethodSignature, Predicate)}: Explores
* all methods that can reach the starting point and notifies the user's handler of newly found
* paths.
*
* @param callGraph The {@link CallGraph} to use as the basis of this exploration
* @param start The method that is to be used as the exploration root
* @param handler The callback function that is invoked for newly visited paths. If this returns
* false, this specific path is not explored any further, without marking it as truncated.
* @param maxDepth See {@link #MAX_DEPTH_DEFAULT}
* @param maxWidth See {@link #MAX_WIDTH_DEFAULT}
* @return The {@link Node} representing the start method and all its predecessors
*/
public static Node predecessorPathsAccept(
CallGraph callGraph,
MethodSignature start,
Predicate handler,
int maxDepth,
int maxWidth) {
return explore(
callGraph, start, CallGraphWalker::calculatePredecessors, handler, maxDepth, maxWidth);
}
/**
* Like {@link #predecessorPathsAccept(CallGraph, MethodSignature, Predicate, int, int)} but using
* default values for max depth and max width.
*
* @param callGraph The {@link CallGraph} to use as the basis of this exploration
* @param start The method that is to be used as the exploration root
* @param handler The callback function that is invoked for newly visited paths. If this returns
* false, this specific path is not explored any further, without marking it as truncated.
* @return The {@link Node} representing the start method and all its predecessors
*/
public static Node predecessorPathsAccept(
CallGraph callGraph, MethodSignature start, Predicate handler) {
return predecessorPathsAccept(callGraph, start, handler, MAX_DEPTH_DEFAULT, MAX_WIDTH_DEFAULT);
}
/**
* Generic call graph exploration function. The reachable methods are visited in a breadth-first
* way. The direction of this procedure (i.e. if the call graph is explored through outgoing or
* incoming edges) is determined by a user-provided function.
*
* @param callGraph The {@link CallGraph} to use as the basis of this exploration
* @param start The method that is to be used as the exploration root
* @param getNext After all methods of the current depth have been visited, this method is used to
* determine the methods of the next level. E.g. if we want to visit the call graph in the
* outgoing/successor direction, this function should yield all direct successors for nodes in
* the current level. See {@link #calculateSuccessors(CallGraph, Node)} or its opposite {@link
* #calculatePredecessors(CallGraph, Node)} to see example implementations.
* @param handler The callback function that is invoked for newly visited paths. If this returns
* false, this specific path is not explored any further, without marking it as truncated.
* @param maxDepth See {@link #MAX_DEPTH_DEFAULT}
* @param maxWidth See {@link #MAX_WIDTH_DEFAULT}
* @return The {@link Node} representing the start method and all its successors/predecessors
* (depending on the concrete implementation of getNext
*/
private static Node explore(
CallGraph callGraph,
MethodSignature start,
BiFunction> getNext,
Predicate handler,
int maxDepth,
int maxWidth) {
Node root = new Node(start);
ArrayList worklist = new ArrayList<>();
worklist.add(root);
int currLevel = 0;
while (!worklist.isEmpty()) {
if (currLevel >= maxDepth) {
Metrics.increaseCount(MetricType.CALL_GRAPH_RECONSTRUCTION_MAX_DEPTH_REACHED);
worklist.forEach(n -> n.isTruncated = true);
break;
}
worklist = currentLevelAccept(callGraph, getNext, handler, worklist, maxWidth);
currLevel++;
}
return root;
}
/**
* Visit all the nodes of the current depth in the call graph, forward them to the provided
* handler function and make sure that the next level is prepared.
*
* @param callGraph The {@link CallGraph} to use as the basis of this exploration
* @param getNext After all methods of the current depth have been visited, this method is used to
* determine the methods of the next level. E.g. if we want to visit the call graph in the
* outgoing/successor direction, this function should yield all direct successors for nodes in
* the current level. See {@link #calculateSuccessors(CallGraph, Node)} or its opposite {@link
* #calculatePredecessors(CallGraph, Node)} to see example implementations.
* @param handler The callback function that is invoked for newly visited paths. If this returns
* false, this specific path is not explored any further, without marking it as truncated.
* @param maxWidth See {@link #MAX_WIDTH_DEFAULT}
* @return The nodes of the next level, making sure that the depth and width limits of the call
* graph won't be exceeded
*/
private static ArrayList currentLevelAccept(
CallGraph callGraph,
BiFunction> getNext,
Predicate handler,
ArrayList worklist,
int maxWidth) {
ArrayList nextLevel = new ArrayList<>();
for (Node curr : worklist) {
if (!handler.test(curr)) {
// The handler wants us to stop exploring this path without marking it as truncated
continue;
}
for (Node next : getNext.apply(callGraph, curr)) {
if (nextLevel.size() >= maxWidth) {
Metrics.increaseCount(MetricType.CALL_GRAPH_RECONSTRUCTION_MAX_WIDTH_REACHED);
next.isTruncated = true;
} else {
nextLevel.add(next);
}
}
}
return nextLevel;
}
/** Return all direct predecessors of curr in this callgraph. */
private static Set calculatePredecessors(CallGraph callGraph, Node curr) {
Set predecessors = new LinkedHashSet<>();
for (Call i : callGraph.incoming.getOrDefault(curr.signature, Collections.emptySet())) {
if (!(i.caller.signature instanceof MethodSignature)) {
log.warn("Call graph edge {} does not have a method as the caller member!", i);
continue;
}
// Only add the caller to the chain if this doesn't create a loop
if (!curr.successorsContain((MethodSignature) i.caller.signature)) {
Node prev = new Node((MethodSignature) i.caller.signature);
curr.predecessors.add(prev);
curr.incomingCallLocations.add(i.caller);
prev.successors.add(curr);
prev.outgoingCallLocations.add(i.caller);
predecessors.add(prev);
}
}
return predecessors;
}
/** Return all direct successors of curr in this callgraph. */
private static Set calculateSuccessors(CallGraph callGraph, Node curr) {
Set successors = new LinkedHashSet<>();
for (Call i : callGraph.outgoing.getOrDefault(curr.signature, Collections.emptySet())) {
// Only add the caller to the chain if this doesn't create a loop
if (!curr.predecessorsContain(i.getTarget())) {
Node successor = new Node(i.getTarget());
curr.successors.add(successor);
curr.outgoingCallLocations.add(i.caller);
successor.predecessors.add(curr);
successor.incomingCallLocations.add(i.caller);
successors.add(successor);
}
}
return successors;
}
}