// Source listing: proguard.analysis.cpa.bam.BamTransferRelation (artifact: proguard-core).
/*
* ProGuardCORE -- library to process Java bytecode.
*
* Copyright (c) 2002-2022 Guardsquare NV
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package proguard.analysis.cpa.bam;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Optional;
import java.util.Set;
import java.util.Stack;
import java.util.stream.Collectors;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import proguard.analysis.cpa.algorithms.CpaAlgorithm;
import proguard.analysis.cpa.defaults.BreadthFirstWaitlist;
import proguard.analysis.cpa.defaults.Cfa;
import proguard.analysis.cpa.defaults.NeverAbortOperator;
import proguard.analysis.cpa.defaults.ProgramLocationDependentReachedSet;
import proguard.analysis.cpa.defaults.StopSepOperator;
import proguard.analysis.cpa.interfaces.AbortOperator;
import proguard.analysis.cpa.interfaces.AbstractState;
import proguard.analysis.cpa.interfaces.CallEdge;
import proguard.analysis.cpa.interfaces.CfaEdge;
import proguard.analysis.cpa.interfaces.CfaNode;
import proguard.analysis.cpa.interfaces.Precision;
import proguard.analysis.cpa.interfaces.ProgramLocationDependent;
import proguard.analysis.cpa.interfaces.ProgramLocationDependentTransferRelation;
import proguard.analysis.cpa.interfaces.ReachedSet;
import proguard.analysis.cpa.interfaces.StopOperator;
import proguard.analysis.cpa.interfaces.TransferRelation;
import proguard.analysis.cpa.interfaces.Waitlist;
import proguard.analysis.datastructure.callgraph.Call;
import proguard.analysis.datastructure.callgraph.SymbolicCall;
import proguard.classfile.Signature;
/**
* This {@link TransferRelation} extends an analysis inter-procedurally. The transfer relation
* applies as close as possible the algorithms described in {@see
* https://dl.acm.org/doi/pdf/10.1145/3368089.3409718}. On a high level the task of this
* domain-independent transfer relation is to extend the intra-procedural domain-dependent transfer
* relation of a {@link CpaWithBamOperators} inter-procedurally. For more details on how the
* transfer relation works see {@link BamTransferRelation#generateAbstractSuccessors(AbstractState,
* Precision)}.
*
* @author Carlo Alberto Pozzoli
*/
public class BamTransferRelation<
CfaNodeT extends CfaNode,
CfaEdgeT extends CfaEdge,
SignatureT extends Signature>
implements TransferRelation {
private static final Logger log = LogManager.getLogger(BamTransferRelation.class);
private final CpaWithBamOperators wrappedCpa;
// TODO: maybe we don't need the cfa
private final Cfa cfa;
private final Stack stack = new Stack<>();
private boolean fixedPointReached = false;
private final CfaNodeT mainLocation;
private final BamCache cache;
private int maxCallStackDepth = -1;
private final StopOperator fixedPointStopOperator;
private final AbortOperator abortOperator;
/**
* Create a BAM transfer relation with an unlimited call stack.
*
* @param wrappedCpa a wrapped CPA with BAM operators
* @param cfa a control flow automaton
* @param mainFunction the signature of the main function of an analyzed program
* @param cache a cache for the block abstractions
*/
public BamTransferRelation(
CpaWithBamOperators wrappedCpa,
Cfa cfa,
SignatureT mainFunction,
BamCache cache) {
this(wrappedCpa, cfa, mainFunction, cache, -1, NeverAbortOperator.INSTANCE);
}
/**
* Create a BAM transfer relation with a specified maximum call stack depth. When the call stack
* meets its size limit the method call analysis is delegated to the wrapped intra-procedural
* transfer relation.
*
* @param wrappedCpa a wrapped CPA with BAM operators
* @param cfa a control flow automaton
* @param mainFunction the signature of the main function of an analyzed program
* @param cache a cache for the block abstractions
* @param maxCallStackDepth maximum depth of the call stack analyzed inter-procedurally. 0 means
* intra-procedural analysis. < 0 means no maximum depth.
* @param abortOperator an abort operator used for computing block abstractions
*/
public BamTransferRelation(
CpaWithBamOperators wrappedCpa,
Cfa cfa,
SignatureT mainFunction,
BamCache cache,
int maxCallStackDepth,
AbortOperator abortOperator) {
this.wrappedCpa = wrappedCpa;
this.cfa = cfa;
this.mainLocation = cfa.getFunctionEntryNode(mainFunction);
this.cache = cache;
this.fixedPointStopOperator = new StopSepOperator(wrappedCpa.getAbstractDomain());
this.maxCallStackDepth = maxCallStackDepth;
this.abortOperator = abortOperator;
}
// implementations for TransferRelation
/**
* In order to implement an inter-procedural analysis the abstract successors are calculated for
* the following cases:
*
* - Run the fixed point algorithm from the entry of the main method, continuing the analysis
* until a fixed point is reached (i.e. a function summary is provided for each function, also the
* recursive ones). If there are no recursive calls the fixed point is reached after the first
* iteration, while in case of recursive calls, depending on the domain-dependent transfer
* relation, they can be unrolled at each iteration until the fixed point is reached.
*
*
- Run the applyBlockAbstraction algorithm at every known procedure call. This algorithm
* takes care of retrieving the summary of the procedure from the cache (if available) or calls
* {@link CpaAlgorithm} recursively on the new function to compute and store in the cache the
* summary of the procedure when called from the specified {@link AbstractState} (i.e. different
* parameters or global state result in a different summary). Since we have no information on the
* code of the target of {@link SymbolicCall} this type of calls is delegated to the
* intra-procedural transfer relation instead of being analyzed by the applyBlockAbstraction
* algorithm. The result of the block abstraction on the intra-procedural level is simply
* generating a successor (or successors in case there are multiple call edges, e.g. for unknown
* runtime type of an object) abstract state that has as location the next node of the {@link
* Cfa}. The recursion can be limited at a maximum call stack size. The intra-procedural transfer
* relation is also applied in case the max call stack size is reached.
*
*
- Apply the underlying intra-procedural transfer relation to all the other non-exit nodes in
* order to act as the wrapped transfer relation when procedure calls are not involved.
*
*
- Exit nodes reached are the base cases of the recursion (along with the stop operator), in
* this case the transfer relation returns with no successors.
*/
@Override
public Collection generateAbstractSuccessors(
AbstractState abstractState, Precision precision) {
if (!(abstractState instanceof ProgramLocationDependent)) {
throw new IllegalArgumentException(
"The abstract state of type " + AbstractState.class + " is not location dependent");
}
CfaNodeT currentLocation =
((ProgramLocationDependent) abstractState)
.getProgramLocation();
Collection abstractSuccessors = new ArrayList<>();
// executed only on the first call
if (stack.isEmpty() && currentLocation.equals(mainLocation)) {
abstractSuccessors.addAll(fixedPoint(abstractState, currentLocation, precision));
}
// call location
else if (currentLocation.getLeavingEdges().stream().anyMatch(e -> e instanceof CallEdge)) {
// there might be multiple calls, in this case we generate successors for all of them and
// eventually let the merge operator handle the results
for (CfaEdgeT callEdge :
currentLocation.getLeavingEdges().stream()
.filter(e -> e instanceof CallEdge)
.collect(Collectors.toList())) {
// if the call target is an unknown node (i.e., either the method code of a symbolic call is
// not available or a library call is excluded from the analysis)
// similarly if the maximum call depth is reached we delegate to the intra-procedural
// wrapped transfer relation instead of calling recursively applyBlockAbstraction
if ((maxCallStackDepth < 0 || stack.size() < maxCallStackDepth)
&& !callEdge.getTarget().isUnknownNode()) {
abstractSuccessors.addAll(
applyBlockAbstraction(
abstractState, precision, (CfaEdge & CallEdge) callEdge));
} else {
abstractSuccessors.addAll(
((ProgramLocationDependentTransferRelation) wrappedCpa.getTransferRelation())
.generateEdgeAbstractSuccessors(abstractState, callEdge, precision));
}
}
}
// non-call instruction, apply wrapped inter-procedural transfer relation
// the exit node case is not checked specifically because in case of exit node the wrapped
// intra-procedural transfer relation does not produce successors
else {
abstractSuccessors.addAll(
wrappedCpa.getTransferRelation().generateAbstractSuccessors(abstractState, precision));
}
return abstractSuccessors;
}
/**
* Returns the maximal call stack depth. If negative the maximum call stack depth is unlimited.
*/
public int getMaxCallStackDepth() {
return maxCallStackDepth;
}
/** Returns the wrapped domain-dependent intra-procedural CPA. */
public CpaWithBamOperators getWrappedCpa() {
return wrappedCpa;
}
/**
* By default the {@link Waitlist} used by the applyBlockAbstraction algorithm is a {@link
* BreadthFirstWaitlist}, this method can be overridden to provide a different waitlist.
*/
protected Waitlist getWaitlist() {
return new BreadthFirstWaitlist();
}
/**
* By default the {@link ReachedSet} used by the applyBlockAbstraction algorithm is a {@link
* ProgramLocationDependentReachedSet}, this method can be overridden to provide a different
* reached set.
*/
protected ReachedSet getReachedSet() {
return new ProgramLocationDependentReachedSet<>();
}
/** Returns BAM cache storing analysis result for various method calls. */
public BamCache getCache() {
return cache;
}
/** Returns the CFA used by the transfer relation. */
public Cfa getCfa() {
return cfa;
}
private Collection fixedPoint(
AbstractState entryState, CfaNodeT currentLocation, Precision precision) {
Collection blockResult = Collections.emptyList();
while (!fixedPointReached) {
fixedPointReached = true;
blockResult = applyBlockAbstraction(entryState, precision, null);
}
return blockResult;
}
private & CallEdge>
Collection applyBlockAbstraction(
AbstractState callState, Precision precision, CfaCallEdgeT callEdge) {
ReachedSet reached = getReachedSet();
Waitlist waitlist = getWaitlist();
Call call = callEdge == null ? null : callEdge.getCall();
CfaNodeT entryNode =
call != null
? cfa.getFunctionEntryNode(callEdge.getTarget().getSignature())
: ((ProgramLocationDependent) callState)
.getProgramLocation();
SignatureT currentFunction = entryNode.getSignature();
// TODO: maybe we can have a better way to identify that the function was called from fixedPoint
// than having a null call
// apply the reduce operator to the entry state (i.e. discard information not relevant in the
// called procedure context (e.g. local variables of the caller)
// this step is not necessary if we are calculating the block abstraction of the main method
AbstractState reducedEntryState =
call != null
? wrappedCpa
.getReduceOperator()
.reduce(callState, cfa.getFunctionEntryNode(currentFunction), call)
: callState;
Optional previousCall =
stack.stream()
.filter(
x ->
x.function.equals(currentFunction)
&& wrappedCpa
.getAbstractDomain()
.isLessOrEqual(reducedEntryState, x.entryState))
.map(x -> x.entryState)
.findFirst();
// check if there are calls to the same function on the stack that cover the current call
if (previousCall.isPresent()) {
// if this is not the first call, get from the cache the result of the unrolling of the
// recursive procedure that has been already calculated
BlockAbstraction cacheEntry = cache.get(previousCall.get(), precision, currentFunction);
if (cacheEntry != null) {
reached = cacheEntry.getReachedSet();
}
// if this is the first unrolling the recursive call is not analyzed
else {
stack.peek().incompleteCallStates.add(callState);
fixedPointReached = false;
}
} else {
// get previously calculated results from the cache
BlockAbstraction cacheEntry = cache.get(reducedEntryState, precision, currentFunction);
if (cacheEntry != null) {
// TODO: these might be different waitlist/reached set types if the cache is initialized
// externally (things that is not currently possible). Shall we add all elements from the
// them
// instead of copying?
reached = cacheEntry.getReachedSet();
waitlist = cacheEntry.getWaitlist();
} else {
reached.add(reducedEntryState);
waitlist.add(reducedEntryState);
}
stack.push(new StackEntry(currentFunction, reducedEntryState));
// analyze the current procedure call with the CPA algorithm, this is the recursive step of
// the BAM CPA
// n.b. if the procedure has been already analyzed completely for the input the CPA algorithm
// will return immediately
new CpaAlgorithm(
this,
wrappedCpa.getMergeOperator(),
wrappedCpa.getStopOperator(),
wrappedCpa.getPrecisionAdjustment())
.run(reached, waitlist, abortOperator);
StackEntry stackEntry = stack.pop();
// since the fixed point has not been reached all the calls not analyzed are added to the
// waitlist
if (!stackEntry.incompleteCallStates.isEmpty()) {
// the call to the current method will be added to the waitlist of the caller
if (!stack.isEmpty()) {
stack.peek().incompleteCallStates.add(callState);
}
for (AbstractState incompleteCallState : stackEntry.incompleteCallStates) {
waitlist.add(incompleteCallState);
}
}
cacheEntry = cache.get(reducedEntryState, precision, currentFunction);
// check if the fixed point has not been reached at this iteration (i.e. some new exit state
// is not covered by one already calculated)
if (cacheEntry != null) {
// TODO: as above, we might want to add all instead of copying
ReachedSet reachedOld = cacheEntry.getReachedSet();
for (AbstractState reachedState : reached.asCollection()) {
CfaNodeT reachedLocation =
((ProgramLocationDependent) reachedState)
.getProgramLocation();
if (reachedLocation.getSignature().equals(currentFunction)
&& reachedLocation.isExitNode()
&& !(fixedPointStopOperator.stop(reachedState, reachedOld.asCollection(), null))) {
if (!stack.isEmpty()) {
stack.peek().incompleteCallStates.add(callState);
}
fixedPointReached = false;
break;
}
}
}
cache.put(
reducedEntryState, precision, currentFunction, new BlockAbstraction(reached, waitlist));
}
Collection exitStates = reached.asCollection();
// TODO: as before, maybe we can have a better way to identify that the function was called from
// fixedPoint
if (call != null) {
// reconstruct the next state of the caller procedure applying the expand and reduce
// operators.
exitStates =
exitStates.stream()
.filter(
e ->
((ProgramLocationDependent) e)
.getProgramLocation()
.isExitNode())
.map(e -> wrappedCpa.getExpandOperator().expand(callState, e, entryNode, call))
.map(e -> wrappedCpa.getRebuildOperator().rebuild(callState, e))
.collect(Collectors.toCollection(LinkedHashSet::new));
}
return exitStates;
}
private class StackEntry {
public final SignatureT function;
public final AbstractState entryState;
public final Set incompleteCallStates = new LinkedHashSet<>();
public StackEntry(SignatureT function, AbstractState entryState) {
this.function = function;
this.entryState = entryState;
}
}
}