de.fraunhofer.aisec.cpg.passes.ControlFlowSensitiveDFGPass.kt Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cpg-core Show documentation
Show all versions of cpg-core Show documentation
A simple library to extract a code property graph out of source code. It has support for multiple passes that can extend the analysis after the graph is constructed.
/*
* Copyright (c) 2022, Fraunhofer AISEC. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $$$$$$\ $$$$$$$\ $$$$$$\
* $$ __$$\ $$ __$$\ $$ __$$\
* $$ / \__|$$ | $$ |$$ / \__|
* $$ | $$$$$$$ |$$ |$$$$\
* $$ | $$ ____/ $$ |\_$$ |
* $$ | $$\ $$ | $$ | $$ |
* \$$$$$ |$$ | \$$$$$ |
* \______/ \__| \______/
*
*/
package de.fraunhofer.aisec.cpg.passes
import de.fraunhofer.aisec.cpg.TranslationContext
import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.declarations.*
import de.fraunhofer.aisec.cpg.graph.edge.Properties
import de.fraunhofer.aisec.cpg.graph.edge.PropertyEdge
import de.fraunhofer.aisec.cpg.graph.edge.partial
import de.fraunhofer.aisec.cpg.graph.statements.DeclarationStatement
import de.fraunhofer.aisec.cpg.graph.statements.ForEachStatement
import de.fraunhofer.aisec.cpg.graph.statements.ReturnStatement
import de.fraunhofer.aisec.cpg.graph.statements.Statement
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.helpers.*
import de.fraunhofer.aisec.cpg.passes.order.DependsOn
import kotlin.collections.set
import kotlin.contracts.ExperimentalContracts
import kotlin.contracts.contract
/**
* This pass determines the data flows of References which refer to a VariableDeclaration (not a
* field) while considering the control flow of a function. After this path, only such data flows
* are left which can occur when following the control flow (in terms of the EOG) of the program.
*/
@OptIn(ExperimentalContracts::class)
@DependsOn(EvaluationOrderGraphPass::class)
@DependsOn(DFGPass::class)
open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass(ctx) {
class Configuration(
/**
* This specifies the maximum complexity (as calculated per
* [Statement.cyclomaticComplexity]) a [FunctionDeclaration] must have in order to be
* considered.
*/
var maxComplexity: Int? = null
) : PassConfiguration()
override fun cleanup() {
// Nothing to do
}
/** We perform the actions for each [FunctionDeclaration]. */
override fun accept(node: Node) {
// For now, we only execute this for function declarations, we will support all EOG starters
// in the future.
if (node !is FunctionDeclaration) {
return
}
// Skip empty functions
if (node.body == null) {
return
}
// Calculate the complexity of the function and see, if it exceeds our threshold
val max = passConfig()?.maxComplexity
val c = node.body?.cyclomaticComplexity ?: 0
if (max != null && c > max) {
log.info(
"Ignoring function ${node.name} because its complexity (${c}) is greater than the configured maximum (${max})"
)
return
}
log.debug("Handling {} (complexity: {})", node.name, c)
clearFlowsOfVariableDeclarations(node)
val startState = DFGPassState>()
startState.declarationsState.push(node, PowersetLattice(identitySetOf()))
node.parameters.forEach {
startState.declarationsState.push(it, PowersetLattice(identitySetOf(it)))
}
val finalState =
iterateEOG(node.nextEOGEdges, startState, ::transfer) as? DFGPassState ?: return
removeUnreachableImplicitReturnStatement(
node,
finalState.returnStatements.values.flatMap {
it.elements.filterIsInstance()
}
)
for ((key, value) in finalState.generalState) {
if (key is TupleDeclaration) {
// We need a little hack for tuple statements to set the index. We have the
// outer part (i.e., the tuple) here, but we generate the DFG edges to the
// elements. We have the indices here, so it's amazingly easy to find the partial
// target.
key.elements.forEach { element ->
element.addAllPrevDFG(
value.elements.filterNot {
(it is VariableDeclaration || it is ParameterDeclaration) && key == it
},
granularity = partial(element)
)
}
} else {
key.addAllPrevDFG(
value.elements.filterNot {
(it is VariableDeclaration || it is ParameterDeclaration) && key == it
}
)
}
}
}
/**
* Removes all the incoming and outgoing DFG edges for each variable declaration in the block of
* code [node].
*/
protected fun clearFlowsOfVariableDeclarations(node: Node) {
for (varDecl in node.variables /*.filter { it !is FieldDeclaration }*/) {
varDecl.clearPrevDFG()
varDecl.clearNextDFG()
}
val allChildrenOfFunction = node.allChildren()
for (varDecl in node.parameters) {
// Clear only prev and next inside this function!
varDecl.nextDFG
.filter { it in allChildrenOfFunction }
.forEach { varDecl.removeNextDFG(it) }
varDecl.prevDFG
.filter { it in allChildrenOfFunction }
.forEach { varDecl.removePrevDFG(it) }
}
}
/**
* Computes the previous write access of [currentEdge].end if it is a [Reference] or
* [ValueDeclaration] based on the given [state] (which maps all variables to its last write
* instruction). It also updates the [state] if [currentEdge].end performs a write-operation to
* a variable.
*
* It further determines unnecessary implicit return statement which are added by some frontends
* even if every path reaching this point already contains a return statement.
*/
protected open fun transfer(
currentEdge: PropertyEdge,
state: State>,
worklist: Worklist, Node, Set>
): State> {
// We will set this if we write to a variable
val writtenDeclaration: Declaration?
val currentNode = currentEdge.end
val doubleState = state as DFGPassState
if (currentNode is VariableDeclaration) {
val initializer = currentNode.initializer
if (initializer != null) {
// A variable declaration with an initializer => The initializer flows to the
// declaration. This also affects tuples. We split it up later.
state.push(currentNode, PowersetLattice(identitySetOf(initializer)))
}
if (currentNode is TupleDeclaration) {
// For a tuple declaration, we write the elements in this statement. We do not
// really care about the tuple when using the elements subsequently.
currentNode.elements.forEach {
doubleState.pushToDeclarationsState(it, PowersetLattice(identitySetOf(it)))
}
} else {
// We also wrote something to this variable declaration here.
doubleState.pushToDeclarationsState(
currentNode,
PowersetLattice(identitySetOf(currentNode))
)
}
} else if (currentNode is MemberExpression && currentNode.access == AccessValues.WRITE) {
// already set in DFG pass, because otherwise we cannot set the field property
// state.push(currentNode.base, PowersetLattice(identitySetOf(currentNode)))
writtenDeclaration = (currentNode.base as? Reference)?.refersTo
if (writtenDeclaration != null) {
// we also want to set the last write to our base here.
doubleState.declarationsState[writtenDeclaration] =
PowersetLattice(identitySetOf(currentNode.base))
// Update the state identifier of this node, so that the data flows to later member
// expressions accessing the same object/field combination.
doubleState.declarationsState[currentNode.objectIdentifier()] =
PowersetLattice(identitySetOf(currentNode))
}
} else if (currentNode is MemberExpression && currentNode.access == AccessValues.READ) {
writtenDeclaration = (currentNode.base as? Reference)?.refersTo
val fieldDeclaration = currentNode.refersTo
if (writtenDeclaration != null && fieldDeclaration != null) {
// We do an ugly hack here: We store a (unique) hash out of field declaration and
// the variable declaration in the declaration state so that we can retrieve it
// later for READ accesses.
val declState = doubleState.declarationsState[currentNode.objectIdentifier()]
if (declState != null) {
state.push(currentNode, declState)
} else {
// If we do not have a stored state of our object+field, we can use the field
// declaration. This will help us follow a data flow from field initializers (if
// they exist in the language)
state.push(currentNode, PowersetLattice(identitySetOf(fieldDeclaration)))
}
}
} else if (
currentNode is MemberExpression && currentNode.access == AccessValues.READWRITE
) {
writtenDeclaration = (currentNode.base as? Reference)?.refersTo
val fieldDeclaration = currentNode.refersTo
if (writtenDeclaration != null && fieldDeclaration != null) {
// We do an ugly hack here: We store a (unique) hash out of field declaration and
// the variable declaration in the declaration state so that we can retrieve it
// later for READ accesses.
val declState = doubleState.declarationsState[currentNode.objectIdentifier()]
if (declState != null) {
state.push(currentNode, declState)
} else {
// If we do not have a stored state of our object+field, we can use the field
// declaration. This will help us follow a data flow from field initializers (if
// they exist in the language)
state.push(currentNode, PowersetLattice(identitySetOf(fieldDeclaration)))
}
}
if (writtenDeclaration != null) {
// we also want to set the last write to our base here.
doubleState.declarationsState[writtenDeclaration] =
PowersetLattice(identitySetOf(currentNode.base))
// Update the state identifier of this node, so that the data flows to later member
// expressions accessing the same object/field combination.
doubleState.declarationsState[currentNode.objectIdentifier()] =
PowersetLattice(identitySetOf(currentNode))
}
} else if (isSimpleAssignment(currentNode)) {
// It's an assignment which can have one or multiple things on the lhs and on the
// rhs. The lhs could be a declaration or a reference (or multiple of these things).
// The rhs can be anything. The rhs flows to the respective lhs. To identify the
// correct mapping, we use the "assignments" property which already searches for us.
currentNode.assignments.forEach { assignment ->
// This was the last write to the respective declaration.
(assignment.target as? Declaration ?: (assignment.target as? Reference)?.refersTo)
?.let {
doubleState.declarationsState[it] =
PowersetLattice(identitySetOf(assignment.target as Node))
}
}
} else if (isIncOrDec(currentNode)) {
// Increment or decrement => Add the prevWrite of the input to the input. After the
// operation, the prevWrite of the input's variable is this node.
val input = (currentNode as UnaryOperator).input as Reference
// We write to the variable in the input
writtenDeclaration = input.refersTo
if (writtenDeclaration != null) {
state.push(input, doubleState.declarationsState[writtenDeclaration])
doubleState.declarationsState[writtenDeclaration] =
PowersetLattice(identitySetOf(input))
}
} else if (isCompoundAssignment(currentNode)) {
// We write to the lhs, but it also serves as an input => We first get all previous
// writes to the lhs and then add the flow from lhs and rhs to the current node.
// The write operation goes to the variable in the lhs
val lhs = currentNode.lhs.singleOrNull()
writtenDeclaration = (lhs as? Reference)?.refersTo
if (writtenDeclaration != null && lhs != null) {
// Data flows from the last writes to the lhs variable to this node
state.push(lhs, doubleState.declarationsState[writtenDeclaration])
// The whole current node is the place of the last update, not (only) the lhs!
doubleState.declarationsState[writtenDeclaration] =
PowersetLattice(identitySetOf(lhs))
}
} else if (
(currentNode as? Reference)?.access == AccessValues.READ &&
(currentNode.refersTo is VariableDeclaration ||
currentNode.refersTo is ParameterDeclaration) &&
currentNode.refersTo !is FieldDeclaration
) {
// We can only find a change if there's a state for the variable
doubleState.declarationsState[currentNode.refersTo]?.let {
// We only read the variable => Get previous write which have been collected in
// the other steps
state.push(currentNode, it)
}
} else if (
(currentNode as? Reference)?.access == AccessValues.READWRITE &&
(currentNode.refersTo is VariableDeclaration ||
currentNode.refersTo is ParameterDeclaration) &&
currentNode.refersTo !is FieldDeclaration
) {
// We can only find a change if there's a state for the variable
doubleState.declarationsState[currentNode.refersTo]?.let {
// We only read the variable => Get previous write which have been collected in
// the other steps
state.push(currentNode, it)
}
} else if (currentNode is ForEachStatement && currentNode.variable != null) {
// The VariableDeclaration in the ForEachStatement doesn't have an initializer, so
// the "normal" case won't work. We handle this case separately here...
// This is what we write to the declaration
val iterable = currentNode.iterable as? Expression
val writtenTo =
when (val variable = currentNode.variable) {
is DeclarationStatement -> {
if (variable.isSingleDeclaration()) {
variable.singleDeclaration
} else if (variable.variables.size == 2) {
// If there are two variables, we just blindly assume that the order is
// (key, value), so we return the second one
variable.declarations[1]
} else {
null
}
}
else -> currentNode.variable
}
// We wrote something to this variable declaration
writtenDeclaration =
when (writtenTo) {
is Declaration -> writtenTo
is Reference -> writtenTo.refersTo
else -> {
log.error(
"The variable of type ${writtenTo?.javaClass} is not yet supported in the foreach loop"
)
null
}
}
if (writtenTo is Reference) {
// This is a special case: We add the nextEOGEdge which goes out of the loop but
// with the old previousWrites map.
val nodesOutsideTheLoop =
currentNode.nextEOGEdges.filter {
it.getProperty(Properties.UNREACHABLE) != true &&
it.end != currentNode.statement &&
it.end !in currentNode.statement.allChildren()
}
nodesOutsideTheLoop.forEach { worklist.push(it, state.duplicate()) }
}
iterable?.let {
writtenTo?.let {
state.push(writtenTo, PowersetLattice(identitySetOf(iterable)))
// Add the variable declaration (or the reference) to the list of previous
// write nodes in this path
state.declarationsState[writtenDeclaration] =
PowersetLattice(identitySetOf(writtenTo))
}
}
} else if (currentNode is FunctionDeclaration) {
// We have to add the parameters
currentNode.parameters.forEach {
doubleState.pushToDeclarationsState(it, PowersetLattice(identitySetOf(it)))
}
} else if (currentNode is ReturnStatement) {
doubleState.returnStatements.push(
currentNode,
PowersetLattice(identitySetOf(currentNode))
)
} else {
doubleState.declarationsState.push(
currentNode,
doubleState.declarationsState[currentEdge.start]
)
}
return state
}
/**
* Checks if the node performs an operation and an assignment at the same time e.g. with the
* operators +=, -=, *=, ...
*/
protected fun isCompoundAssignment(currentNode: Node): Boolean {
contract { returns(true) implies (currentNode is AssignExpression) }
return currentNode is AssignExpression &&
currentNode.operatorCode in
(currentNode.language?.compoundAssignmentOperators ?: setOf()) &&
(currentNode.lhs.singleOrNull() as? Reference)?.refersTo != null
}
protected fun isSimpleAssignment(currentNode: Node): Boolean {
contract { returns(true) implies (currentNode is AssignExpression) }
return currentNode is AssignExpression && currentNode.operatorCode == "="
}
/** Checks if the node is an increment or decrement operator (e.g. i++, i--, ++i, --i) */
protected fun isIncOrDec(currentNode: Node) =
currentNode is UnaryOperator &&
(currentNode.operatorCode == "++" || currentNode.operatorCode == "--") &&
(currentNode.input as? Reference)?.refersTo != null
/**
* Removes the DFG edges for a potential implicit return statement if it is not in
* [reachableReturnStatements].
*/
protected fun removeUnreachableImplicitReturnStatement(
node: Node,
reachableReturnStatements: Collection
) {
val lastStatement =
((node as? FunctionDeclaration)?.body as? Block)?.statements?.lastOrNull()
if (
lastStatement is ReturnStatement &&
lastStatement.isImplicit &&
lastStatement !in reachableReturnStatements
)
lastStatement.removeNextDFG(node)
}
/**
* A state which actually holds a state for all nodes, one only for declarations and one for
* ReturnStatements.
*/
protected class DFGPassState(
/**
* A mapping of a [Node] to its [LatticeElement]. The keys of this state will later get the
* DFG edges from the value!
*/
var generalState: State = State(),
/**
* It's main purpose is to store the most recent mapping of a [Declaration] to its
* [LatticeElement]. However, it is also used to figure out if we have to continue with the
* iteration (something in the declarationState has changed) which is why we store all nodes
* here. However, since we never use them except from determining if we changed something,
* it won't affect the result.
*/
var declarationsState: State = State(),
/** The [returnStatements] which are reachable. */
var returnStatements: State = State()
) : State() {
override fun duplicate(): DFGPassState {
return DFGPassState(generalState.duplicate(), declarationsState.duplicate())
}
override fun get(key: de.fraunhofer.aisec.cpg.graph.Node): LatticeElement? {
return generalState[key] ?: declarationsState[key]
}
override fun lub(
other: State
): Pair, Boolean> {
return if (other is DFGPassState) {
val (_, generalUpdate) = generalState.lub(other.generalState)
val (_, declUpdate) = declarationsState.lub(other.declarationsState)
Pair(this, generalUpdate || declUpdate)
} else {
val (_, generalUpdate) = generalState.lub(other)
Pair(this, generalUpdate)
}
}
override fun needsUpdate(other: State): Boolean {
return if (other is DFGPassState) {
generalState.needsUpdate(other.generalState) ||
declarationsState.needsUpdate(other.declarationsState)
} else {
generalState.needsUpdate(other)
}
}
override fun push(
newNode: de.fraunhofer.aisec.cpg.graph.Node,
newLatticeElement: LatticeElement?
): Boolean {
return generalState.push(newNode, newLatticeElement)
}
/** Pushes the [newNode] and its [newLatticeElement] to the [declarationsState]. */
fun pushToDeclarationsState(
newNode: Declaration,
newLatticeElement: LatticeElement?
): Boolean {
return declarationsState.push(newNode, newLatticeElement)
}
}
}
/**
* The "object identifier" of a node can be used to differentiate different "objects" that a node
* (most likely a [Reference]) refers to.
*
* In the most basic use-case the [objectIdentifier] of a simple variable reference is the hash-code
* of its [VariableDeclaration]. Consider the following code:
* ```c
* int a = 1;
* printf(a);
* ```
*
* In this case, the "object identifier" of the [Reference] `a` in the second line is the hash-code
* of the [VariableDeclaration] `a` in the first line.
*
* However, we also need to differentiate between different objects that are used as fields as well
* as different instances of the fields. Consider the second example:
* ```c
* struct myStruct {
* int field;
* };
*
* struct myStruct a;
* a.field = 1;
*
* struct myStruct b;
* b.field = 2;
* ```
*
* In this case, the [objectIdentifier] of the [MemberExpression] `a` is a combination of the
* hash-code of the [VariableDeclaration] `a` as well as the [FieldDeclaration] of `field`. The same
* applies for `b`. If we would only rely on the [VariableDeclaration], we would not be sensitive to
* fields, if we would only rely on the [FieldDeclaration], we would not be sensitive to different
* object instances. Therefore, we consider both.
*
* Please note however, that this current, very basic implementation does not consider perform any
* kind of pointer or alias analysis. This means that even though the "contents" of two variables
* that are the same (for example, because one is assigned into the other), they will be considered
* different "objects".
*/
private fun Node.objectIdentifier(): Int? {
return when (this) {
is MemberExpression -> this.objectIdentifier()
is Reference -> this.objectIdentifier()
is Declaration -> this.hashCode()
else -> null
}
}
/** Implements [Node.objectIdentifier] for a [MemberExpression]. */
private fun MemberExpression.objectIdentifier(): Int? {
val ref = this.refersTo
return if (ref == null) {
null
} else {
val baseIdentifier = base.objectIdentifier()
if (baseIdentifier != null) {
ref.hashCode() + baseIdentifier
} else {
null
}
}
}
/** Implements [Node.objectIdentifier] for a [Reference]. */
private fun Reference.objectIdentifier(): Int? {
return this.refersTo?.hashCode()
}