de.fraunhofer.aisec.cpg.passes.ControlFlowSensitiveDFGPass.kt Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of cpg-core Show documentation
A simple library to extract a code property graph out of source code. It has support for multiple passes that can extend the analysis after the graph is constructed.
There is a newer version: 8.3.0
Show newest version
/*
 * Copyright (c) 2022, Fraunhofer AISEC. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *                    $$$$$$\  $$$$$$$\   $$$$$$\
 *                   $$  __$$\ $$  __$$\ $$  __$$\
 *                   $$ /  \__|$$ |  $$ |$$ /  \__|
 *                   $$ |      $$$$$$$  |$$ |$$$$\
 *                   $$ |      $$  ____/ $$ |\_$$ |
 *                   $$ |  $$\ $$ |      $$ |  $$ |
 *                   \$$$$$   |$$ |      \$$$$$   |
 *                    \______/ \__|       \______/
 *
 */
package de.fraunhofer.aisec.cpg.passes

import de.fraunhofer.aisec.cpg.TranslationContext
import de.fraunhofer.aisec.cpg.graph.AccessValues
import de.fraunhofer.aisec.cpg.graph.Node
import de.fraunhofer.aisec.cpg.graph.allChildren
import de.fraunhofer.aisec.cpg.graph.declarations.*
import de.fraunhofer.aisec.cpg.graph.edge.Properties
import de.fraunhofer.aisec.cpg.graph.edge.PropertyEdge
import de.fraunhofer.aisec.cpg.graph.statements.DeclarationStatement
import de.fraunhofer.aisec.cpg.graph.statements.ForEachStatement
import de.fraunhofer.aisec.cpg.graph.statements.ReturnStatement
import de.fraunhofer.aisec.cpg.graph.statements.Statement
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.graph.variables
import de.fraunhofer.aisec.cpg.helpers.*
import de.fraunhofer.aisec.cpg.passes.order.DependsOn
import kotlin.collections.component1
import kotlin.collections.component2
import kotlin.collections.set
import kotlin.contracts.ExperimentalContracts
import kotlin.contracts.contract

/**
 * This pass determines the data flows of References which refer to a VariableDeclaration (not a
 * field) while considering the control flow of a function. After this path, only such data flows
 * are left which can occur when following the control flow (in terms of the EOG) of the program.
 */
@OptIn(ExperimentalContracts::class)
@DependsOn(EvaluationOrderGraphPass::class)
@DependsOn(DFGPass::class)
open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass(ctx) {

    class Configuration(
        /**
         * This specifies the maximum complexity (as calculated per
         * [Statement.cyclomaticComplexity]) a [FunctionDeclaration] must have in order to be
         * considered.
         */
        var maxComplexity: Int? = null
    ) : PassConfiguration()

    override fun cleanup() {
        // Nothing to do
    }

    /** We perform the actions for each [FunctionDeclaration]. */
    override fun accept(node: Node) {
        // For now, we only execute this for function declarations, we will support all EOG starters
        // in the future.
        if (node !is FunctionDeclaration) {
            return
        }

        // Skip empty functions
        if (node.body == null) {
            return
        }

        // Calculate the complexity of the function and see, if it exceeds our threshold
        val max = passConfig()?.maxComplexity
        val c = node.body?.cyclomaticComplexity ?: 0
        if (max != null && c > max) {
            log.info(
                "Ignoring function ${node.name} because its complexity (${c}) is greater than the configured maximum (${max})"
            )
            return
        }

        log.debug("Handling {} (complexity: {})", node.name, c)

        clearFlowsOfVariableDeclarations(node)
        val startState = DFGPassState>()

        startState.declarationsState.push(node, PowersetLattice(identitySetOf()))
        val finalState =
            iterateEOG(node.nextEOGEdges, startState, ::transfer) as? DFGPassState ?: return

        removeUnreachableImplicitReturnStatement(
            node,
            finalState.returnStatements.values.flatMap {
                it.elements.filterIsInstance()
            }
        )

        for ((key, value) in finalState.generalState) {
            if (key is TupleDeclaration) {
                // We need a little hack for tuple statements to set the index. We have the
                // outer part (i.e., the tuple) here, but we generate the DFG edges to the
                // elements. We have the indices here, so it's amazing.
                key.elements.forEachIndexed { i, element ->
                    element.addAllPrevDFG(
                        value.elements.filterNot { it is VariableDeclaration && key == it },
                        mutableMapOf(Properties.INDEX to i)
                    )
                }
            } else {
                key.addAllPrevDFG(
                    value.elements.filterNot { it is VariableDeclaration && key == it }
                )
            }
        }
    }

    /**
     * Removes all the incoming and outgoing DFG edges for each variable declaration in the block of
     * code [node].
     */
    protected fun clearFlowsOfVariableDeclarations(node: Node) {
        for (varDecl in node.variables.filter { it !is FieldDeclaration }) {
            varDecl.clearPrevDFG()
            varDecl.clearNextDFG()
        }
    }

    /**
     * Computes the previous write access of [currentEdge].end if it is a [Reference] or
     * [ValueDeclaration] based on the given [state] (which maps all variables to its last write
     * instruction). It also updates the [state] if [currentEdge].end performs a write-operation to
     * a variable.
     *
     * It further determines unnecessary implicit return statement which are added by some frontends
     * even if every path reaching this point already contains a return statement.
     */
    protected open fun transfer(
        currentEdge: PropertyEdge,
        state: State>,
        worklist: Worklist, Node, Set>
    ): State> {
        // We will set this if we write to a variable
        val writtenDeclaration: Declaration?
        val currentNode = currentEdge.end

        val doubleState = state as DFGPassState

        val initializer = (currentNode as? VariableDeclaration)?.initializer
        if (initializer != null) {
            // A variable declaration with an initializer => The initializer flows to the
            // declaration. This also affects tuples. We split it up later.
            state.push(currentNode, PowersetLattice(identitySetOf(initializer)))

            if (currentNode is TupleDeclaration) {
                // For a tuple declaration, we write the elements in this statement. We do not
                // really care about the tuple when using the elements subsequently.
                currentNode.elements.forEach {
                    doubleState.pushToDeclarationsState(it, PowersetLattice(identitySetOf(it)))
                }
            } else {
                // We also wrote something to this variable declaration here.
                doubleState.pushToDeclarationsState(
                    currentNode,
                    PowersetLattice(identitySetOf(currentNode))
                )
            }
        } else if (isSimpleAssignment(currentNode)) {
            // It's an assignment which can have one or multiple things on the lhs and on the
            // rhs. The lhs could be a declaration or a reference (or multiple of these things).
            // The rhs can be anything. The rhs flows to the respective lhs. To identify the
            // correct mapping, we use the "assignments" property which already searches for us.
            currentNode.assignments.forEach { assignment ->
                // This was the last write to the respective declaration.
                (assignment.target as? Declaration ?: (assignment.target as? Reference)?.refersTo)
                    ?.let {
                        doubleState.declarationsState[it] =
                            PowersetLattice(identitySetOf(assignment.target as Node))
                    }
            }
        } else if (isIncOrDec(currentNode)) {
            // Increment or decrement => Add the prevWrite of the input to the input. After the
            // operation, the prevWrite of the input's variable is this node.
            val input = (currentNode as UnaryOperator).input as Reference
            // We write to the variable in the input
            writtenDeclaration = input.refersTo

            if (writtenDeclaration != null) {
                state.push(input, doubleState.declarationsState[writtenDeclaration])
                doubleState.declarationsState[writtenDeclaration] =
                    PowersetLattice(identitySetOf(input))
            }
        } else if (isCompoundAssignment(currentNode)) {
            // We write to the lhs, but it also serves as an input => We first get all previous
            // writes to the lhs and then add the flow from lhs and rhs to the current node.

            // The write operation goes to the variable in the lhs
            val lhs = currentNode.lhs.singleOrNull()
            writtenDeclaration = (lhs as? Reference)?.refersTo

            if (writtenDeclaration != null && lhs != null) {
                // Data flows from the last writes to the lhs variable to this node
                state.push(lhs, doubleState.declarationsState[writtenDeclaration])

                // The whole current node is the place of the last update, not (only) the lhs!
                doubleState.declarationsState[writtenDeclaration] =
                    PowersetLattice(identitySetOf(lhs))
            }
        } else if (
            (currentNode as? Reference)?.access == AccessValues.READ &&
                currentNode.refersTo is VariableDeclaration &&
                currentNode.refersTo !is FieldDeclaration
        ) {
            // We can only find a change if there's a state for the variable
            doubleState.declarationsState[currentNode.refersTo]?.let {
                // We only read the variable => Get previous write which have been collected in
                // the other steps
                state.push(currentNode, it)
            }
        } else if (currentNode is ForEachStatement && currentNode.variable != null) {
            // The VariableDeclaration in the ForEachStatement doesn't have an initializer, so
            // the "normal" case won't work. We handle this case separately here...
            // This is what we write to the declaration
            val iterable = currentNode.iterable as? Expression
            val writtenTo =
                when (val variable = currentNode.variable) {
                    is DeclarationStatement -> {
                        if (variable.isSingleDeclaration()) {
                            variable.singleDeclaration
                        } else if (variable.variables.size == 2) {
                            // If there are two variables, we just blindly assume that the order is
                            // (key, value), so we return the second one
                            variable.declarations[1]
                        } else {
                            null
                        }
                    }
                    else -> currentNode.variable
                }

            // We wrote something to this variable declaration
            writtenDeclaration =
                when (writtenTo) {
                    is Declaration -> writtenTo
                    is Reference -> writtenTo.refersTo
                    else -> {
                        log.error(
                            "The variable of type ${writtenTo?.javaClass} is not yet supported in the foreach loop"
                        )
                        null
                    }
                }

            if (writtenTo is Reference) {
                // This is a special case: We add the nextEOGEdge which goes out of the loop but
                // with the old previousWrites map.
                val nodesOutsideTheLoop =
                    currentNode.nextEOGEdges.filter {
                        it.getProperty(Properties.UNREACHABLE) != true &&
                            it.end != currentNode.statement &&
                            it.end !in currentNode.statement.allChildren()
                    }
                nodesOutsideTheLoop.forEach { worklist.push(it, state.duplicate()) }
            }

            iterable?.let {
                writtenTo?.let {
                    state.push(writtenTo, PowersetLattice(identitySetOf(iterable)))
                    // Add the variable declaration (or the reference) to the list of previous
                    // write nodes in this path
                    state.declarationsState[writtenDeclaration] =
                        PowersetLattice(identitySetOf(writtenTo))
                }
            }
        } else if (currentNode is FunctionDeclaration) {
            // We have to add the parameters
            currentNode.parameters.forEach {
                doubleState.pushToDeclarationsState(it, PowersetLattice(identitySetOf(it)))
            }
        } else if (currentNode is ReturnStatement) {
            doubleState.returnStatements.push(
                currentNode,
                PowersetLattice(identitySetOf(currentNode))
            )
        } else {
            doubleState.declarationsState.push(
                currentNode,
                doubleState.declarationsState[currentEdge.start]
            )
        }
        return state
    }

    /**
     * Checks if the node performs an operation and an assignment at the same time e.g. with the
     * operators +=, -=, *=, ...
     */
    protected fun isCompoundAssignment(currentNode: Node): Boolean {
        contract { returns(true) implies (currentNode is AssignExpression) }
        return currentNode is AssignExpression &&
            currentNode.operatorCode in
                (currentNode.language?.compoundAssignmentOperators ?: setOf()) &&
            (currentNode.lhs.singleOrNull() as? Reference)?.refersTo != null
    }

    protected fun isSimpleAssignment(currentNode: Node): Boolean {
        contract { returns(true) implies (currentNode is AssignExpression) }
        return currentNode is AssignExpression && currentNode.operatorCode == "="
    }

    /** Checks if the node is an increment or decrement operator (e.g. i++, i--, ++i, --i) */
    protected fun isIncOrDec(currentNode: Node) =
        currentNode is UnaryOperator &&
            (currentNode.operatorCode == "++" || currentNode.operatorCode == "--") &&
            (currentNode.input as? Reference)?.refersTo != null

    /**
     * Removes the DFG edges for a potential implicit return statement if it is not in
     * [reachableReturnStatements].
     */
    protected fun removeUnreachableImplicitReturnStatement(
        node: Node,
        reachableReturnStatements: Collection
    ) {
        val lastStatement =
            ((node as? FunctionDeclaration)?.body as? Block)?.statements?.lastOrNull()
        if (
            lastStatement is ReturnStatement &&
                lastStatement.isImplicit &&
                lastStatement !in reachableReturnStatements
        )
            lastStatement.removeNextDFG(node)
    }

    /**
     * A state which actually holds a state for all nodes, one only for declarations and one for
     * ReturnStatements.
     */
    protected class DFGPassState(
        /**
         * A mapping of a [Node] to its [LatticeElement]. The keys of this state will later get the
         * DFG edges from the value!
         */
        var generalState: State = State(),
        /**
         * It's main purpose is to store the most recent mapping of a [Declaration] to its
         * [LatticeElement]. However, it is also used to figure out if we have to continue with the
         * iteration (something in the declarationState has changed) which is why we store all nodes
         * here. However, since we never use them except from determining if we changed something,
         * it won't affect the result.
         */
        var declarationsState: State = State(),
        /** The [returnStatements] which are reachable. */
        var returnStatements: State = State()
    ) : State() {
        override fun duplicate(): DFGPassState {
            return DFGPassState(generalState.duplicate(), declarationsState.duplicate())
        }

        override fun get(key: Node?): LatticeElement? {
            return generalState[key] ?: declarationsState[key]
        }

        override fun lub(other: State): Pair, Boolean> {
            return if (other is DFGPassState) {
                val (_, generalUpdate) = generalState.lub(other.generalState)
                val (_, declUpdate) = declarationsState.lub(other.declarationsState)
                Pair(this, generalUpdate || declUpdate)
            } else {
                val (_, generalUpdate) = generalState.lub(other)
                Pair(this, generalUpdate)
            }
        }

        override fun needsUpdate(other: State): Boolean {
            return if (other is DFGPassState) {
                generalState.needsUpdate(other.generalState) ||
                    declarationsState.needsUpdate(other.declarationsState)
            } else {
                generalState.needsUpdate(other)
            }
        }

        override fun push(newNode: Node, newLatticeElement: LatticeElement?): Boolean {
            return generalState.push(newNode, newLatticeElement)
        }

        /** Pushes the [newNode] and its [newLatticeElement] to the [declarationsState]. */
        fun pushToDeclarationsState(
            newNode: Declaration,
            newLatticeElement: LatticeElement?
        ): Boolean {
            return declarationsState.push(newNode, newLatticeElement)
        }
    }
}