All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.fraunhofer.aisec.cpg.helpers.SubgraphWalker.kt Maven / Gradle / Ivy

Go to download

A simple library to extract a code property graph out of source code. It has support for multiple passes that can extend the analysis after the graph is constructed.

There is a newer version: 8.3.0
Show newest version
/*
 * Copyright (c) 2019, Fraunhofer AISEC. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *                    $$$$$$\  $$$$$$$\   $$$$$$\
 *                   $$  __$$\ $$  __$$\ $$  __$$\
 *                   $$ /  \__|$$ |  $$ |$$ /  \__|
 *                   $$ |      $$$$$$$  |$$ |$$$$\
 *                   $$ |      $$  ____/ $$ |\_$$ |
 *                   $$ |  $$\ $$ |      $$ |  $$ |
 *                   \$$$$$   |$$ |      \$$$$$   |
 *                    \______/ \__|       \______/
 *
 */
package de.fraunhofer.aisec.cpg.helpers

import de.fraunhofer.aisec.cpg.ScopeManager
import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
import de.fraunhofer.aisec.cpg.graph.AST
import de.fraunhofer.aisec.cpg.graph.Node
import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration
import de.fraunhofer.aisec.cpg.graph.edge.PropertyEdge
import de.fraunhofer.aisec.cpg.graph.edge.PropertyEdge.Companion.checkForPropertyEdge
import de.fraunhofer.aisec.cpg.graph.edge.PropertyEdge.Companion.unwrap
import de.fraunhofer.aisec.cpg.processing.strategy.Strategy
import java.lang.annotation.AnnotationFormatError
import java.lang.reflect.Field
import java.util.*
import java.util.function.BiConsumer
import java.util.function.Consumer
import org.neo4j.ogm.annotation.Relationship
import org.slf4j.LoggerFactory

/** Helper class for graph walking: Walking through ast-, cfg-, ...- edges */
object SubgraphWalker {
    private val LOGGER = LoggerFactory.getLogger(SubgraphWalker::class.java)
    private val fieldCache = HashMap>()

    /**
     * Returns all the fields for a specific class type. Because this information is static during
     * runtime, we do cache this information in [fieldCache] for performance reasons.
     *
     * @param classType the class type
     * @return its fields, including the ones from its superclass
     */
    private fun getAllFields(classType: Class<*>): Collection {
        if (classType.superclass != null) {
            val cacheKey = classType.name

            // Note: we cannot use computeIfAbsent here, because we are calling our function
            // recursively and this would result in a ConcurrentModificationException
            if (fieldCache.containsKey(cacheKey)) {
                return fieldCache[cacheKey] ?: ArrayList()
            }
            val fields = ArrayList()
            fields.addAll(getAllFields(classType.superclass))
            fields.addAll(listOf(*classType.declaredFields))

            // update the cache
            fieldCache[cacheKey] = fields
            return fields
        }
        return ArrayList()
    }

    /**
     * Retrieves a list of AST children of the specified node by iterating all fields that are
     * annotated with the [AST] annotation.
     *
     * Please note, that you SHOULD NOT call this directly in a recursive function, since the AST
     * might have loops and you will probably run into a [StackOverflowError]. Therefore, use of
     * [Node.accept] with the [Strategy.AST_FORWARD] is encouraged.
     *
     * @param node the start node
     * @return a list of children from the node's AST
     */
    @JvmStatic
    fun getAstChildren(node: Node?): List {
        val children = ArrayList()
        if (node == null) return children
        val classType: Class<*> = node.javaClass

        /*for (member in node::class.members) {
            val subGraph = member.findAnnotation()
            if (subGraph != null && listOf(*subGraph.value).contains("AST")) {
                val old = member.isAccessible

                member.isAccessible = true

                val obj = member.call(node)

                // skip, if null
                if (obj == null) {
                    continue
                }

                member.isAccessible = old

                var outgoing = true // default
                var relationship = member.findAnnotation()
                if (relationship != null) {
                    outgoing =
                        relationship.direction ==
                                Relationship.Direction.OUTGOING)
                }
                if (checkForPropertyEdge(field, obj)) {
                    obj = unwrap(obj as List>, outgoing)
                }
                when (obj) {
                    is Node -> {
                        children.add(obj)
                    }
                    is Collection<*> -> {
                        children.addAll(obj as Collection)
                    }
                    else -> {
                        throw AnnotationFormatError(
                            "Found @field:SubGraph(\"AST\") on field of type " +
                                    obj.javaClass +
                                    " but can only used with node graph classes or collections of graph nodes"
                        )
                    }
                }
            }
        }*/

        // We currently need to stick to pure Java reflection, since Kotlin reflection
        // is EXTREMELY slow. See https://youtrack.jetbrains.com/issue/KT-32198
        for (field in getAllFields(classType)) {
            field.getAnnotation(AST::class.java) ?: continue
            try {
                // We need to synchronize access to the field, because otherwise different
                // threads might restore the isAccessible property while this thread is still
                // accessing the field
                var obj =
                    synchronized(field) {
                        // disable access mechanisms
                        field.trySetAccessible()
                        val obj = field[node]

                        // restore old state
                        field.isAccessible = false
                        obj
                    } ?: continue

                // skip, if null
                var outgoing = true // default
                if (field.getAnnotation(Relationship::class.java) != null) {
                    outgoing =
                        (field.getAnnotation(Relationship::class.java).direction ==
                            Relationship.Direction.OUTGOING)
                }
                if (checkForPropertyEdge(field, obj) && obj is Collection<*>) {
                    obj = unwrap(obj.filterIsInstance>(), outgoing)
                }
                when (obj) {
                    is Node -> {
                        children.add(obj)
                    }
                    is Collection<*> -> {
                        children.addAll(obj.filterIsInstance())
                    }
                    else -> {
                        throw AnnotationFormatError(
                            "Found @field:SubGraph(\"AST\") on field of type " +
                                obj.javaClass +
                                " but can only used with node graph classes or collections of graph nodes"
                        )
                    }
                }
            } catch (ex: IllegalAccessException) {
                LOGGER.error("Error while retrieving AST children: {}", ex.message)
            }
        }
        return children
    }

    /**
     * Flattens the tree, starting at Node n into a list.
     *
     * @param n the node which contains the ast children to flatten
     * @return the flattened nodes
     */
    fun flattenAST(n: Node?): List {
        if (n == null) {
            return ArrayList()
        }

        // We are using an identity set here, to avoid placing the *same* node in the identitySet
        // twice,
        // possibly resulting in loops
        val identitySet = IdentitySet()
        flattenASTInternal(identitySet, n)
        return identitySet.toSortedList()
    }

    private fun flattenASTInternal(identitySet: MutableSet, n: Node) {
        // Add the node itself and abort if its already there, to detect possible loops
        if (!identitySet.add(n)) {
            return
        }
        for (child in getAstChildren(n)) {
            flattenASTInternal(identitySet, child)
        }
    }

    /**
     * Function returns two lists in a list. The first list contains all eog nodes with no
     * predecessor in the subgraph with root 'n'. The second list contains eog edges that have no
     * successor in the subgraph with root 'n'. The first List marks the entry and the second marks
     * the exit nodes of the cfg in this subgraph.
     *
     * @param n - root of the subgraph.
     * @return Two lists, list 1 contains all eog entries and list 2 contains all exits.
     */
    fun getEOGPathEdges(n: Node?): Border {
        val border = Border()
        val flattedASTTree = flattenAST(n)
        val eogNodes =
            flattedASTTree.filter { node: Node ->
                node.prevEOG.isNotEmpty() || node.nextEOG.isNotEmpty()
            }
        // Nodes that are incoming edges, no other node
        border.entries =
            eogNodes
                .filter { node: Node -> node.prevEOG.any { prev -> prev !in eogNodes } }
                .toMutableList()
        border.exits =
            eogNodes
                .filter { node: Node -> node.nextEOG.any { next -> next !in eogNodes } }
                .toMutableList()
        return border
    }

    /**
     * For better readability: `result.entries` instead of `result.get(0)` when working with
     * getEOGPathEdges. Can be used for all subgraphs in subgraphs, e.g. AST entries and exits in a
     * EOG subgraph, EOG entries and exits in a CFG subgraph.
     */
    class Border {
        var entries = mutableListOf()
        var exits = mutableListOf()
    }

    class IterativeGraphWalker {
        private var todo: Deque>? = null
        var backlog: Deque? = null
            private set

        var strategy: (Node) -> Iterator = Strategy::AST_FORWARD

        /**
         * This callback is triggered whenever a new node is visited for the first time. This is the
         * place where usual graph manipulation will happen. The current node is the single argument
         * passed to the function
         */
        private val onNodeVisit: MutableList> = mutableListOf()
        private val onNodeVisit2: MutableList> = mutableListOf()

        private val replacements = mutableMapOf()

        /**
         * The callback that is designed to tell the user when we leave the current scope. The
         * exited node is passed as an argument to the callback function. Consider the following
         * AST:
         *
         * .........(1) parent
         *
         * ........./........\
         *
         * (2) child1....(4) child2
         *
         * ........|
         *
         * (3) subchild
         *
         * Once "parent" has been visited, we continue descending into its children. First into
         * "child1", followed by "subchild". Once we are done there, we return to "child1". At this
         * point, the exit handler notifies the user that "subchild" is being exited. Afterwards we
         * exit "child1", and after "child2" is done, "parent" is exited.
         */
        private val onNodeExit: MutableList> = ArrayList()

        /**
         * The core iterative AST traversal algorithm: In a depth-first way we descend into the
         * tree, providing callbacks for graph modification.
         *
         * @param root The node where we should start
         */
        fun iterate(root: Node) {
            todo = ArrayDeque()
            backlog = ArrayDeque()
            val seen: MutableSet = LinkedHashSet()
            todo?.push(Pair(root, null))
            while ((todo as ArrayDeque>).isNotEmpty()) {
                var (current, parent) = (todo as ArrayDeque>).pop()
                if (
                    (backlog as ArrayDeque).isNotEmpty() &&
                        (backlog as ArrayDeque).peek() == current
                ) {
                    val exiting = (backlog as ArrayDeque).pop()
                    onNodeExit.forEach(Consumer { c: Consumer -> c.accept(exiting) })
                } else {
                    onNodeVisit.forEach(Consumer { c: Consumer -> c.accept(current) })
                    onNodeVisit2.forEach(
                        Consumer { c: BiConsumer -> c.accept(current, parent) }
                    )

                    // Check if we have a replacement node
                    val toReplace = replacements[current]
                    if (toReplace != null) {
                        current = toReplace
                        replacements.remove(toReplace)
                    }

                    val unseenChildren =
                        strategy(current).asSequence().filter { it !in seen }.toMutableList()

                    // re-place the current node as a marker for the above check to find out when we
                    // need to exit a scope
                    (todo as ArrayDeque>).push(Pair(current, parent))

                    seen.addAll(unseenChildren)
                    unseenChildren.asReversed().forEach { child: Node ->
                        (todo as ArrayDeque>).push(Pair(child, current))
                    }
                    (backlog as ArrayDeque).push(current)
                }
            }
        }

        /**
         * Sometimes during walking the graph, we are replacing the current node. This causes
         * problems, that the walker still assumes the old node. Calling this function will ensure
         * that the walker knows about the new node.
         */
        fun registerReplacement(from: Node, to: Node) {
            replacements[from] = to
        }

        fun registerOnNodeVisit(callback: Consumer) {
            onNodeVisit.add(callback)
        }

        fun registerOnNodeVisit2(callback: BiConsumer) {
            onNodeVisit2.add(callback)
        }

        fun registerOnNodeExit(callback: Consumer) {
            onNodeExit.add(callback)
        }

        fun clearCallbacks() {
            onNodeVisit.clear()
            onNodeExit.clear()
        }

        fun getTodo(): Deque {
            return ArrayDeque(todo?.map { it.first })
        }
    }

    /**
     * This class traverses the graph in a similar way as the [IterativeGraphWalker], but with the
     * added feature, that a [ScopeManager] is populated with the scope information of the current
     * node. This way, we can call functions on the supplied [scopeManager] and emulate that we are
     * currently in the scope of the "consumed" node in the callback. This can be useful for
     * resolving declarations or other scope-related tasks.
     */
    class ScopedWalker {
        lateinit var strategy: (Node) -> Iterator
        private var walker: IterativeGraphWalker? = null
        private val scopeManager: ScopeManager

        constructor(lang: LanguageFrontend<*, *>) {
            scopeManager = lang.scopeManager
        }

        constructor(
            scopeManager: ScopeManager,
            strategy: (Node) -> Iterator = Strategy::AST_FORWARD
        ) {
            this.scopeManager = scopeManager
            this.strategy = strategy
        }

        /**
         * Callback function(s) getting three arguments: the type of the class we're currently in,
         * the root node of the current declaration scope, the currently visited node.
         */
        private val handlers = mutableListOf>()

        fun clearCallbacks() {
            handlers.clear()
        }

        fun registerHandler(handler: TriConsumer) {
            handlers.add(handler)
        }

        fun registerHandler(handler: BiConsumer) {
            handlers.add(
                TriConsumer { currClass: RecordDeclaration?, _: Node?, currNode: Node? ->
                    handler.accept(currNode, currClass)
                }
            )
        }

        fun registerHandler(handler: Consumer) {
            handlers.add(
                TriConsumer { _: RecordDeclaration?, _: Node?, currNode: Node? ->
                    handler.accept(currNode)
                }
            )
        }

        fun registerReplacement(from: Node, to: Node) {
            walker?.registerReplacement(from, to)
        }

        /**
         * Wraps [IterativeGraphWalker] to handle declaration scopes.
         *
         * @param root The node where AST descent is started
         */
        fun iterate(root: Node) {
            val walker = IterativeGraphWalker()
            walker.strategy = this.strategy
            handlers.forEach { h -> walker.registerOnNodeVisit { n -> handleNode(n, h) } }

            this.walker = walker

            walker.iterate(root)
        }

        private fun handleNode(
            current: Node,
            handler: TriConsumer
        ) {
            // Jump to the node's scope, if it is different from ours.
            if (scopeManager.currentScope != current.scope) {
                scopeManager.jumpTo(current.scope)
            }

            val parent = walker?.backlog?.peek()

            handler.accept(scopeManager.currentRecord, parent, current)
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy