All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.fraunhofer.aisec.cpg.frontends.cxx.CXXLanguageFrontend.kt Maven / Gradle / Ivy

There is a newer version: 8.3.0
Show newest version
/*
 * Copyright (c) 2019, Fraunhofer AISEC. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *                    $$$$$$\  $$$$$$$\   $$$$$$\
 *                   $$  __$$\ $$  __$$\ $$  __$$\
 *                   $$ /  \__|$$ |  $$ |$$ /  \__|
 *                   $$ |      $$$$$$$  |$$ |$$$$\
 *                   $$ |      $$  ____/ $$ |\_$$ |
 *                   $$ |  $$\ $$ |      $$ |  $$ |
 *                   \$$$$$   |$$ |      \$$$$$   |
 *                    \______/ \__|       \______/
 *
 */
package de.fraunhofer.aisec.cpg.frontends.cxx

import de.fraunhofer.aisec.cpg.ResolveInFrontend
import de.fraunhofer.aisec.cpg.TranslationContext
import de.fraunhofer.aisec.cpg.frontends.Language
import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
import de.fraunhofer.aisec.cpg.frontends.TranslationException
import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.Annotation
import de.fraunhofer.aisec.cpg.graph.declarations.ConstructorDeclaration
import de.fraunhofer.aisec.cpg.graph.declarations.Declaration
import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration
import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression
import de.fraunhofer.aisec.cpg.graph.types.*
import de.fraunhofer.aisec.cpg.graph.types.FunctionType
import de.fraunhofer.aisec.cpg.helpers.Benchmark
import de.fraunhofer.aisec.cpg.passes.FunctionPointerCallResolver
import de.fraunhofer.aisec.cpg.passes.order.RegisterExtraPass
import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation
import de.fraunhofer.aisec.cpg.sarif.Region
import java.io.File
import java.lang.reflect.Field
import java.lang.reflect.Method
import java.nio.file.Path
import org.eclipse.cdt.core.dom.ast.*
import org.eclipse.cdt.core.dom.ast.cpp.ICPPASTReferenceOperator
import org.eclipse.cdt.core.dom.ast.gnu.c.GCCLanguage
import org.eclipse.cdt.core.dom.ast.gnu.cpp.GPPLanguage
import org.eclipse.cdt.core.dom.parser.AbstractCLikeLanguage
import org.eclipse.cdt.core.index.IIndexFileLocation
import org.eclipse.cdt.core.model.ILanguage
import org.eclipse.cdt.core.parser.DefaultLogService
import org.eclipse.cdt.core.parser.FileContent
import org.eclipse.cdt.core.parser.IncludeFileContentProvider
import org.eclipse.cdt.core.parser.ScannerInfo
import org.eclipse.cdt.internal.core.dom.parser.ASTNode
import org.eclipse.cdt.internal.core.dom.parser.ASTTranslationUnit
import org.eclipse.cdt.internal.core.dom.parser.cpp.CPPASTQualifiedName
import org.eclipse.cdt.internal.core.model.ASTStringUtil
import org.eclipse.cdt.internal.core.parser.IMacroDictionary
import org.eclipse.cdt.internal.core.parser.scanner.InternalFileContent
import org.eclipse.cdt.internal.core.parser.scanner.InternalFileContentProvider
import org.eclipse.core.runtime.CoreException
import org.slf4j.LoggerFactory

/**
 * The language frontend for translating C/C++ languages into the graph. It uses Eclipse CDT to
 * parse the actual source code into an AST.
 *
 * Based on the file ending (.c or .cpp) different dialects of Eclipse CDT are used ([GCCLanguage]
 * and [GPPLanguage]). This enables us (to some degree) to deal with the finer differences between C
 * and C++ code.
 */
@RegisterExtraPass(FunctionPointerCallResolver::class)
class CXXLanguageFrontend(language: Language, ctx: TranslationContext) :
    LanguageFrontend(language, ctx) {

    /**
     * The dialect used by this language frontend, either [GCCLanguage] for C or [GPPLanguage] for
     * C++.
     *
     * It is `null` until [parse] has been called. [parse] re-assigns it for every file it handles:
     * [GCCLanguage] when the file extension is exactly `c`, [GPPLanguage] otherwise.
     */
    var dialect: AbstractCLikeLanguage? = null

    /**
     * Implements an [IncludeFileContentProvider] which features an inclusion/exclusion list for
     * header files.
     */
    private val includeFileContentProvider: IncludeFileContentProvider =
        object : InternalFileContentProvider() {
            /**
             * Loads the content of an include file, bypassing any cache.
             *
             * The file is rejected when the inclusion does not exist, when it is on the
             * configured block list, or when an include whitelist is configured and the file is
             * not on it.
             *
             * @param path the path of the include file
             * @return the content of the path, or `null` if it is to be excluded
             */
            private fun getContentUncached(path: String): InternalFileContent? =
                when {
                    !getInclusionExists(path) -> null
                    // the block list always wins
                    absoluteOrRelativePathIsInList(Path.of(path), config.includeBlocklist) -> {
                        LOGGER.debug("Blacklisting include file: {}", path)
                        null
                    }
                    // the whitelist is only consulted if one has been configured at all
                    hasIncludeWhitelist() &&
                        !absoluteOrRelativePathIsInList(Path.of(path), config.includeWhitelist) -> {
                        LOGGER.debug("Include file {} not on the whitelist. Ignoring.", path)
                        null
                    }
                    else -> {
                        LOGGER.debug("Loading include file {}", path)
                        FileContent.createForExternalFileLocation(path) as? InternalFileContent
                    }
                }

            /** Tells whether the configuration contains at least one whitelisted include. */
            private fun hasIncludeWhitelist(): Boolean = config.includeWhitelist.isNotEmpty()

            /**
             * Checks whether the specified path is contained in [list], either directly or as a
             * path relative to the translation configuration's top level or to one of its include
             * paths.
             *
             * @param path the absolute path to look for
             * @param list the list of paths to look in, either relative or absolute
             * @return true, if the path is in the list, false otherwise (including when [list] is
             *   `null` or empty)
             */
            private fun absoluteOrRelativePathIsInList(path: Path, list: List<Path>?): Boolean {
                // Path cannot be in the list if it is empty or null
                if (list.isNullOrEmpty()) {
                    return false
                }

                // Check, if the header path is in the list as given
                if (path in list) {
                    return true
                }

                // Candidate base directories: the top level (if configured) and all include paths
                val includeLocations = mutableListOf<Path>()
                config.topLevel?.let { includeLocations.add(it.toPath().toAbsolutePath()) }
                includeLocations.addAll(config.includePaths)

                // Any base directory is good enough, as long as the path relative to it is listed.
                // The lambda must yield its result as an expression: a bare `return` in here would
                // leave the whole function after the very first include location.
                return includeLocations.any { include ->
                    try {
                        list.contains(include.relativize(path))
                    } catch (e: IllegalArgumentException) {
                        // path cannot be expressed relative to this location; try the next one
                        false
                    }
                }
            }

            /** Resolves an include by [path] via [getContentUncached]; [macroDictionary] is unused. */
            override fun getContentForInclusion(
                path: String,
                macroDictionary: IMacroDictionary
            ): InternalFileContent? = getContentUncached(path)

            /** Resolves an include by its [astPath] via [getContentUncached]; [ifl] is unused. */
            override fun getContentForInclusion(
                ifl: IIndexFileLocation,
                astPath: String
            ): InternalFileContent? = getContentUncached(astPath)
        }

    // Handler instances used during translation; each one is constructed with this frontend.
    // [parse] hands the parsed translation unit to [declarationHandler].
    val declarationHandler = DeclarationHandler(this)
    val declaratorHandler = DeclaratorHandler(this)
    val expressionHandler = ExpressionHandler(this)
    val initializerHandler = InitializerHandler(this)
    val parameterDeclarationHandler = ParameterDeclarationHandler(this)
    val statementHandler = StatementHandler(this)

    /**
     * Raw text of the comments collected by [parse], keyed by the pair of file name and starting
     * line number of the comment. [setComment] looks entries up by a node's file path and the end
     * line of its region.
     */
    private val comments = HashMap<Pair<String, Int>, String>()

    /**
     * Parses [file] with Eclipse CDT and hands the resulting AST to [declarationHandler] to
     * build the [TranslationUnitDeclaration].
     *
     * Include paths and preprocessor symbols are taken from the configuration (top level,
     * include paths, symbols) and, if present, from the compilation database entry of [file].
     * As a side effect, [dialect] is set according to the file extension and all non-empty
     * comments of the translation unit are recorded in [comments].
     *
     * @param file the source file to parse
     * @return the translation unit declaration produced by [declarationHandler]
     * @throws TranslationException if CDT raises a [CoreException]
     */
    @Throws(TranslationException::class)
    override fun parse(file: File): TranslationUnitDeclaration {
        val content = FileContent.createForExternalFileLocation(file.absolutePath)

        // include paths: top level first, then configured paths, then the compilation database
        val includePaths = mutableListOf<String>()
        config.topLevel?.let { includePaths.add(it.toPath().toAbsolutePath().toString()) }
        includePaths.addAll(config.includePaths.map { it.toAbsolutePath().toString() })
        config.compilationDatabase?.getIncludePaths(file)?.let { includePaths.addAll(it) }

        // symbols: configured ones first; compilation database entries override them
        val symbols = HashMap<String, String>()
        symbols.putAll(config.symbols)
        config.compilationDatabase?.getSymbols(file)?.let { symbols.putAll(it) }

        val scannerInfo = ScannerInfo(symbols, includePaths.toTypedArray())
        val log = DefaultLogService()
        val opts = ILanguage.OPTION_PARSE_INACTIVE_CODE // | ILanguage.OPTION_ADD_COMMENTS;
        return try {
            val parseBench = Benchmark(this.javaClass, "Parsing sourcefile ${file.name}")

            // Set parser language, based on file extension. Keeping the value in a non-null
            // local avoids a safe call followed by a cast of a possibly-null result.
            val parserDialect: AbstractCLikeLanguage =
                if (file.extension == "c") {
                    GCCLanguage.getDefault()
                } else {
                    GPPLanguage.getDefault()
                }
            this.dialect = parserDialect

            val translationUnit =
                parserDialect.getASTTranslationUnit(
                    content,
                    scannerInfo,
                    includeFileContentProvider,
                    null,
                    opts,
                    log
                ) as ASTTranslationUnit
            val length = translationUnit.length
            LOGGER.info(
                "Parsed {} bytes in ${file.name} corresponding roughly to {} LoC",
                length,
                length / 50
            )
            parseBench.stop()

            val transformBench = Benchmark(this.javaClass, "Transforming ${file.name} to CPG")
            if (config.debugParser) {
                explore(translationUnit, 0)
            }

            // Remember every non-empty comment by (file name, starting line) for setComment
            for (c in translationUnit.comments) {
                if (c.rawSignature.isEmpty()) {
                    continue
                }

                val commentLocation = c.fileLocation
                if (commentLocation == null) {
                    LOGGER.warn("Found comment with null location in {}", translationUnit.filePath)
                    continue
                }

                comments[Pair(commentLocation.fileName, commentLocation.startingLineNumber)] =
                    c.rawSignature
            }

            val translationUnitDeclaration =
                declarationHandler.handleTranslationUnit(translationUnit)
            transformBench.stop()
            translationUnitDeclaration
        } catch (ex: CoreException) {
            throw TranslationException(ex)
        }
    }

    /**
     * Returns the raw signature of [astNode] if it is a CDT [ASTNode], `null` for anything else.
     */
    override fun <T> getCodeFromRawNode(astNode: T): String? {
        return (astNode as? ASTNode)?.rawSignature
    }

    /**
     * Computes the [PhysicalLocation] (file URI plus 1-based line/column region) of a CDT
     * [ASTNode].
     *
     * CDT only exposes offsets and line numbers, so the offsets of the line breaks are read
     * reflectively from the location context of the node's file location in order to derive
     * columns.
     *
     * @return the location, or `null` if [astNode] is not an [ASTNode], has no file location, or
     *   its columns cannot be determined because the line offsets are unavailable
     */
    override fun <T> getLocationFromRawNode(astNode: T): PhysicalLocation? {
        val node = astNode as? ASTNode ?: return null
        val fLocation = node.fileLocation ?: return null
        val lineBreaks: IntArray =
            try {
                val fLoc = getField(fLocation.javaClass, "fLocationCtx")
                fLoc.trySetAccessible()
                val locCtx = fLoc[fLocation]
                val fLineOffsets = getField(locCtx.javaClass, "fLineOffsets")
                val getLineNumber = getMethod(locCtx.javaClass, "getLineNumber", Int::class.java)
                fLineOffsets.trySetAccessible()

                // force to cache line numbers, this calls computeLineOffsets internally
                getLineNumber.trySetAccessible()
                getLineNumber.invoke(locCtx, 0)

                fLineOffsets[locCtx] as IntArray
            } catch (e: ReflectiveOperationException) {
                LOGGER.warn(
                    "Reflective retrieval of line offsets failed. Locations are only available for nodes in the first line"
                )
                IntArray(0)
            } catch (e: ClassCastException) {
                LOGGER.warn(
                    "Reflective retrieval of line offsets failed. Locations are only available for nodes in the first line"
                )
                IntArray(0)
            } catch (e: NullPointerException) {
                LOGGER.warn(
                    "Reflective retrieval of AST node source failed. Cannot reliably determine content of the file that contains the node"
                )
                return null
            }

        // our start and end line, indexed by 0
        val startLine = fLocation.startingLineNumber - 1
        val endLine = fLocation.endingLineNumber - 1

        val startOffset = fLocation.nodeOffset
        val endOffset = startOffset + fLocation.nodeLength

        // 0-based column of an offset within a 0-based line. In the first line the column is
        // the offset itself; otherwise it is the distance to the previous line break, with an
        // additional -1 because of the '\n' itself. Yields null if that line break is unknown,
        // e.g. because the reflective lookup above failed and lineBreaks is empty.
        fun columnOf(offset: Int, line: Int): Int? =
            if (line == 0) {
                offset
            } else {
                lineBreaks.getOrNull(line - 1)?.let { lineBreak -> offset - lineBreak - 1 }
            }

        val startColumn = columnOf(startOffset, startLine)
        val endColumn = columnOf(endOffset, endLine)
        if (startColumn == null || endColumn == null) {
            // Without line offsets no column can be computed; indexing lineBreaks blindly would
            // throw an ArrayIndexOutOfBoundsException here.
            LOGGER.debug(
                "No line offsets known for lines {}-{}, cannot compute a location",
                startLine + 1,
                endLine + 1
            )
            return null
        }

        // for a SARIF compliant format, we need to add +1, since its index begins at 1 and
        // not 0
        val region = Region(startLine + 1, startColumn + 1, endLine + 1, endColumn + 1)
        return PhysicalLocation(Path.of(node.containingFilename).toUri(), region)
    }

    /**
     * Turns C++ [attributes](https://en.cppreference.com/w/cpp/language/attributes) into
     * [Annotation] nodes and adds them to [node].
     *
     * Nothing happens unless annotation processing is enabled in the configuration and [owner]
     * is an [IASTAttributeOwner].
     *
     * @param node the node to process
     * @param owner the AST node which holds the attribute
     */
    fun processAttributes(node: Node, owner: IASTNode) {
        if (!config.processAnnotations) {
            return
        }
        val attributeOwner = owner as? IASTAttributeOwner ?: return

        // set attributes
        node.addAnnotations(handleAttributes(attributeOwner))
    }

    /**
     * Creates one [Annotation] per attribute of [owner]. The annotation is named after the
     * attribute and carries its raw signature as code. If the attribute's argument clause is an
     * [IASTTokenList], its tokens become the members of the annotation.
     */
    private fun handleAttributes(owner: IASTAttributeOwner): List<Annotation> {
        val annotations: MutableList<Annotation> = ArrayList()
        for (attribute in owner.attributes) {
            val annotation = newAnnotation(String(attribute.name), attribute.rawSignature)

            // go over the parameters
            (attribute.argumentClause as? IASTTokenList)?.let { tokens ->
                annotation.members = handleTokenList(tokens)
            }
            annotations.add(annotation)
        }
        return annotations
    }

    /**
     * Converts the tokens of [tokenList] into [AnnotationMember]s via [handleToken], skipping
     * separator tokens.
     */
    private fun handleTokenList(tokenList: IASTTokenList): MutableList<AnnotationMember> {
        val members: MutableList<AnnotationMember> = ArrayList()
        for (token in tokenList.tokens) {
            // skip commas and such
            // NOTE(review): 6 is presumably CDT's IToken.tCOMMA - confirm and use the constant
            if (token.tokenType == 6) {
                continue
            }
            members.add(handleToken(token))
        }
        return members
    }

    /**
     * Converts a single attribute argument [token] into an [AnnotationMember] with an empty name.
     *
     * The member's value depends on the token type: a reference for type 1, an integer literal
     * for type 2, a string literal without its surrounding quotes for type 130, and a literal
     * holding the raw token text for everything else. An integer token whose text cannot be
     * parsed as a decimal `Int` (e.g. `0x10`, `16u` or an out-of-range value) is kept as a
     * raw-text literal instead of failing with a [NumberFormatException].
     *
     * NOTE(review): the numeric token types presumably correspond to CDT's `IToken.tIDENTIFIER`,
     * `IToken.tINTEGER` and `IToken.tSTRING` - confirm and use the constants.
     */
    private fun handleToken(token: IASTToken): AnnotationMember {
        val code = String(token.tokenCharImage)

        // Type given to all text-like literals below
        fun textType(): Type =
            (language.getSimpleTypeOf("char") as? ObjectType)?.reference() ?: parseType("char")

        val expression: Expression =
            when (token.tokenType) {
                1 -> // a variable
                newDeclaredReferenceExpression(code, newUnknownType(), code)
                2 -> // an integer
                code.toIntOrNull()?.let { value ->
                        newLiteral(
                            value,
                            (language.getSimpleTypeOf("int") as? ObjectType) ?: parseType("int"),
                            code
                        )
                    }
                        // not a plain decimal Int: keep the raw text rather than crash
                        ?: newLiteral(code, textType(), code)
                130 -> // a string
                newLiteral(
                        if (code.length >= 2) code.substring(1, code.length - 1) else "",
                        textType(),
                        code
                    )
                else -> newLiteral(code, textType(), code)
            }
        return newAnnotationMember("", expression, code)
    }

    /**
     * Looks up the declared field [fieldName] on [type] or, failing that, on the closest
     * superclass that declares it.
     *
     * @throws NoSuchFieldException if no class in the superclass chain declares the field
     */
    @Throws(NoSuchFieldException::class)
    private fun getField(type: Class<*>, fieldName: String): Field {
        var current: Class<*> = type
        while (true) {
            try {
                return current.getDeclaredField(fieldName)
            } catch (e: NoSuchFieldException) {
                // climb one level; give up with the last failure once the chain ends
                current = current.superclass ?: throw e
            }
        }
    }

    /**
     * Looks up the declared method [methodName] with the given [parameterTypes] on [type] or,
     * failing that, on the closest superclass that declares it.
     *
     * @throws NoSuchMethodException if no class in the superclass chain declares the method
     */
    @Throws(NoSuchMethodException::class)
    private fun getMethod(
        type: Class<*>,
        methodName: String,
        vararg parameterTypes: Class<*>
    ): Method {
        var current: Class<*> = type
        while (true) {
            try {
                return current.getDeclaredMethod(methodName, *parameterTypes)
            } catch (e: NoSuchMethodException) {
                // climb one level; give up with the last failure once the chain ends
                current = current.superclass ?: throw e
            }
        }
    }

    /**
     * Attaches a comment recorded by [parse] to the node [s], if [ctx] is a CDT [ASTNode], [s]
     * is a [Node] with a location, and a comment was recorded for the node's file path and the
     * end line of its region. Otherwise nothing happens.
     */
    override fun <S, T> setComment(s: S, ctx: T) {
        if (ctx !is ASTNode) {
            return
        }
        val cpgNode = s as? Node ?: return

        // No location, no comment
        val location = cpgNode.location ?: return

        // NOTE(review): parse() keys comments by CDT's file name, this lookup uses the URI path;
        // presumably both spell the path identically - confirm, e.g. for Windows paths
        val loc: Pair<String, Int> =
            Pair(location.artifactLocation.uri.path, location.region.endLine)

        // only exact match for now
        comments[loc]?.let { cpgNode.comment = it }
        // TODO: handle orphanComments? i.e. comments which do not correspond to one line
        // todo: what to do with comments which are in a line which contains multiple
        // statements?
    }

    /**
     * Returns the [Type] that is represented by an [IASTTypeId], derived from its abstract
     * declarator and its declaration specifier.
     */
    fun typeOf(id: IASTTypeId): Type = typeOf(id.abstractDeclarator, id.declSpecifier)

    /**
     * Returns the [Type] that is represented by the [declarator] and [specifier]. This tries to
     * resolve as much information about the type as possible on its own by analyzing the AST of
     * the supplied declarator and specifier. The innermost type is created by the type parser;
     * all further adjustments, such as creating a [PointerType], are done by [adjustType].
     *
     * Optionally, a [hint] in the form of an existing [Declaration] can be provided. In some
     * scenarios the [Declaration] is created before the type, in others the declaration is
     * derived from the type. In the first case the declaration may carry information that
     * influences type parsing: for a [ConstructorDeclaration] whose name has a parent, the type
     * is parsed from that parent name, i.e. the record it instantiates.
     *
     * @param hint an optional [Declaration], which serves as a parsing hint.
     */
    @ResolveInFrontend("getRecordForName")
    fun typeOf(
        declarator: IASTDeclarator,
        specifier: IASTDeclSpecifier,
        hint: Declaration? = null
    ): Type {
        // The "name" of this type, including qualifiers.
        // TODO: In the future, we should parse the qualifiers, such as const here, instead of in
        //  the TypeParser
        val name = ASTStringUtil.getSignatureString(specifier, null)

        val baseType: Type =
            when (specifier) {
                is IASTSimpleDeclSpecifier -> {
                    // Constructors are typed after their record; everything else is primitive
                    val recordName = (hint as? ConstructorDeclaration)?.name?.parent
                    if (recordName != null) parseType(recordName) else parseType(name)
                }
                is IASTNamedTypeSpecifier -> {
                    // A reference to an object type, with two cases:
                    // a) the type name is already an FQN and can be parsed as such.
                    // b) the type is a local name. Then we peek whether it refers to a record
                    //    known in the current scope. This is resolving in the frontend, which
                    //    we actually want to avoid since it has limited view (most likely only
                    //    the current translation unit).
                    //
                    // Note: we cannot use parseType here, because of typedefs (and templates?)
                    // the TypeParser still needs direct access to the language frontend (meh!)
                    val typeName =
                        if (specifier.name is CPPASTQualifiedName) {
                            name
                        } else {
                            // Use the record's name if found; otherwise keep the local name and
                            // hope for the best
                            scopeManager.currentScope
                                ?.let { scopeManager.getRecordForName(it, Name(name)) }
                                ?.name
                                ?.toString() ?: name
                        }
                    TypeParser.createFrom(typeName, true, this)
                }
                // A class definition (which also declares the class; at the moment this is
                // handled in handleSimpleDeclaration, but might move here) or an elaborated
                // class/struct reference
                is IASTCompositeTypeSpecifier,
                is IASTElaboratedTypeSpecifier -> TypeParser.createFrom(name, true, this)
                else -> newUnknownType()
            }

        return adjustType(declarator, typeManager.registerType(baseType))
    }

    /**
     * Helper for [typeOf] that applies everything the [declarator] contributes to the [incoming]
     * type. This is needed because type information in C/C++ is split between the declarator
     * and the declaration specifiers: pointer and reference operators, array modifiers, function
     * parameters and nested declarators are all layered on top of [incoming] here.
     *
     * @return the adjusted type, as returned by the type manager's `registerType`
     */
    private fun adjustType(declarator: IASTDeclarator, incoming: Type): Type {
        // Pointer operators come first: each one wraps the type into a pointer or a reference
        var type: Type =
            declarator.pointerOperators.fold(incoming) { wrapped, operator ->
                when (operator) {
                    is IASTPointer -> wrapped.reference(PointerType.PointerOrigin.POINTER)
                    is ICPPASTReferenceOperator -> ReferenceType(wrapped)
                    else -> wrapped
                }
            }

        when (declarator) {
            is IASTArrayDeclarator -> {
                // One array level per modifier; the modifier itself is not looked at
                repeat(declarator.arrayModifiers.size) {
                    type = type.reference(PointerType.PointerOrigin.ARRAY)
                }
            }
            is IASTStandardFunctionDeclarator -> {
                // Resolve all parameter types, then separate the void (incomplete) ones
                val (voidTypes, paramTypes) =
                    declarator.parameters
                        .map { typeOf(it.declarator, it.declSpecifier) }
                        .partition { it is IncompleteType }

                if (voidTypes.size > 1) {
                    // TODO: We should actually report this as a "problem" somehow
                    LOGGER.error(
                        "Type $type contains more than one void parameter. This is not allowed"
                    )
                }

                // The type so far is the return value of the function type. Its name consists
                // of the bracketed parameter list followed by the return type.
                val parameterList =
                    paramTypes.joinToString(
                        separator = FunctionDeclaration.COMMA + FunctionDeclaration.WHITESPACE,
                        prefix = FunctionDeclaration.BRACKET_LEFT,
                        postfix = FunctionDeclaration.BRACKET_RIGHT
                    ) {
                        it.typeName
                    }
                type = FunctionType(parameterList + type.typeName, paramTypes, listOf(type), language)
            }
        }

        // Nested declarators may adjust the type further, unless it already is a function
        // pointer type. In the future, this will probably make the difference between a
        // function type and a function pointer type.
        val nested = declarator.nestedDeclarator
        if (nested != null && type !is FunctionPointerType) {
            type = adjustType(nested, type)
        }

        // Make sure, the type manager knows about this type
        return typeManager.registerType(type)
    }

    companion object {
        private val LOGGER = LoggerFactory.getLogger(CXXLanguageFrontend::class.java)

        /**
         * Recursively writes the AST below [node] to the trace log, one line per node, with
         * children indented by two more spaces than their parent. Newlines in the raw signature
         * are shown as a backslash and tabs as a space.
         *
         * NOTE(review): `log` is not declared in this file and differs from [LOGGER]; presumably
         * it is inherited from the superclass - confirm.
         */
        private fun explore(node: IASTNode, indent: Int) {
            val children = node.children
            val padding = " ".repeat(indent)

            if (log.isTraceEnabled) {
                val flattened = node.rawSignature.replace('\n', '\\').replace('\t', ' ')
                log.trace("{}{} -> {}", padding, node.javaClass.simpleName, flattened)
            }

            children.forEach { child -> explore(child, indent + 2) }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy