All Downloads are FREE. Search and download functionalities are using the official Maven repository.

toolkit.plugins.scanners.scancode-scanner.42.0.0.source-code.ScanCodeResultModelMapper.kt Maven / Gradle / Ivy

Go to download

Part of the OSS Review Toolkit (ORT), a suite to automate software compliance checks.

The newest version!
/*
 * Copyright (C) 2017 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 * License-Filename: LICENSE
 */

package org.ossreviewtoolkit.plugins.scanners.scancode

import java.time.Instant
import java.time.ZoneId
import java.time.format.DateTimeFormatter

import org.ossreviewtoolkit.model.CopyrightFinding
import org.ossreviewtoolkit.model.Issue
import org.ossreviewtoolkit.model.LicenseFinding
import org.ossreviewtoolkit.model.ScanSummary
import org.ossreviewtoolkit.model.Severity
import org.ossreviewtoolkit.model.TextLocation
import org.ossreviewtoolkit.model.createAndLogIssue
import org.ossreviewtoolkit.model.mapLicense
import org.ossreviewtoolkit.model.utils.associateLicensesWithExceptions

import org.semver4j.Semver

/**
 * The highest major version of the output format that this mapper supports. [toScanSummary] adds a warning issue to
 * the summary if the output format version from the result header has a greater major version.
 */
const val MAX_SUPPORTED_OUTPUT_FORMAT_MAJOR_VERSION = 4

/**
 * The formatter used to parse the start and end timestamps from the result header; the formatter's zone is UTC.
 * NOTE(review): The pattern letter "n" denotes nano-of-second, so the digits after the dot are read as a nanosecond
 * count rather than as a decimal fraction of a second - confirm that this matches the timestamps being parsed.
 */
private val TIMESTAMP_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HHmmss.n").withZone(ZoneId.of("UTC"))

// Note: The "(File: ...)" part in the patterns below is actually added by ORT itself, see mapScanErrors().

/**
 * Matches a complete "Unknown error" scan error message. The named groups are "scanner" (only present if the message
 * starts with the optional "ERROR: for scanner: ..." line), "error" (a word ending in "Error"), "message" (the text
 * following the error name), and "file" (the path from the trailing "(File: ...)" part). As dots also match line
 * breaks, the text between the "Unknown error" line and the error name may span several lines.
 */
private val UNKNOWN_ERROR_REGEX = Regex(
    "(ERROR: for scanner: (?<scanner>\\w+):\n)?" +
        "ERROR: Unknown error:\n.+\n(?<error>\\w+Error)[:\n](?<message>.*) \\(File: (?<file>.+)\\)",
    RegexOption.DOT_MATCHES_ALL
)

/**
 * Matches a complete timeout scan error message. The named groups are "scanner" (only present if the message starts
 * with the optional "ERROR: for scanner: ..." line), "timeout" (the number of seconds), and "file" (the path from the
 * trailing "(File: ...)" part, which is appended by mapScanErrors()).
 */
private val TIMEOUT_ERROR_REGEX = Regex(
    "(ERROR: for scanner: (?<scanner>\\w+):\n)?" +
        "ERROR: Processing interrupted: timeout after (?<timeout>\\d+) seconds. \\(File: (?<file>.+)\\)"
)

/**
 * The key by which [toScanSummary] groups license entries in order to deduplicate them: Entries that have the same
 * [expression], [startLine], [endLine], and [score] end up in the same group, of which only the first entry is kept.
 */
private data class LicenseMatch(
    val expression: String,
    val startLine: Int,
    val endLine: Int,
    val score: Float
)

/**
 * Convert this [ScanCodeResult] to a [ScanSummary].
 *
 * Only entries of type "file" contribute license and copyright findings. License entries of a file that originate
 * from a different file are skipped, and entries with the same expression, line range and score are deduplicated.
 * If [preferFileLicense] is true and a file entry provides a file-level SPDX license expression, a single license
 * finding is created for that file which spans all of its license entries and uses their average score. Otherwise
 * one license finding is created per license entry.
 *
 * The scan errors of all files are mapped to issues, with unknown and timeout errors rewritten to a more compact
 * form. A warning issue is added if the major version of the output format exceeds
 * [MAX_SUPPORTED_OUTPUT_FORMAT_MAJOR_VERSION].
 *
 * This function throws if the result does not contain exactly one header.
 */
fun ScanCodeResult.toScanSummary(preferFileLicense: Boolean = false): ScanSummary {
    val licenseFindings = mutableSetOf<LicenseFinding>()
    val copyrightFindings = mutableSetOf<CopyrightFinding>()
    val issues = mutableListOf<Issue>()

    val header = headers.single()
    val inputPath = header.getInput()

    val outputFormatVersion = Semver(header.outputFormatVersion)

    if (outputFormatVersion.major > MAX_SUPPORTED_OUTPUT_FORMAT_MAJOR_VERSION) {
        issues += createAndLogIssue(
            source = ScanCode.SCANNER_NAME,
            message = "The output format version $outputFormatVersion exceeds the supported major version " +
                "$MAX_SUPPORTED_OUTPUT_FORMAT_MAJOR_VERSION. Results may be incomplete or incorrect.",
            severity = Severity.WARNING
        )
    }

    val filesOfTypeFile = files.filter { it.type == "file" }

    // Build a map of all ScanCode license keys in the result associated with their corresponding SPDX ID.
    val scanCodeKeyToSpdxIdMappings = licenseReferences?.associate { it.key to it.spdxLicenseKey }
        ?: files.flatMap { it.scanCodeKeyToSpdxIdMappings }.toMap()

    filesOfTypeFile.forEach { file ->
        val licensesWithoutReferences = file.licenses.filter {
            it !is LicenseEntry.Version3 || it.fromFile == null
                // Note that "fromFile" contains the name of the input directory, see
                // https://github.com/aboutcode-org/scancode-toolkit/issues/3712.
                || inputPath.resolveSibling(it.fromFile) == inputPath.resolve(file.path)
                // Check if input is a single file.
                || it.fromFile == inputPath.name
        }

        // ScanCode creates separate license entries for each license in an expression. Deduplicate these by grouping by
        // the same expression.
        val licenses = licensesWithoutReferences.groupBy {
            LicenseMatch(it.licenseExpression, it.startLine, it.endLine, it.score)
        }.map {
            // Arbitrarily take the first of the duplicate license entries.
            it.value.first()
        }

        // The file-level finding aggregates the line range and the score over all license entries, which is only
        // possible if there is at least one entry. Without this check, minOf() / maxOf() would throw on an empty list.
        val useFileLicense = preferFileLicense && licenses.isNotEmpty()

        if (useFileLicense && file is FileEntry.Version3 && file.detectedLicenseExpressionSpdx != null) {
            licenseFindings += LicenseFinding(
                license = file.detectedLicenseExpressionSpdx,
                location = TextLocation(
                    path = file.path,
                    startLine = licenses.minOf { it.startLine },
                    endLine = licenses.maxOf { it.endLine }
                ),
                score = licenses.map { it.score }.average().toFloat()
            )
        } else {
            licenses.mapTo(licenseFindings) { license ->
                // ScanCode uses its own license keys as identifiers in license expressions.
                val spdxLicenseExpression = when {
                    license is LicenseEntry.Version3 && license.spdxLicenseExpression != null -> {
                        license.spdxLicenseExpression
                    }

                    license is LicenseEntry.Version4 && license.licenseExpressionSpdx != null -> {
                        license.licenseExpressionSpdx
                    }

                    else -> license.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings)
                }

                LicenseFinding(
                    license = spdxLicenseExpression,
                    location = TextLocation(
                        path = file.path,
                        startLine = license.startLine,
                        endLine = license.endLine
                    ),
                    score = license.score
                )
            }
        }

        file.copyrights.mapTo(copyrightFindings) { copyright ->
            CopyrightFinding(
                statement = copyright.statement,
                location = TextLocation(
                    path = file.path,
                    startLine = copyright.startLine,
                    endLine = copyright.endLine
                )
            )
        }
    }

    issues += mapScanErrors(this)

    // Both functions rewrite the messages in place; their return values are not needed here.
    mapUnknownErrors(issues)
    mapTimeoutErrors(issues)

    return ScanSummary(
        startTime = TIMESTAMP_FORMATTER.parse(header.startTimestamp).query(Instant::from),
        endTime = TIMESTAMP_FORMATTER.parse(header.endTimestamp).query(Instant::from),
        licenseFindings = associateLicensesWithExceptions(licenseFindings),
        copyrightFindings = copyrightFindings,
        issues = issues
    )
}

/**
 * Map the scan errors of all files in [result] to [Issue]s. Each issue message consists of the scan error followed by
 * the path of the file as "(File: ...)", which is the part that the error patterns in this file rely on.
 */
private fun mapScanErrors(result: ScanCodeResult): List<Issue> =
    result.files.flatMap { file ->
        file.scanErrors.map { error ->
            Issue(
                source = ScanCode.SCANNER_NAME,
                message = "$error (File: ${file.path})"
            )
        }
    }

/**
 * Map messages about timeout errors in [issues] to a more compact form and set the affected path of these issues to
 * the file from the message. The list is modified in place, and issues with duplicate messages are removed after the
 * mapping. Return true if solely timeout errors occurred, return false otherwise, which includes the case that
 * [issues] is empty.
 */
private fun mapTimeoutErrors(issues: MutableList<Issue>): Boolean {
    if (issues.isEmpty()) return false

    var onlyTimeoutErrors = true

    // The "file" and "timeout" groups are not optional in the pattern, so they are present whenever it matches.
    @Suppress("UnsafeCallOnNullableType")
    val mappedIssues = issues.map { fullError ->
        val match = TIMEOUT_ERROR_REGEX.matchEntire(fullError.message)
        if (match != null) {
            val file = match.groups["file"]!!.value
            val timeout = match.groups["timeout"]!!.value

            fullError.copy(
                message = "ERROR: Timeout after $timeout seconds while scanning file '$file'.",
                affectedPath = file
            )
        } else {
            onlyTimeoutErrors = false
            fullError
        }
    }

    issues.clear()
    issues += mappedIssues.distinctBy { it.message }

    return onlyTimeoutErrors
}

/**
 * Map messages about unknown errors in [issues] to a more compact form. The list is modified in place, and issues
 * with duplicate messages are removed after the mapping. Return true if solely memory errors occurred, return false
 * otherwise, which includes the case that [issues] is empty.
 */
private fun mapUnknownErrors(issues: MutableList<Issue>): Boolean {
    if (issues.isEmpty()) return false

    var onlyMemoryErrors = true

    // The "file", "error" and "message" groups are not optional in the pattern, so they are present whenever it
    // matches.
    @Suppress("UnsafeCallOnNullableType")
    val mappedIssues = issues.map { fullError ->
        UNKNOWN_ERROR_REGEX.matchEntire(fullError.message)?.let { match ->
            val file = match.groups["file"]!!.value
            val error = match.groups["error"]!!.value

            if (error == "MemoryError") {
                fullError.copy(message = "ERROR: MemoryError while scanning file '$file'.")
            } else {
                onlyMemoryErrors = false
                val message = match.groups["message"]!!.value.trim()
                fullError.copy(message = "ERROR: $error while scanning file '$file' ($message).")
            }
        } ?: run {
            // Not an unknown error at all, so keep the issue as it is.
            onlyMemoryErrors = false
            fullError
        }
    }

    issues.clear()
    issues += mappedIssues.distinctBy { it.message }

    return onlyMemoryErrors
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy