All Downloads are FREE. Search and download functionalities are using the official Maven repository.

parser.SpdxExpressionParser.kt Maven / Gradle / Ivy

/*
 * Copyright (C) 2024 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 * License-Filename: LICENSE
 */

package org.ossreviewtoolkit.utils.spdx.parser

import org.ossreviewtoolkit.utils.common.nextOrNull
import org.ossreviewtoolkit.utils.spdx.SpdxCompoundExpression
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseReferenceExpression
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseWithExceptionExpression
import org.ossreviewtoolkit.utils.spdx.SpdxOperator

/**
 * A parser for SPDX expressions. It consumes a sequence of [Token]s and produces an [SpdxExpression].
 *
 * This parser implements the grammar defined in the
 * [SPDX specification](https://spdx.github.io/spdx-spec/v2.2.2/SPDX-license-expressions/):
 *
 * ```
 * license-expression   -> simple-expression | compound-expression
 * compound-expression  -> simple-expression |
 *                         simple-expression "WITH" license-exception-id |
 *                         compound-expression "AND" compound-expression |
 *                         compound-expression "OR" compound-expression |
 *                         "(" compound-expression ")"
 * simple-expression    -> license-id | license-id"+" | license-ref
 * license-ref          -> ["DocumentRef-" idstring ":"] "LicenseRef-" idstring
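 * license-exception-id -> <short-form license exception identifier from the SPDX License Exceptions list>
 * license-id           -> <short-form license identifier from the SPDX License List>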
 * idstring             -> 1*(ALPHA / DIGIT / "-" / "." )
 * ```
 *
 * To simplify the implementation the grammar is transformed into the following form which implements the operator
 * precedence as part of the grammar. Each line in this grammar corresponds to a method in this class:
 *
 * ```
 * license-expression -> or-expression
 * or-expression      -> and-expression ( "OR" and-expression ) *
 * and-expression     -> primary ( "AND" primary ) *
 * primary            -> "(" license-expression ")" | simple-expression
 * simple-expression  -> ( IDENTIFIER [ "+" ] | [ "DOCUMENTREF" ":" ] LICENSEREF ) [ "WITH" IDENTIFIER ]
 * ```
 *
 * This allows implementing a
 * [recursive descent parser](https://en.wikipedia.org/wiki/Recursive_descent_parser) with
 * [Pratt parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing). The implementation is
 * loosely based on this
 * [example](https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) but with many
 * simplifications as the SPDX grammar has only one operator per level of precedence and the parser does not need to be
 * extensible.
 *
 * Also, the rules for `license-id` and `license-exception-id` are changed to allow any valid `idstring` as the
 * [strictness] decides if only the SPDX identifiers are allowed for license and exception ids and therefore these rules
 * cannot be part of the grammar.
 *
 * For backward compatibility with the previously used SPDX expression parser, operators are case-insensitive. This is
 * also planned for future SPDX versions, see https://github.com/spdx/spdx-spec/pull/876.
 */
class SpdxExpressionParser(
    tokens: Sequence<Token>,
    private val strictness: SpdxExpression.Strictness = SpdxExpression.Strictness.ALLOW_ANY
) {
    /**
     * Create a parser for the [input] string. The string is turned into tokens by [SpdxExpressionLexer] and the
     * given [strictness] is applied when validating the parsed expressions.
     */
    constructor(
        input: String,
        strictness: SpdxExpression.Strictness = SpdxExpression.Strictness.ALLOW_ANY
    ) : this(SpdxExpressionLexer(input).tokens(), strictness)

    private val iterator = tokens.iterator()

    /**
     * The single token of lookahead used by all parse functions. A value of `null` is treated as the end of the input.
     */
    private var next = iterator.nextOrNull()

    /**
     * Parse the tokens into an [SpdxExpression].
     *
     * @throws SpdxExpressionParserException if an unexpected token is encountered, including any token that is left
     * over after a complete expression was parsed.
     */
    fun parse(): SpdxExpression {
        val result = parseOrExpression()

        // A complete expression must use up all tokens, anything left over is a syntax error.
        if (next != null) throw SpdxExpressionParserException(next)

        return result
    }

    /**
     * Parse an OR expression of the form `or-expression -> and-expression ( "OR" and-expression ) *`.
     */
    private fun parseOrExpression(): SpdxExpression {
        val children = mutableListOf(parseAndExpression())

        while (next is Token.OR) {
            consume<Token.OR>()
            children.add(parseAndExpression())
        }

        // Only wrap the operands into a compound expression if there actually was an operator.
        return when {
            children.size > 1 -> SpdxCompoundExpression(SpdxOperator.OR, children)
            else -> children.first()
        }
    }

    /**
     * Parse an AND expression of the form `and-expression -> primary ( "AND" primary ) *`.
     */
    private fun parseAndExpression(): SpdxExpression {
        val children = mutableListOf(parsePrimary())

        while (next is Token.AND) {
            consume<Token.AND>()
            children.add(parsePrimary())
        }

        // Only wrap the operands into a compound expression if there actually was an operator.
        return when {
            children.size > 1 -> SpdxCompoundExpression(SpdxOperator.AND, children)
            else -> children.first()
        }
    }

    /**
     * Parse a primary of the form `primary -> "(" license-expression ")" | simple-expression`.
     */
    private fun parsePrimary(): SpdxExpression {
        if (next is Token.OPEN) {
            consume<Token.OPEN>()
            val expression = parseOrExpression()

            // NOTE(review): The token type for ")" is declared outside of this file and assumed to be [Token.CLOSE].
            consume<Token.CLOSE>()

            return expression
        }

        return parseSimpleExpression()
    }

    /**
     * Parse a simple expression of the form
     * `simple-expression -> ( IDENTIFIER [ "+" ] | [ "DOCUMENTREF" ":" ] LICENSEREF ) [ "WITH" exception ]` where
     * `exception -> IDENTIFIER | [ "DOCUMENTREF" ":" ] LICENSEREF`.
     */
    private fun parseSimpleExpression(): SpdxExpression {
        val left = when (next) {
            is Token.IDENTIFIER -> {
                val identifier = consume<Token.IDENTIFIER>()

                // Both a trailing "+" and an "-or-later" suffix of the identifier mark the "or later version" variant.
                val orLaterVersion = next is Token.PLUS || identifier.value.endsWith("-or-later")
                if (next is Token.PLUS) consume<Token.PLUS>()

                SpdxLicenseIdExpression(identifier.value, orLaterVersion).apply { validate(strictness) }
            }

            is Token.DOCUMENTREF ->
                SpdxLicenseReferenceExpression(parseDocumentRefLicenseRef()).apply { validate(strictness) }

            is Token.LICENSEREF -> {
                val licenseRef = consume<Token.LICENSEREF>()

                SpdxLicenseReferenceExpression(licenseRef.value).apply { validate(strictness) }
            }

            else -> throw SpdxExpressionParserException(next)
        }

        if (next is Token.WITH) {
            consume<Token.WITH>()

            val exception = when (next) {
                is Token.IDENTIFIER -> consume<Token.IDENTIFIER>().value
                is Token.LICENSEREF -> consume<Token.LICENSEREF>().value

                // Use the same helper as for licenses so that the ":" separator token is consumed here as well.
                is Token.DOCUMENTREF -> parseDocumentRefLicenseRef()

                else -> throw SpdxExpressionParserException(
                    next,
                    Token.IDENTIFIER::class,
                    Token.LICENSEREF::class,
                    Token.DOCUMENTREF::class
                )
            }

            return SpdxLicenseWithExceptionExpression(left, exception).apply { validate(strictness) }
        }

        return left
    }

    /**
     * Consume the token sequence `DOCUMENTREF ":" LICENSEREF` and return it joined as `<documentRef>:<licenseRef>`.
     */
    private fun parseDocumentRefLicenseRef(): String {
        val documentRef = consume<Token.DOCUMENTREF>()

        // NOTE(review): The token type for ":" is declared outside of this file and assumed to be [Token.COLON].
        consume<Token.COLON>()

        val licenseRef = consume<Token.LICENSEREF>()

        return "${documentRef.value}:${licenseRef.value}"
    }

    /**
     * Consume the [next] token and return it if it is of the expected type [T], otherwise throw an
     * [SpdxExpressionParserException]. On success, [next] is advanced to the following token.
     */
    private inline fun <reified T : Token> consume(): T {
        val token = next
        if (token !is T) throw SpdxExpressionParserException(token, T::class)
        next = iterator.nextOrNull()
        return token
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy