// Source listing of com.metaeffekt.artifact.analysis.bom.spdx.LicenseStringUtils
// (artifact: ae-artifact-analysis, newest version).
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.artifact.analysis.bom.spdx;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;

/**
 * Static helpers for license strings and SPDX {@code LicenseRef-} identifiers.
 * <p>
 * The class offers three groups of functionality:
 * <ul>
 *     <li>bracket analysis of license strings ({@link #containsTopAnd(String)},
 *     {@link #findTopBrackets(String)}, {@link #findClosingBracket(String, int, char, char)}),</li>
 *     <li>escaping and unescaping of arbitrary text into the restricted LicenseRef character set
 *     ({@link #escapeForLicenseRef(String)}, {@link #unescapeFromLicenseRef(String)}),</li>
 *     <li>wrapping and unwrapping of names into namespaced LicenseRefs
 *     ({@code create*LicenseRef}, {@link #unwrapLicenseRef(String)}).</li>
 * </ul>
 */
public class LicenseStringUtils {
    private static final Logger LOG = LoggerFactory.getLogger(LicenseStringUtils.class);

    /**
     * Matches runs of whitespace.
     * <p>
     * Must stay declared ahead of {@link #supportedLicenseRefPrefixes}: that field's initializer calls
     * {@link #escapeForLicenseRef(String)}, which relies on this pattern being initialized already.
     */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Marks the beginning of an escaped codepoint; the hexadecimal codepoint value follows. */
    protected static final String escapeSequenceStart = "-.H0x";
    /** Terminates an escaped codepoint. */
    protected static final String escapeSequenceEnd = "-";

    /** Format of the codepoint value inside an escape sequence (lower-case hexadecimal). */
    protected static final String hexFormatter = "%x";
    /** Format of a complete escape sequence: start marker, hexadecimal codepoint, end marker. */
    protected static final String escapeFormatter = escapeSequenceStart + hexFormatter + escapeSequenceEnd;
    /** Prefix shared by all LicenseRefs, independent of the namespace. */
    public static final String PREFIX_LICENSE_REF_COMMON = "LicenseRef-";

    /**
     * A list of supported licenseref namespaces.
     */
    protected static final String[] supportedNamespaces = {
            "ae",
            "scancode",
            "spdx",
            "unknown"
    };

    /**
     * Set of supported namespaces, initialized off of {@link #supportedNamespaces}.
     */
    protected static final Set<String> supportedNamespacesSet = new HashSet<>(Arrays.asList(supportedNamespaces));

    /**
     * Convenience object for class-internal use, automatically built from {@link #supportedNamespaces}.
     * <p>
     * Each entry has the form {@code LicenseRef-<escaped namespace>-}. The set is unmodifiable and iterates in the
     * order of {@link #supportedNamespaces}.
     */
    protected static final Set<String> supportedLicenseRefPrefixes = buildSupportedLicenseRefPrefixes();

    /**
     * Builds the prefix set backing {@link #supportedLicenseRefPrefixes}.
     *
     * @return An unmodifiable, insertion-ordered set with one LicenseRef prefix per supported namespace.
     */
    private static Set<String> buildSupportedLicenseRefPrefixes() {
        Set<String> prefixes = new LinkedHashSet<>();
        for (String namespace : supportedNamespaces) {
            prefixes.add(PREFIX_LICENSE_REF_COMMON + escapeForLicenseRef(namespace) + "-");
        }
        return Collections.unmodifiableSet(prefixes);
    }

    /**
     * Checks whether the top-level logic of a license string contains the "," operator (which, going by this
     * method's name, is the AND operator of our format).
     * <p>
     * Everything inside round brackets is skipped. Brackets are invalid in our format but may be generated to
     * preserve logic, and bracketing overrides operator precedence.
     * <p>
     * If the entire string is bracketed, nothing is left to inspect and the method returns {@code false}.
     * <p>
     * Used to detect when brackets need to be added.
     * Helps with simplifying logic during conversion between spdx and our license string.
     *
     * @param licensesString Input licensesString (in our format, supports ignoring brackets).
     * @return Returns {@code true} if a "," occurs outside of all brackets, {@code false} otherwise.
     * @throws NullPointerException     If {@code licensesString} is {@code null}.
     * @throws IllegalArgumentException If the round brackets in {@code licensesString} are unbalanced.
     */
    public static boolean containsTopAnd(String licensesString) {
        // bracketed ranges are skipped since bracketing overrides logic operator precedence.
        NavigableMap<Integer, Integer> bracketRanges = findTopBrackets(licensesString);

        int index = 0;
        while (index < licensesString.length()) {
            Integer closingIndex = bracketRanges.get(index);
            if (closingIndex != null) {
                // a top-level bracket opens here: continue right behind its closing bracket
                index = closingIndex + 1;
                continue;
            }

            if (licensesString.charAt(index) == ',') {
                return true;
            }
            index++;
        }

        return false;
    }

    /**
     * Finds and checks a string's (round) brackets. The returned index numbers match {@link String#charAt(int)}.
     * <p>
     * Brackets must be correctly balanced, or an Exception will be thrown.
     *
     * @param string String to check for brackets.
     * @return Returns a NavigableMap with entries of opening bracket index to closing bracket index. Only
     * outermost bracket pairs are contained; nested pairs are covered by their enclosing pair.
     * @throws IllegalArgumentException Throws IllegalArgumentException if brackets are unbalanced.
     * @throws NullPointerException     If {@code string} is {@code null}.
     */
    public static NavigableMap<Integer, Integer> findTopBrackets(String string) throws IllegalArgumentException {
        return findTopBrackets(string, '(', ')');
    }

    /**
     * Finds and checks a string's brackets. The returned index numbers match {@link String#charAt(int)}.
     * <p>
     * Brackets must be correctly balanced, or an Exception will be thrown.
     * <p>
     * The string is scanned once from left to right while tracking the nesting depth. A pair is recorded whenever
     * the depth returns to zero.
     *
     * @param string         String to check for brackets.
     * @param openingBracket The opening bracket character to search for.
     * @param closingBracket The closing bracket character to search for.
     * @return Returns a NavigableMap with entries of opening bracket index to closing bracket index. Only
     * outermost bracket pairs are contained; nested pairs are covered by their enclosing pair.
     * @throws IllegalArgumentException Throws IllegalArgumentException if brackets are unbalanced.
     * @throws NullPointerException     If {@code string} is {@code null}.
     */
    public static NavigableMap<Integer, Integer> findTopBrackets(String string,
                                                                 char openingBracket,
                                                                 char closingBracket) throws IllegalArgumentException {
        Objects.requireNonNull(string);

        NavigableMap<Integer, Integer> beginToEndBracketIndex = new TreeMap<>();

        int depth = 0;
        int topLevelOpenIndex = -1;
        for (int i = 0; i < string.length(); i++) {
            char current = string.charAt(i);
            if (current == openingBracket) {
                if (depth == 0) {
                    topLevelOpenIndex = i;
                }
                depth++;
            } else if (current == closingBracket) {
                if (depth == 0) {
                    // closing bracket without a matching opening bracket
                    throw new IllegalArgumentException("Imbalanced brackets in string '" + string + "'.");
                }
                depth--;
                if (depth == 0) {
                    beginToEndBracketIndex.put(topLevelOpenIndex, i);
                }
            }
        }

        if (depth != 0) {
            // at least one opening bracket was never closed
            throw new IllegalArgumentException("Imbalanced brackets in string '" + string + "'.");
        }

        return beginToEndBracketIndex;
    }


    /**
     * Finds the closing bracket that corresponds to the opening bracket at openingBracketIndex.
     * <p>
     * Nested pairs of the same bracket characters between the two are skipped.
     *
     * @param stringToSearch      The string to search for the closing bracket.
     * @param openingBracketIndex Where the opening bracket is located. Works like {@code string.charAt(index)}.
     * @param openingBracket      Opening Bracket character.
     * @param closingBracket      Closing Bracket character.
     * @return Returns the index of the corresponding closing bracket or {@code -1} if none could be found.
     * @throws IllegalArgumentException  If the character at {@code openingBracketIndex} is not
     *                                   {@code openingBracket}.
     * @throws IndexOutOfBoundsException If {@code openingBracketIndex} is not a valid index of
     *                                   {@code stringToSearch}.
     */
    public static int findClosingBracket(
            String stringToSearch,
            int openingBracketIndex,
            char openingBracket,
            char closingBracket) {
        if (stringToSearch.charAt(openingBracketIndex) != openingBracket) {
            throw new IllegalArgumentException("Didn't find opening bracket at specified index.");
        }

        int depth = 0;
        for (int i = openingBracketIndex; i < stringToSearch.length(); i++) {
            char current = stringToSearch.charAt(i);
            if (current == openingBracket) {
                depth++;
            } else if (current == closingBracket) {
                depth--;
                if (depth == 0) {
                    return i;
                }
            }
        }

        // ran off the end of the string: there is no closing bracket.
        return -1;
    }

    /**
     * Escape characters disallowed in Spdx license ids (especially LicenseRefs).
     * <p>
     * This uses a custom, rather odd escape mechanism to fit spdx's tight requirements: ASCII letters, ASCII digits,
     * "-" and "." are kept, every other codepoint is written as {@code -.H0x<hex codepoint>-}.
     * <p>
     * Before escaping, the name is normalized in a way that is <em>not</em> reversible: runs of whitespace become
     * "-", "!" is removed, "+" becomes "-or-later" and a trailing "-?" is dropped.
     *
     * @param licenseName The input string which may contain disallowed characters.
     * @return Returns a string with invalid characters encoded using the hexadecimal escape mechanism.
     * @throws NullPointerException If {@code licenseName} is {@code null}.
     */
    public static String escapeForLicenseRef(String licenseName) {
        // treat some edge cases
        String name = WHITESPACE_RUN.matcher(licenseName).replaceAll("-");
        name = name.replace("!", "");
        name = name.replace("+", "-or-later");

        // omit -?
        if (name.endsWith("-?")) {
            name = name.substring(0, name.length() - 2);
        }

        // can't have floating escape sequences in the final product
        if (name.contains(escapeSequenceStart)) {
            // escape existing escape sequence lookalikes: every character of the start marker is escaped itself
            StringBuilder escapedStartMarker = new StringBuilder();
            for (int i = 0; i < escapeSequenceStart.length(); ) {
                int codepoint = escapeSequenceStart.codePointAt(i);
                escapedStartMarker.append(escapeCodepoint(codepoint));
                i += Character.charCount(codepoint);
            }

            // replace all literal occurrences with the calculated replacement sequence
            name = name.replace(escapeSequenceStart, escapedStartMarker);
        }

        StringBuilder licenseRef = new StringBuilder(name.length());

        // replace characters not allowed in the specification
        for (int i = 0; i < name.length(); ) {
            int codepoint = name.codePointAt(i);
            if (isAllowedInLicenseRef(codepoint)) {
                licenseRef.appendCodePoint(codepoint);
            } else {
                licenseRef.append(escapeCodepoint(codepoint));
            }
            i += Character.charCount(codepoint);
        }

        return licenseRef.toString();
    }

    /**
     * Tells whether a codepoint may appear unescaped in a LicenseRef.
     *
     * @param codepoint The codepoint to check.
     * @return {@code true} for ASCII letters, ASCII digits, "-" and ".".
     */
    private static boolean isAllowedInLicenseRef(int codepoint) {
        return (codepoint >= 'a' && codepoint <= 'z')
                || (codepoint >= 'A' && codepoint <= 'Z')
                || (codepoint >= '0' && codepoint <= '9')
                || codepoint == '-'
                || codepoint == '.';
    }

    /**
     * Renders a single codepoint as an escape sequence according to {@link #escapeFormatter}.
     *
     * @param codepoint The codepoint to escape.
     * @return The escape sequence representing the codepoint.
     */
    private static String escapeCodepoint(int codepoint) {
        return String.format(escapeFormatter, codepoint);
    }

    /**
     * Unescapes a string escaped with {@link #escapeForLicenseRef(String)}.
     * <p>
     * The input is parsed strictly from left to right: once an escape sequence has been read, its terminating "-"
     * is consumed and can not start another sequence. This keeps text such as {@code ".H0x"} directly behind an
     * escaped character intact.
     *
     * @param escapedLicenseId A license id that may contain escaped characters
     * @return Returns a String with all characters encoded back into the String
     * @throws IllegalArgumentException If an escape sequence is not terminated or does not contain a valid
     *                                  hexadecimal codepoint.
     * @throws NullPointerException     If {@code escapedLicenseId} is {@code null}.
     * @see #unwrapLicenseRef(String)
     */
    public static String unescapeFromLicenseRef(String escapedLicenseId) {
        Objects.requireNonNull(escapedLicenseId, "escapedLicenseId");

        StringBuilder resultBuilder = new StringBuilder(escapedLicenseId.length());

        int position = 0;
        while (true) {
            int sequenceStart = escapedLicenseId.indexOf(escapeSequenceStart, position);
            if (sequenceStart < 0) {
                // no further escape sequences: the remainder is literal text
                resultBuilder.append(escapedLicenseId, position, escapedLicenseId.length());
                return resultBuilder.toString();
            }

            // literal text in front of the escaped character
            resultBuilder.append(escapedLicenseId, position, sequenceStart);

            // the hex digits up to escapeSequenceEnd contain the escaped character
            int hexStart = sequenceStart + escapeSequenceStart.length();
            int hexEnd = escapedLicenseId.indexOf(escapeSequenceEnd, hexStart);
            if (hexEnd < 0) {
                throw new IllegalArgumentException(
                        "Unterminated escape sequence in input string '" + escapedLicenseId + "'.");
            }

            resultBuilder.appendCodePoint(parseEscapedCodepoint(escapedLicenseId.substring(hexStart, hexEnd)));
            position = hexEnd + escapeSequenceEnd.length();
        }
    }

    /**
     * Parses the hexadecimal payload of an escape sequence.
     *
     * @param hex The characters between the escape sequence start and end markers.
     * @return The codepoint encoded by {@code hex}.
     * @throws IllegalArgumentException If {@code hex} is not a hexadecimal number or not a valid Unicode codepoint.
     */
    private static int parseEscapedCodepoint(String hex) {
        final int codepoint;
        try {
            codepoint = Integer.parseInt(hex, 16);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Invalid escaped hex codepoint in input string: " + e, e);
        }

        if (!Character.isValidCodePoint(codepoint)) {
            throw new IllegalArgumentException("Invalid escaped hex codepoint in input string: " + hex);
        }
        return codepoint;
    }

    /**
     * Takes in any Unicode input and wraps it into an Spdx LicenseRef.
     * <p>
     * Characters are escaped in a custom format ({@link LicenseStringUtils#escapeForLicenseRef(String)}) to fit
     * Spdx charset requirements.
     * <p>
     * A warning is logged if the namespace is not one of {@link #supportedNamespaces} or if it needs escaping; the
     * LicenseRef is created nonetheless.
     * <p>
     * For consistency's sake, consider using the create*LicenseRef variants for methods that use preset namespaces.
     *
     * @param namespaceId the namespace to place the LicenseRef in
     * @param licenseName the name to use for creation of the LicenseRef
     * @return Returns a LicenseRef with the input name
     * @see #unwrapLicenseRef
     */
    protected static String createLicenseRef(String namespaceId, String licenseName) {
        if (!supportedNamespacesSet.contains(namespaceId)) {
            LOG.warn("Unsupported namespace [{}].", namespaceId);
        }

        String escapedNamespaceId = escapeForLicenseRef(namespaceId);
        if (!namespaceId.equals(escapedNamespaceId)) {
            LOG.warn("The namespaceId [{}] will be escaped to [{}]. Namespaces shouldn't need escaping!",
                    namespaceId, escapedNamespaceId);
        }
        return PREFIX_LICENSE_REF_COMMON + escapedNamespaceId + "-" + escapeForLicenseRef(licenseName);
    }

    /**
     * Creates a LicenseRef in the "ae" namespace.
     *
     * @param licenseName the name to use for creation of the LicenseRef
     * @return Returns a LicenseRef with the input name
     */
    public static String createAeLicenseRef(String licenseName) {
        return createLicenseRef("ae", licenseName);
    }

    /**
     * Creates a LicenseRef in the "scancode" namespace.
     *
     * @param licenseName the name to use for creation of the LicenseRef
     * @return Returns a LicenseRef with the input name
     */
    public static String createScancodeLicenseRef(String licenseName) {
        return createLicenseRef("scancode", licenseName);
    }

    /**
     * Creates a LicenseRef in the "unknown" namespace.
     *
     * @param licenseName the name to use for creation of the LicenseRef
     * @return Returns a LicenseRef with the input name
     */
    public static String createUnknownLicenseRef(String licenseName) {
        return createLicenseRef("unknown", licenseName);
    }

    /**
     * Creates a LicenseRef in the "spdx" namespace.
     *
     * @param licenseName the name to use for creation of the LicenseRef
     * @return Returns a LicenseRef with the input name
     */
    public static String createSpdxLicenseRef(String licenseName) {
        return createLicenseRef("spdx", licenseName);
    }

    /**
     * Unwraps and unescapes a LicenseRef generated by {@link #createLicenseRef(String, String)}.
     * <p>
     * This means removal of the "LicenseRef-namespaceId-" prefix and unescaping the suffix. Only the namespaces
     * listed in {@link #supportedNamespaces} are recognized.
     *
     * @param licenseRef Spdx LicenseRef- in the format of {@link #createLicenseRef(String, String)}
     * @return Returns the license name that was escaped into the ref
     * @throws IllegalArgumentException If {@code licenseRef} does not start with a supported LicenseRef prefix or
     *                                  contains a malformed escape sequence.
     * @see #createLicenseRef
     */
    public static String unwrapLicenseRef(String licenseRef) {
        // it might be faster to split off the "LicenseRef-" first but this might just be fast enough
        for (String prefix : supportedLicenseRefPrefixes) {
            if (licenseRef.startsWith(prefix)) {
                return unescapeFromLicenseRef(licenseRef.substring(prefix.length()));
            }
        }
        throw new IllegalArgumentException("Argument should be a license ref.");
    }
}