All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.artifact.analysis.spdxbom.LicenseStringUtils Maven / Gradle / Ivy

/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.artifact.analysis.spdxbom;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;

public class LicenseStringUtils {
    private static final Logger LOG = LoggerFactory.getLogger(LicenseStringUtils.class);

    protected static final String escapeSequenceStart = "-.H0x";
    protected static final String escapeSequenceEnd = "-";

    protected static final String hexFormatter = "%x";
    protected static final String escapeFormatter = escapeSequenceStart + hexFormatter + escapeSequenceEnd;
    public static final String PREFIX_LICENSE_REF_COMMON = "LicenseRef-";

    /**
     * A list of supported licenseref namespaces.
     */
    protected static final String[] supportedNamespaces = {
            "ae",
            "scancode",
            "spdx",
            "unknown"
    };

    /**
     * Set of supported namespaces, initialized off of {@link #supportedNamespaces}.
     */
    protected static final Set supportedNamespacesSet = new HashSet<>(Arrays.asList(supportedNamespaces));

    /**
     * Convenience object for class-internal use, automatically built from {@link #supportedNamespaces}.
     */
    protected static final Set supportedLicenseRefPrefixes = Collections.unmodifiableSet(
            Arrays.stream(supportedNamespaces)
                    .map(namespaceString -> PREFIX_LICENSE_REF_COMMON + escapeForLicenseRef(namespaceString) + "-")
                    .collect(LinkedHashSet::new, Set::add, Set::addAll)
    );

    /**
     * Checks if the top level logic (meaning the immediately effective logic operator) is "+".
* This also ignores brackets, which are invalid in our format but may be generated to preserve logic.
* If the entire string is already bracketed, this will ignore everything and return true.
* Used to detect when brackets need to be added. * Helps with simplifying logic during conversion between spdx and our license string. * * @param licensesString Input licensesString (in our format, supports ignoring brackets). * @return Returns true if only OR is immediately effective. */ protected static boolean containsTopAnd(String licensesString) { // we'll skip all bracketing since bracketing overrides logic operator precedence. NavigableMap skippable = findTopBrackets(licensesString); for (int i = 0; i < licensesString.length(); i++) { // skip skippable ranges Integer queryResult = skippable.get(i); if (queryResult != null) { // leave last bracket in, will be checked unnecessarily. // tradeoff to avoid checking for the end of the string outside of the for check. i = queryResult; } // check if the remaining characters contain the "," operator if (licensesString.charAt(i) == ',') { return true; } } return false; } /** * Finds and checks a string's (round) brackets. The returned index numbers match {@link String#charAt(int)}
* Brackets must be correctly balanced, or an Exception will be thrown. * * @param string String to check for brackets. * @return Returns a NavigableMap with entries of opening bracket index to closing bracket index. * @throws IllegalArgumentException Throws IllegalArgumentException if brackets are unbalanced. */ public static NavigableMap findTopBrackets(String string) throws IllegalArgumentException { return findTopBrackets(string, '(', ')'); } /** * Finds and checks a string's (round) brackets. The returned index numbers match {@link String#charAt(int)}
* Brackets must be correctly balanced, or an Exception will be thrown. * * @param string String to check for brackets. * @param openingBracket The opening bracket character to search for. * @param closingBracket The closing bracket character to search for. * @return Returns a NavigableMap with entries of opening bracket index to closing bracket index. * @throws IllegalArgumentException Throws IllegalArgumentException if brackets are unbalanced. */ public static NavigableMap findTopBrackets(String string, char openingBracket, char closingBracket) throws IllegalArgumentException { Objects.requireNonNull(string); NavigableMap beginToEndBracketIndex = new TreeMap<>(); // while there are still some sort of brackets, determine their position and span. while (string.contains(Character.toString(openingBracket)) || string.contains(Character.toString(closingBracket))) { int openBracketPos = string.indexOf(openingBracket); if (openBracketPos == -1) { throw new IllegalArgumentException("Imbalanced brackets in string '" + string + "'."); } int closeBracketPos = findClosingBracket(string, openBracketPos, openingBracket, closingBracket); if (closeBracketPos == -1) { throw new IllegalArgumentException("Imbalanced brackets in string '" + string + "'."); } // ignore bracket range without changing the string's length (to preserve character positions) string = string.substring(0, openBracketPos) + StringUtils.repeat('*', closeBracketPos - openBracketPos + 1) + string.substring(closeBracketPos + 1); // note the found brackets beginToEndBracketIndex.put(openBracketPos, closeBracketPos); } return beginToEndBracketIndex; } /** * Finds the closing bracket that corresponds to the opening brachet at openingBracketIndex.
* Defaults to round brackets. * * @param stringToSearch The string to search for the closing bracket. * @param openingBracketIndex Where the opening bracket is located. Works like {@code string.charAt(index)}. * @return Returns the index of the corresponding closing bracket or {@code -1} if none could be found.. * @see #findClosingBracket(String, int, char, char) */ private static int findClosingBracket(String stringToSearch, int openingBracketIndex) { return findClosingBracket(stringToSearch, openingBracketIndex, '(', ')'); } /** * Finds the closing bracket that corresponds to the opening brachet at openingBracketIndex.
* Only works with round brackets. * * @param stringToSearch The string to search for the closing bracket. * @param openingBracketIndex Where the opening bracket is located. Works like {@code string.charAt(index)}. * @param openingBracket Opening Bracket character. * @param closingBracket Closing Bracket character. * * @return Returns the index of the corresponding closing bracket or {@code -1} if none could be found. */ public static int findClosingBracket( String stringToSearch, int openingBracketIndex, char openingBracket, char closingBracket) { if (stringToSearch.charAt(openingBracketIndex) != openingBracket) { throw new IllegalArgumentException("Didn't find opening bracket at specified index."); } int depth = 0; int searchIndex = openingBracketIndex; do { if (searchIndex >= stringToSearch.length()) { // there is no closing bracket. return -1 for error. return -1; } char foundChar = stringToSearch.charAt(searchIndex); if (foundChar == openingBracket) { depth++; } else if (foundChar == closingBracket) { depth--; } searchIndex++; } while (depth > 0); // undo last searchIndex++ and return return searchIndex - 1; } /** * Escape characters disallowed in Spdx license ids (especially LicenseRefs).
* This uses a custom, rather odd escape mechanism to fit spdx's tight requirements. * * @param licenseName The input string which may contain disallowed characters. * @return Returns a string with invalid characters encoded using the hexadecimal escape mechanism. */ protected static String escapeForLicenseRef(String licenseName) { // treat some edge cases licenseName = licenseName.replaceAll("\\s+", "-"); licenseName = licenseName.replaceAll("!", ""); licenseName = licenseName.replaceAll("\\+", "-or-later"); // omit -? if (licenseName.endsWith("-?")) licenseName = licenseName.substring(0, licenseName.length() - 2); // can't have floating escape sequences in the final product if (licenseName.contains(escapeSequenceStart)) { // escape existing escape sequence lookalikes StringBuilder escapeSequenceReplacementBuilder = new StringBuilder(); for (int codepoint : escapeSequenceStart.codePoints().toArray()) { escapeSequenceReplacementBuilder.append(String.format(escapeFormatter, codepoint)); } // replace all literal occurrences with the calculated replacement sequence licenseName = licenseName.replace(escapeSequenceStart, escapeSequenceReplacementBuilder.toString()); } StringBuilder licenseRef = new StringBuilder(); // replace characters not allowed in the specification for (int codepoint : licenseName.codePoints().toArray()) { if (codepoint < 123 && (Character.isAlphabetic(codepoint) || Character.isDigit(codepoint) || codepoint == '-' || codepoint == '.')) { licenseRef.appendCodePoint(codepoint); } else { licenseRef.append(String.format(escapeFormatter, codepoint)); } } return licenseRef.toString(); } /** * Unescapes a string escaped with {@link #escapeForLicenseRef(String)}.
* * @param escapedLicenseId A license id that may contain escaped characters * @return Returns a String with all characters encoded back into the String * @see #unwrapLicenseRef(String) */ public static String unescapeFromLicenseRef(String escapedLicenseId) { final String escapeSequenceStart = "-.H0x"; final String escapeSequenceEnd = "-"; Pattern escapeSequenceBeginPattern = Pattern.compile(Pattern.quote(escapeSequenceStart)); List split = new ArrayList<>(Arrays.asList(escapeSequenceBeginPattern.split(escapedLicenseId))); StringBuilder resultBuilder = new StringBuilder(); // first part of the string in front of the previously escaped character resultBuilder.append(split.remove(0)); for (String sub : split) { // the next few hex before escapeSequenceEnd contain the escaped character int endIndex = sub.indexOf(escapeSequenceEnd); String hex = sub.substring(0, endIndex); String rest = sub.substring(endIndex + escapeSequenceEnd.length()); int resultCodepoint; try { resultCodepoint = (int) Long.parseUnsignedLong(hex, 16); } catch (NumberFormatException e) { throw new RuntimeException("Invalid escaped hex codepoint in input string: " + e); } resultBuilder.appendCodePoint(resultCodepoint); resultBuilder.append(rest); } return resultBuilder.toString(); } /** * Takes in any Unicode input and wraps it into an Spdx LicenseRef.
* Characters are escaped in a custom format ({@link LicenseStringUtils#escapeForLicenseRef(String)}) to fit * Spdx charset requirements.
* For consistency's sake, consider using the create*LicenseRef variants for methods that use preset namespaces. * * @param licenseName the name to use for creation of the LicenseRef * @return Returns a LicenseRef with the input name * @see #unwrapLicenseRef */ protected static String createLicenseRef(String namespaceId, String licenseName) { if (!supportedNamespacesSet.contains(namespaceId)) { LOG.warn("Unsupported namespace [{}].", namespaceId); } String escapedNamespaceId = escapeForLicenseRef(namespaceId); if (!namespaceId.equals(escapedNamespaceId)) { LOG.warn("The namespaceId [{}] will be escaped to [{}]. Namespaces shouldn't need escaping!", namespaceId, escapedNamespaceId); } return "LicenseRef-" + escapedNamespaceId + "-" + escapeForLicenseRef(licenseName); } public static String createAeLicenseRef(String licenseName) { return createLicenseRef("ae", licenseName); } public static String createScancodeLicenseRef(String licenseName) { return createLicenseRef("scancode", licenseName); } public static String createUnknownLicenseRef(String licenseName) { return createLicenseRef("unknown", licenseName); } public static String createSpdxLicenseRef(String licenseName) { return createLicenseRef("spdx", licenseName); } /** * Unwraps and unescapes a LicenseRef generated by {@link #createLicenseRef(String, String)}.
* This means removal of the "LicenseRef-namespaceId-" prefix and unescaping the suffix. * * @param licenseRef Spdx LicenseRef- in the format of {@link #createLicenseRef(String, String)} * @return Returns the original license name that was used when generating the ref * @see #createLicenseRef */ public static String unwrapLicenseRef(String licenseRef) { // it might be faster to split off the "LicenseRef-" first but this might just be fast enough for (String prefix : supportedLicenseRefPrefixes) { if (licenseRef.startsWith(prefix)) { return unescapeFromLicenseRef(licenseRef.substring(prefix.length())); } } throw new IllegalArgumentException("Argument should be a license ref."); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy