// com.metaeffekt.artifact.analysis.spdxbom.LicenseStringUtils (Maven / Gradle / Ivy)
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.artifact.analysis.spdxbom;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;
/**
 * Static helpers for working with license strings and Spdx LicenseRefs.
 * <p>
 * Two groups of functionality live here:
 * <ul>
 *   <li>bracket analysis on license strings ({@link #findTopBrackets(String)},
 *       {@link #findClosingBracket(String, int, char, char)}, {@link #containsTopAnd(String)});</li>
 *   <li>escaping / unescaping of arbitrary names so that they fit into the restricted character set used for
 *       LicenseRefs ({@link #escapeForLicenseRef(String)}, {@link #unescapeFromLicenseRef(String)},
 *       {@link #createLicenseRef(String, String)}, {@link #unwrapLicenseRef(String)}).</li>
 * </ul>
 * An escaped code point is written as {@code -.H0x<lowercase hex>-}.
 */
public class LicenseStringUtils {

    private static final Logger LOG = LoggerFactory.getLogger(LicenseStringUtils.class);

    protected static final String escapeSequenceStart = "-.H0x";
    protected static final String escapeSequenceEnd = "-";
    protected static final String hexFormatter = "%x";
    protected static final String escapeFormatter = escapeSequenceStart + hexFormatter + escapeSequenceEnd;

    public static final String PREFIX_LICENSE_REF_COMMON = "LicenseRef-";

    /**
     * Matches a run of whitespace characters.
     * Must stay declared above {@link #supportedLicenseRefPrefixes}: that field's initializer calls
     * {@link #escapeForLicenseRef(String)}, which uses this pattern during class initialization.
     */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * A list of supported licenseref namespaces.
     */
    protected static final String[] supportedNamespaces = {
            "ae",
            "scancode",
            "spdx",
            "unknown"
    };

    /**
     * Set of supported namespaces, initialized off of {@link #supportedNamespaces}.
     */
    protected static final Set<String> supportedNamespacesSet = new HashSet<>(Arrays.asList(supportedNamespaces));

    /**
     * Convenience object for class-internal use, automatically built from {@link #supportedNamespaces}.
     * Contains one {@code "LicenseRef-<namespace>-"} prefix per namespace, in declaration order; unmodifiable.
     */
    protected static final Set<String> supportedLicenseRefPrefixes = buildSupportedLicenseRefPrefixes();

    /**
     * Builds the unmodifiable, insertion-ordered set of LicenseRef prefixes for all supported namespaces.
     *
     * @return Returns the prefixes in the order of {@link #supportedNamespaces}.
     */
    private static Set<String> buildSupportedLicenseRefPrefixes() {
        final Set<String> prefixes = new LinkedHashSet<>();
        for (String namespace : supportedNamespaces) {
            prefixes.add(PREFIX_LICENSE_REF_COMMON + escapeForLicenseRef(namespace) + "-");
        }
        return Collections.unmodifiableSet(prefixes);
    }

    /**
     * Checks whether the "," operator occurs at the top level of the given license string, i.e. outside of any
     * (round) bracket group.
     * Bracketed ranges are skipped entirely since bracketing overrides operator precedence. Brackets are invalid
     * in our format but may be generated to preserve logic.
     * If the entire string is bracketed, nothing is inspected and the method returns false.
     * Used to detect when brackets need to be added.
     * Helps with simplifying logic during conversion between spdx and our license string.
     *
     * @param licensesString Input licensesString (in our format, supports ignoring brackets).
     * @return Returns true if a "," is found outside of all brackets, false otherwise.
     * @throws IllegalArgumentException Throws IllegalArgumentException if brackets are unbalanced.
     */
    protected static boolean containsTopAnd(String licensesString) {
        // maps the index of each top-level opening bracket to the index of its closing bracket
        final NavigableMap<Integer, Integer> topBrackets = findTopBrackets(licensesString);

        int index = 0;
        while (index < licensesString.length()) {
            final Integer closingIndex = topBrackets.get(index);
            if (closingIndex != null) {
                // jump behind the whole bracket group
                index = closingIndex + 1;
                continue;
            }
            if (licensesString.charAt(index) == ',') {
                return true;
            }
            index++;
        }
        return false;
    }

    /**
     * Finds and checks a string's top-level (round) bracket groups. The returned index numbers match
     * {@link String#charAt(int)}.
     * Brackets must be correctly balanced, or an Exception will be thrown.
     *
     * @param string String to check for brackets.
     * @return Returns a NavigableMap with entries of opening bracket index to closing bracket index.
     * @throws IllegalArgumentException Throws IllegalArgumentException if brackets are unbalanced.
     */
    public static NavigableMap<Integer, Integer> findTopBrackets(String string) throws IllegalArgumentException {
        return findTopBrackets(string, '(', ')');
    }

    /**
     * Finds and checks a string's top-level bracket groups. The returned index numbers match
     * {@link String#charAt(int)}. Nested groups are validated but only the outermost groups are reported.
     * Brackets must be correctly balanced, or an Exception will be thrown.
     *
     * @param string         String to check for brackets.
     * @param openingBracket The opening bracket character to search for.
     * @param closingBracket The closing bracket character to search for.
     * @return Returns a NavigableMap with entries of opening bracket index to closing bracket index.
     * @throws IllegalArgumentException Throws IllegalArgumentException if brackets are unbalanced.
     */
    public static NavigableMap<Integer, Integer> findTopBrackets(String string,
                                                                 char openingBracket,
                                                                 char closingBracket) throws IllegalArgumentException {
        Objects.requireNonNull(string);

        final NavigableMap<Integer, Integer> beginToEndBracketIndex = new TreeMap<>();

        // single left-to-right pass: every top-level opening bracket is matched and its whole range skipped
        int index = 0;
        while (index < string.length()) {
            final char current = string.charAt(index);
            if (current == openingBracket) {
                final int closeBracketPos = findClosingBracket(string, index, openingBracket, closingBracket);
                if (closeBracketPos == -1) {
                    throw new IllegalArgumentException("Imbalanced brackets in string '" + string + "'.");
                }
                beginToEndBracketIndex.put(index, closeBracketPos);
                index = closeBracketPos + 1;
            } else if (current == closingBracket) {
                // a closing bracket outside of any group has no opening counterpart
                throw new IllegalArgumentException("Imbalanced brackets in string '" + string + "'.");
            } else {
                index++;
            }
        }

        return beginToEndBracketIndex;
    }

    /**
     * Finds the closing bracket that corresponds to the opening bracket at openingBracketIndex.
     * Defaults to round brackets.
     *
     * @param stringToSearch      The string to search for the closing bracket.
     * @param openingBracketIndex Where the opening bracket is located. Works like {@code string.charAt(index)}.
     * @return Returns the index of the corresponding closing bracket or {@code -1} if none could be found.
     * @see #findClosingBracket(String, int, char, char)
     */
    private static int findClosingBracket(String stringToSearch, int openingBracketIndex) {
        return findClosingBracket(stringToSearch, openingBracketIndex, '(', ')');
    }

    /**
     * Finds the closing bracket that corresponds to the opening bracket at openingBracketIndex.
     * Works with any pair of distinct bracket characters; nesting is taken into account.
     *
     * @param stringToSearch      The string to search for the closing bracket.
     * @param openingBracketIndex Where the opening bracket is located. Works like {@code string.charAt(index)}.
     * @param openingBracket      Opening Bracket character.
     * @param closingBracket      Closing Bracket character.
     * @return Returns the index of the corresponding closing bracket or {@code -1} if none could be found.
     * @throws IllegalArgumentException Throws IllegalArgumentException if the character at openingBracketIndex
     *                                  is not the opening bracket.
     */
    public static int findClosingBracket(
            String stringToSearch,
            int openingBracketIndex,
            char openingBracket,
            char closingBracket) {
        if (stringToSearch.charAt(openingBracketIndex) != openingBracket) {
            throw new IllegalArgumentException("Didn't find opening bracket at specified index.");
        }

        int depth = 0;
        for (int searchIndex = openingBracketIndex; searchIndex < stringToSearch.length(); searchIndex++) {
            final char foundChar = stringToSearch.charAt(searchIndex);
            if (foundChar == openingBracket) {
                depth++;
            } else if (foundChar == closingBracket) {
                depth--;
                if (depth == 0) {
                    // the bracket opened at openingBracketIndex has just been closed
                    return searchIndex;
                }
            }
        }

        // ran off the end of the string: there is no closing bracket
        return -1;
    }

    /**
     * Formats a single code point as an escape sequence according to {@link #escapeFormatter}.
     *
     * @param codepoint The code point to escape.
     * @return Returns the escape sequence, e.g. {@code -.H0x5f-} for an underscore.
     */
    private static String escapeCodePoint(int codepoint) {
        return String.format(escapeFormatter, codepoint);
    }

    /**
     * Checks whether a code point may appear unescaped: ASCII letters, ASCII digits, '-' and '.'.
     *
     * @param codepoint The code point to check.
     * @return Returns true if the code point can be copied verbatim.
     */
    private static boolean isAllowedInLicenseRef(int codepoint) {
        return (codepoint >= 'a' && codepoint <= 'z')
                || (codepoint >= 'A' && codepoint <= 'Z')
                || (codepoint >= '0' && codepoint <= '9')
                || codepoint == '-'
                || codepoint == '.';
    }

    /**
     * Escape characters disallowed in Spdx license ids (especially LicenseRefs).
     * This uses a custom, rather odd escape mechanism to fit spdx's tight requirements.
     * <p>
     * Before escaping, the name is normalized: whitespace runs become "-", "!" is removed, "+" becomes
     * "-or-later" and a trailing "-?" is dropped. These normalizations are not reversed by
     * {@link #unescapeFromLicenseRef(String)}.
     *
     * @param licenseName The input string which may contain disallowed characters.
     * @return Returns a string with invalid characters encoded using the hexadecimal escape mechanism.
     */
    protected static String escapeForLicenseRef(String licenseName) {
        Objects.requireNonNull(licenseName, "licenseName");

        // treat some edge cases
        String normalized = WHITESPACE_RUN.matcher(licenseName).replaceAll("-");
        normalized = normalized.replace("!", "");
        normalized = normalized.replace("+", "-or-later");

        // omit -?
        if (normalized.endsWith("-?")) {
            normalized = normalized.substring(0, normalized.length() - 2);
        }

        // can't have floating escape sequences in the final product
        if (normalized.contains(escapeSequenceStart)) {
            // escape existing escape sequence lookalikes character by character
            final StringBuilder escapedLookalike = new StringBuilder();
            escapeSequenceStart.codePoints()
                    .forEach(codepoint -> escapedLookalike.append(escapeCodePoint(codepoint)));

            // replace all literal occurrences with the calculated replacement sequence
            normalized = normalized.replace(escapeSequenceStart, escapedLookalike.toString());
        }

        // replace characters not allowed in the specification
        final StringBuilder licenseRef = new StringBuilder(normalized.length());
        normalized.codePoints().forEach(codepoint -> {
            if (isAllowedInLicenseRef(codepoint)) {
                licenseRef.appendCodePoint(codepoint);
            } else {
                licenseRef.append(escapeCodePoint(codepoint));
            }
        });

        return licenseRef.toString();
    }

    /**
     * Unescapes a string escaped with {@link #escapeForLicenseRef(String)}.
     * <p>
     * The input is scanned from left to right; each escape sequence is consumed including its terminating "-".
     * Consuming the terminator matters: a terminator directly followed by literal ".H0x" must not be mistaken
     * for the beginning of another escape sequence.
     *
     * @param escapedLicenseId A license id that may contain escaped characters
     * @return Returns a String with all characters encoded back into the String
     * @throws IllegalArgumentException Throws IllegalArgumentException if an escape sequence is not terminated,
     *                                  does not contain valid hexadecimal digits or does not denote a valid
     *                                  Unicode code point.
     * @see #unwrapLicenseRef(String)
     */
    public static String unescapeFromLicenseRef(String escapedLicenseId) {
        Objects.requireNonNull(escapedLicenseId, "escapedLicenseId");

        final StringBuilder resultBuilder = new StringBuilder(escapedLicenseId.length());

        int cursor = 0;
        int sequenceStart = escapedLicenseId.indexOf(escapeSequenceStart, cursor);
        while (sequenceStart >= 0) {
            // literal part in front of the escaped character
            resultBuilder.append(escapedLicenseId, cursor, sequenceStart);

            // the hex digits up to escapeSequenceEnd contain the escaped code point
            final int hexStart = sequenceStart + escapeSequenceStart.length();
            final int hexEnd = escapedLicenseId.indexOf(escapeSequenceEnd, hexStart);
            if (hexEnd < 0) {
                throw new IllegalArgumentException(
                        "Unterminated escape sequence at index " + sequenceStart
                                + " in input string '" + escapedLicenseId + "'.");
            }
            final String hex = escapedLicenseId.substring(hexStart, hexEnd);

            final long parsedCodepoint;
            try {
                parsedCodepoint = Long.parseUnsignedLong(hex, 16);
            } catch (NumberFormatException e) {
                throw new IllegalArgumentException("Invalid escaped hex codepoint in input string: " + e, e);
            }
            // reject out-of-range values instead of silently truncating them to an int
            if (parsedCodepoint < 0 || parsedCodepoint > Character.MAX_CODE_POINT) {
                throw new IllegalArgumentException(
                        "Escaped value '" + hex + "' in input string is not a valid Unicode code point.");
            }
            resultBuilder.appendCodePoint((int) parsedCodepoint);

            cursor = hexEnd + escapeSequenceEnd.length();
            sequenceStart = escapedLicenseId.indexOf(escapeSequenceStart, cursor);
        }

        // literal remainder behind the last escape sequence
        resultBuilder.append(escapedLicenseId, cursor, escapedLicenseId.length());

        return resultBuilder.toString();
    }

    /**
     * Takes in any Unicode input and wraps it into an Spdx LicenseRef.
     * Characters are escaped in a custom format ({@link LicenseStringUtils#escapeForLicenseRef(String)}) to fit
     * Spdx charset requirements.
     * For consistency's sake, consider using the create*LicenseRef variants for methods that use preset namespaces.
     * <p>
     * An unsupported namespace, or a namespace that needs escaping, is only logged as a warning; the LicenseRef
     * is created regardless. Note that {@link #unwrapLicenseRef(String)} only recognizes supported namespaces.
     *
     * @param namespaceId the namespace to place behind the "LicenseRef-" prefix
     * @param licenseName the name to use for creation of the LicenseRef
     * @return Returns a LicenseRef with the input name
     * @see #unwrapLicenseRef
     */
    protected static String createLicenseRef(String namespaceId, String licenseName) {
        if (!supportedNamespacesSet.contains(namespaceId)) {
            LOG.warn("Unsupported namespace [{}].", namespaceId);
        }

        final String escapedNamespaceId = escapeForLicenseRef(namespaceId);
        if (!namespaceId.equals(escapedNamespaceId)) {
            LOG.warn("The namespaceId [{}] will be escaped to [{}]. Namespaces shouldn't need escaping!",
                    namespaceId, escapedNamespaceId);
        }

        return PREFIX_LICENSE_REF_COMMON + escapedNamespaceId + "-" + escapeForLicenseRef(licenseName);
    }

    /**
     * Creates a LicenseRef in the "ae" namespace.
     *
     * @param licenseName the name to use for creation of the LicenseRef
     * @return Returns a LicenseRef with the input name
     */
    public static String createAeLicenseRef(String licenseName) {
        return createLicenseRef("ae", licenseName);
    }

    /**
     * Creates a LicenseRef in the "scancode" namespace.
     *
     * @param licenseName the name to use for creation of the LicenseRef
     * @return Returns a LicenseRef with the input name
     */
    public static String createScancodeLicenseRef(String licenseName) {
        return createLicenseRef("scancode", licenseName);
    }

    /**
     * Creates a LicenseRef in the "unknown" namespace.
     *
     * @param licenseName the name to use for creation of the LicenseRef
     * @return Returns a LicenseRef with the input name
     */
    public static String createUnknownLicenseRef(String licenseName) {
        return createLicenseRef("unknown", licenseName);
    }

    /**
     * Creates a LicenseRef in the "spdx" namespace.
     *
     * @param licenseName the name to use for creation of the LicenseRef
     * @return Returns a LicenseRef with the input name
     */
    public static String createSpdxLicenseRef(String licenseName) {
        return createLicenseRef("spdx", licenseName);
    }

    /**
     * Unwraps and unescapes a LicenseRef generated by {@link #createLicenseRef(String, String)}.
     * This means removal of the "LicenseRef-namespaceId-" prefix and unescaping the suffix.
     *
     * @param licenseRef Spdx LicenseRef- in the format of {@link #createLicenseRef(String, String)}
     * @return Returns the license name with escape sequences decoded
     * @throws IllegalArgumentException Throws IllegalArgumentException if the argument does not start with the
     *                                  prefix of a supported namespace or contains malformed escape sequences.
     * @see #createLicenseRef
     */
    public static String unwrapLicenseRef(String licenseRef) {
        // it might be faster to split off the "LicenseRef-" first but this might just be fast enough
        for (String prefix : supportedLicenseRefPrefixes) {
            if (licenseRef.startsWith(prefix)) {
                return unescapeFromLicenseRef(licenseRef.substring(prefix.length()));
            }
        }

        throw new IllegalArgumentException("Argument should be a license ref.");
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy