// com.metaeffekt.artifact.terms.model.NormalizationMetaData Maven / Gradle / Ivy
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.artifact.terms.model;
import com.metaeffekt.artifact.analysis.preprocess.filter.wordlist.WordlistGenerator;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.StringStats;
import com.metaeffekt.artifact.analysis.utils.StringUtils;
import org.apache.tools.ant.DirectoryScanner;
import org.metaeffekt.core.inventory.processor.model.Inventory;
import org.metaeffekt.core.inventory.processor.model.LicenseData;
import org.metaeffekt.core.inventory.processor.reader.InventoryReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
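/**
 * In-memory representation of a terms metadata database. An instance is read from the
 * {@code license.meta.yaml} files found below a base directory and offers name-based lookup of
 * {@link TermsMetaData} as well as analysis, masking and mapping of license texts.
 * <p>
 * Created by kklein on 16/01/2017.
 */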
public class NormalizationMetaData implements Serializable {
private static final long serialVersionUID = -1;
private static final Logger LOG = LoggerFactory.getLogger(NormalizationMetaData.class);
public static final String STRING_WHITESPACE = " ";
public static final String PREFIX_LICENSE_REF = "LicenseRef-";
public static final String ORDER_DEFAULT = "999";
private final Map licenseNameMap = new HashMap<>();
private final Map categoryNameMap = new HashMap<>();
private final Map licenseMetaDataMap = new HashMap<>();
private transient Map shortNameMap = new HashMap<>();
private transient Map spdxIdentifierMap = new HashMap<>();
// these transient maps are implicitly recreated
private transient Map> mappingMap;
private transient Map historicalCanonicalNameMap = null;
private transient Set normalizedMasks = null;
/**
* Dummy NormalizationMetaData instance to support processes without access to a terms database.
*/
public static final NormalizationMetaData EMPTY_NORMALIZATION_METADATA = new NormalizationMetaData();
/**
* A list of "words" generated using text metadata and license texts.
*/
private List wordlist = new ArrayList<>();
/**
 * Reads the terms database located below the given base directory.
 * <p>
 * The terms metadata is read from the file system first, then the external status information is
 * applied and finally the references between the terms metadata are distributed.
 *
 * @param baseDir The root directory of the terms database.
 */
public NormalizationMetaData(File baseDir) {
    LOG.info("Reading Terms Database [" + getCanonicalPath(baseDir) + "]...");

    this.readFromFileSystem(baseDir);
    this.processExternalMetaData(baseDir);
    this.preprocess();

    // NOTE:
    //   The wordlist is deliberately not initialized here. It is currently only
    //   generated when the encrypted TMD packages are built.

    LOG.info("Reading Terms Database [" + getCanonicalPath(baseDir) + "] completed.");
}
/**
 * Creates an instance without reading any terms database. Only used to construct
 * {@link #EMPTY_NORMALIZATION_METADATA}.
 */
private NormalizationMetaData() {
    // intentionally empty; all maps keep their initial (empty) state
}
/**
 * Produces the path of the given directory for log output.
 *
 * @param baseDir The directory to produce the path for.
 *
 * @return The absolute path of the canonical file; the plain absolute path in case the canonical
 *         file cannot be determined.
 */
private String getCanonicalPath(File baseDir) {
    File resolved;
    try {
        resolved = baseDir.getCanonicalFile();
    } catch (IOException e) {
        // fall back to the non-canonical file
        resolved = baseDir;
    }
    return resolved.getAbsolutePath();
}
/**
 * Generates and sets the wordlist in this NormalizationMetaData object.
 * Note that this needs a fresh NormalizationMetaData object, read from an existing source repo on disk.
 *
 * @throws IllegalStateException in case the wordlist cannot be generated; the causing
 *         {@link IOException} is attached as cause.
 */
public void generateAndSetWordlist() {
    // work on a copy; customer metadata must never contribute to the wordlist
    final Map<String, TermsMetaData> filteredTermsMetaDataMap = new HashMap<>(getLicenseMetaDataMap());
    filteredTermsMetaDataMap.values().removeIf(TermsMetaData::isCustomerMetaData);

    try {
        // pass the filtered map to avoid inclusion of customer licenses
        this.wordlist = WordlistGenerator.createWordlist(filteredTermsMetaDataMap);
    } catch (IOException e) {
        // keep the cause; otherwise the reason for the failure is lost
        throw new IllegalStateException("Failed to generate wordlist for termsMetaData.", e);
    }
}
/**
 * Applies status information maintained outside the individual terms metadata files. The
 * information is read from the {@code _external/opencode} and {@code _external/osi} folders below
 * the base directory; folders that do not exist are skipped.
 *
 * @param baseDir The root directory of the terms database.
 */
private void processExternalMetaData(File baseDir) {
    final File openCodeConfigDir = new File(baseDir, "_external/opencode");
    if (openCodeConfigDir.exists()) {
        // official OpenCoDE approved list
        processOpenCoDEApprovedInputFile(new File(openCodeConfigDir, "approved-licenses.txt"), false);

        // unofficial similarity based approvals
        processOpenCoDEApprovedInputFile(new File(openCodeConfigDir, "approved-licenses_pwc.txt"), true);
        processOpenCoDEApprovedInputFile(new File(openCodeConfigDir, "approved-licenses_ae.txt"), true);

        // not approved license list
        processOpenCoDENotApprovedInputFile(new File(openCodeConfigDir, "not-approved-licenses.txt"));
    }

    // postprocessing: terms without own status inherit the Open CoDE status of their base terms
    for (TermsMetaData tmd : licenseMetaDataMap.values()) {
        final String baseTerms = tmd.getBaseTerms();
        if (baseTerms == null || tmd.getOpenCoDEStatus() != null) {
            continue;
        }
        final TermsMetaData baseTmd = findTermsMetaData(baseTerms);
        if (baseTmd != null) {
            tmd.setOpenCoDEStatus(baseTmd.getOpenCoDEStatus());
        }
    }

    final File osiConfigDir = new File(baseDir, "_external/osi");
    if (!osiConfigDir.exists()) {
        return;
    }

    final File osiLicenseStatus = new File(osiConfigDir, "osi-license-status.xls");
    if (!osiLicenseStatus.exists()) {
        LOG.warn("Cannot parse OSI license status information. File [{}] does not exist.", osiLicenseStatus);
        return;
    }

    try {
        processOsiLicenseStatus(osiLicenseStatus);
    } catch (IOException exception) {
        LOG.error("Cannot parse OSI license status information.", exception);
    }
}
/**
 * Processes the given list of licenses not approved by Open CoDE. A missing file is reported as
 * warning, a file that cannot be read as error; neither aborts processing.
 *
 * @param openCodeNotApprovedLicenses The file listing the not approved licenses.
 */
private void processOpenCoDENotApprovedInputFile(File openCodeNotApprovedLicenses) {
    if (!openCodeNotApprovedLicenses.exists()) {
        LOG.warn("Cannot parse Open CoDE not approved licenses. File [{}] does not exist.", openCodeNotApprovedLicenses);
        return;
    }

    try {
        processOpenCoDENotApproved(openCodeNotApprovedLicenses);
    } catch (IOException exception) {
        LOG.error("Cannot parse Open CoDE not approved licenses.", exception);
    }
}
/**
 * Processes the given list of licenses approved by Open CoDE. A missing file is reported as
 * warning, a file that cannot be read as error; neither aborts processing.
 *
 * @param openCodeApprovedLicenses The file listing the approved licenses.
 * @param similarLicenseMapping Passed on to {@link #processOpenCoDEApproved(File, boolean)}.
 */
private void processOpenCoDEApprovedInputFile(File openCodeApprovedLicenses, boolean similarLicenseMapping) {
    if (!openCodeApprovedLicenses.exists()) {
        LOG.warn("Cannot parse Open CoDE approved licenses. File [{}] does not exist.", openCodeApprovedLicenses);
        return;
    }

    try {
        processOpenCoDEApproved(openCodeApprovedLicenses, similarLicenseMapping);
    } catch (IOException exception) {
        LOG.error("Cannot parse Open CoDE approved licenses.", exception);
    }
}
/**
 * Transfers the OSI status, rationale, category and superseded-by id from the license data of
 * the given inventory file to the terms metadata with the same canonical name. Inconsistencies
 * are logged as warnings only.
 *
 * @param osiLicenseStatus The inventory file carrying the OSI license status.
 *
 * @throws IOException As declared by the inventory reader.
 */
private void processOsiLicenseStatus(File osiLicenseStatus) throws IOException {
    final Inventory osiInventory = new InventoryReader().readInventory(osiLicenseStatus);

    for (LicenseData row : osiInventory.getLicenseData()) {
        final String canonicalName = row.get(LicenseData.Attribute.CANONICAL_NAME);
        final TermsMetaData termsMetaData = getTermsMetaData(canonicalName);

        if (termsMetaData == null) {
            // rows explicitly marked with 'x' in "License Missing" are known gaps; do not warn
            final boolean markedAsMissing = "x".equalsIgnoreCase(row.get("License Missing"));
            if (!markedAsMissing) {
                LOG.warn("Cannot find TermsMetaData for [" + canonicalName + "].");
            }
            continue;
        }

        final String osiLicenseId = row.get("OSI License Id");
        final String osiSupersededBy = row.get("OSI Superseded-by Id");
        final String osiStatus = row.get("OSI Status");
        final String osiCategory = row.get("OSI Category");
        final String osiRationale = row.get("OSI Rationale");

        // validate license ids are in sync
        final String tmdOsiId = termsMetaData.getOtherId("osi");
        if (StringUtils.notEmpty(tmdOsiId) && !tmdOsiId.equals(osiLicenseId)) {
            LOG.warn("OSI license ids inconsistent for [" + canonicalName + "].");
        }

        // approved license must have an OSI id.
        if ("approved".equalsIgnoreCase(osiStatus) && StringUtils.isEmpty(tmdOsiId)) {
            LOG.warn("Approved OSI licenses must carry an OSI id. Check [" + canonicalName + "].");
        }

        termsMetaData.setOsiStatus(osiStatus);
        termsMetaData.setOsiRationale(osiRationale);
        termsMetaData.setOsiCategory(osiCategory);
        termsMetaData.setOsiSupersededBy(osiSupersededBy);
    }
}
/**
 * Applies the Open CoDE approvals listed in the given tab-separated file. Lines starting with
 * {@code #} are skipped.
 * <ul>
 *   <li>Without similar license mapping a line must have four columns; the second column is the
 *       license id used to look up the terms metadata, which is then marked as approved.</li>
 *   <li>With similar license mapping a line must have two columns: the canonical name and the id
 *       of a similar license. Terms metadata that has no Open CoDE status yet is marked as
 *       implicitly approved and the id of the similar license is recorded.</li>
 * </ul>
 * Lines with an unexpected number of columns and ids that cannot be resolved are logged and skipped.
 *
 * @param openCodeApprovedLicenses The file to read.
 * @param similarLicenseMapping Selects the two-column similar license format.
 *
 * @throws IOException In case the file cannot be read.
 */
private void processOpenCoDEApproved(File openCodeApprovedLicenses, boolean similarLicenseMapping) throws IOException {
    final List<String> lines = FileUtils.readLines(openCodeApprovedLicenses, FileUtils.ENCODING_UTF_8);
    final int expectedColumns = similarLicenseMapping ? 2 : 4;

    for (String line : lines) {
        if (line.startsWith("#")) continue;

        final String[] split = line.split("\t");
        if (split.length != expectedColumns) {
            LOG.error("Cannot parse Open CoDE license data: {}", line);
            continue;
        }

        // the license name may not always be provided
        final String licenseName = split[0];

        // the short id is either an SPDX license identifier or a LicenseRef-- combination
        final String licenseShortId = split[1];

        if (!similarLicenseMapping) {
            final TermsMetaData termsMetaData = findByOpenCodeLicenseId(licenseShortId);
            if (termsMetaData != null) {
                termsMetaData.setOpenCoDEStatus(TermsMetaData.STATUS_APPROVED);
            } else {
                LOG.warn("No terms metadata for Open CoDE license ['{}' ({})].", licenseName, licenseShortId);
            }
            continue;
        }

        // expecting Canonical Name and Similar License (SPDX or ScanCode)
        final TermsMetaData termsMetaData = getTermsMetaData(licenseName);
        if (termsMetaData == null) {
            LOG.warn("No terms metadata for canonical names [{}].", licenseName);
            continue;
        }

        // also try to resolve similar license
        final TermsMetaData similarTmd = findByOpenCodeLicenseId(licenseShortId);
        if (similarTmd == null) {
            LOG.warn("Cannot find similar license [{}].", licenseShortId);
        }

        if (!StringUtils.hasText(termsMetaData.getOpenCoDEStatus())) {
            termsMetaData.setOpenCoDEStatus(TermsMetaData.STATUS_APPROVED_IMPLICIT);
            termsMetaData.setOpenCoDESimilarLicenseId(licenseShortId);

            // an unresolved similar license has been reported above; it must not be dereferenced
            if (similarTmd != null && !similarTmd.isOpenCodeApproved()) {
                LOG.warn("Inconsistency detected. Using similar license to derive implicit Open CoDE approval for [{}], but similar license [{}] not approved.", licenseName, licenseShortId);
            }
        }
    }
}
/**
 * Resolves terms metadata for a license id as used in the Open CoDE lists.
 * <p>
 * Ids starting with {@link #PREFIX_LICENSE_REF} are stripped of that prefix, the first {@code -}
 * of the remainder is replaced by {@code :} and the result is looked up in the other ids. All
 * remaining ids are looked up as SPDX identifier first and as short name second.
 *
 * @param openCodeLicenseId The license id to resolve.
 *
 * @return The matching terms metadata or null in case the id is null or cannot be resolved.
 */
private TermsMetaData findByOpenCodeLicenseId(String openCodeLicenseId) {
    if (openCodeLicenseId == null) {
        return null;
    }

    if (openCodeLicenseId.startsWith(PREFIX_LICENSE_REF)) {
        // strip the leading prefix only; further occurrences are part of the id
        final String otherId = openCodeLicenseId
                .substring(PREFIX_LICENSE_REF.length())
                .replaceFirst("-", ":");
        return findByOtherId(otherId);
    }

    // no license reference; try the SPDX identifier first
    final TermsMetaData bySpdxIdentifier = findBySpdxIdentifier(openCodeLicenseId);
    if (bySpdxIdentifier != null) {
        return bySpdxIdentifier;
    }

    // alternatively find by shortname to also support concluded approved list
    return findByShortName(openCodeLicenseId);
}
/**
 * Searches all registered terms metadata for the given other id.
 *
 * @param otherId The id to search for.
 *
 * @return The first terms metadata whose other ids contain the given id; null if there is none.
 */
private TermsMetaData findByOtherId(String otherId) {
    for (TermsMetaData candidate : licenseMetaDataMap.values()) {
        final List<?> candidateIds = candidate.getOtherIds();
        if (candidateIds != null && candidateIds.contains(otherId)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Marks the terms metadata listed in the given tab-separated file as not approved by Open CoDE.
 * Lines starting with {@code #} are skipped. Every other line must consist of four columns, of
 * which the second one is the license id used to look up the terms metadata. Lines that cannot be
 * parsed and ids that cannot be resolved are logged and skipped.
 *
 * @param openCodeNotApprovedLicenses The file to read.
 *
 * @throws IOException In case the file cannot be read.
 */
public void processOpenCoDENotApproved(File openCodeNotApprovedLicenses) throws IOException {
    final List<String> lines = FileUtils.readLines(openCodeNotApprovedLicenses, FileUtils.ENCODING_UTF_8);

    for (String line : lines) {
        if (line.startsWith("#")) continue;

        final String[] columns = line.split("\t");
        if (columns.length != 4) {
            LOG.error("Cannot parse Open CoDE license data: " + line);
            continue;
        }

        final String licenseName = columns[0];
        final String licenseShortId = columns[1];

        final TermsMetaData termsMetaData = findByOpenCodeLicenseId(licenseShortId);
        if (termsMetaData == null) {
            LOG.warn("No terms metadata for Open CoDE license [{} ({})].", licenseName, licenseShortId);
            continue;
        }

        termsMetaData.setOpenCoDEStatus(TermsMetaData.STATUS_NOT_APPROVED);
    }
}
/**
 * Distributes the references declared by the terms metadata: every reference is added to all
 * terms metadata whose category or canonical name equals the key the reference is declared for.
 */
private void preprocess() {
    for (TermsMetaData declaring : licenseMetaDataMap.values()) {
        final Map<String, Reference> declaredReferences = declaring.getReferences();
        if (declaredReferences == null) {
            continue;
        }

        for (String referencedName : declaredReferences.keySet()) {
            for (TermsMetaData candidate : licenseMetaDataMap.values()) {
                final boolean isReferenced = candidate.getCategory().equals(referencedName)
                        || candidate.getCanonicalName().equals(referencedName);
                if (isReferenced) {
                    candidate.addReference(declaredReferences.get(referencedName));
                }
            }
        }
    }
}
/**
 * Scans the base directory for {@code license.meta.yaml} files (anything below a {@code .meta}
 * folder is excluded) and registers the terms metadata read from each file:
 * <ul>
 *   <li>the single file of a sibling {@code license} or {@code readme} folder is attached,</li>
 *   <li>the terms metadata is registered by canonical name,</li>
 *   <li>short name, alternative short names and SPDX identifier are added to the lookup maps,</li>
 *   <li>canonical name, alternative names and SPDX identifier are added to the name maps.</li>
 * </ul>
 *
 * @param baseDir The root directory of the terms database.
 *
 * @throws IllegalStateException In case a file cannot be processed. Any failure, including a
 *         duplicate canonical name, several license or readme files and a missing canonical name
 *         or missing alternative names, is wrapped and reported with the path of the file.
 */
private void readFromFileSystem(File baseDir) {
    final DirectoryScanner scanner = new DirectoryScanner();
    scanner.setBasedir(baseDir);
    scanner.setIncludes(new String[]{"**/license.meta.yaml"});
    scanner.setExcludes(new String[]{"**/.meta/**/*"});
    scanner.scan();

    for (String path : scanner.getIncludedFiles()) {
        final File file = new File(baseDir, path);
        try {
            // compatibility
            final String yamlContent = FileUtils.readFileToString(file, FileUtils.ENCODING_UTF_8);
            final Yaml yaml = new Yaml();
            final TermsMetaData licenseMetaData = yaml.loadAs(yamlContent, TermsMetaData.class);
            licenseMetaData.setNormalizationMetaData(this);
            licenseMetaData.setFile(file);
            licenseMetaData.mergeExternalMetaData();
            licenseMetaData.readPartialMatches();
            licenseMetaData.readMatchedMarkers();

            // scan for license folder
            final File licenseDir = new File(file.getParentFile(), "license");
            if (licenseDir.exists()) {
                final String[] licenses = FileUtils.scanDirectoryForFiles(licenseDir, true, "*.*");
                if (licenses.length == 1) {
                    licenseMetaData.setLicenseFile(new File(licenseDir, licenses[0]).getAbsolutePath());
                } else if (licenses.length > 1) {
                    // file names are passed as argument; as part of the format string a '%' in a
                    // file name would be interpreted as format specifier
                    throw new IllegalStateException(String.format("%s: more that one license file detected.%s",
                            licenseMetaData.getCanonicalName(), Arrays.asList(licenses)));
                }
            }

            // scan for readme folder
            final File readmeDir = new File(file.getParentFile(), "readme");
            if (readmeDir.exists()) {
                final String[] files = FileUtils.scanDirectoryForFiles(readmeDir, true, "*.*");
                if (files.length == 1) {
                    licenseMetaData.setReadmeFile(new File(readmeDir, files[0]).getAbsolutePath());
                } else if (files.length > 1) {
                    throw new IllegalStateException(String.format("%s: more that one readme file supported.%s",
                            licenseMetaData.getCanonicalName(), Arrays.asList(files)));
                }
            }

            if (licenseMetaDataMap.containsKey(licenseMetaData.getCanonicalName())) {
                throw new IllegalStateException("Canonical name " + licenseMetaData.getCanonicalName() + " already registered." +
                        " Duplicate use detected in " + path);
            }

            if (LOG.isTraceEnabled()) {
                LOG.trace("Registering license meta data [{}].", licenseMetaData.getCanonicalName());
            }
            licenseMetaDataMap.put(licenseMetaData.getCanonicalName(), licenseMetaData);

            // contribute shortnames to shortname map
            if (licenseMetaData.getShortName() != null) {
                shortNameMap.put(licenseMetaData.getShortName(), licenseMetaData);
            }

            // contribute alternative names to shortname map; do not replace existing
            if (licenseMetaData.getAlternativeShortNames() != null) {
                licenseMetaData.getAlternativeShortNames().stream().forEach(s -> shortNameMap.putIfAbsent(s, licenseMetaData));
            }

            // contribute SPDX identifiers to shortname map; do not replace existing
            if (licenseMetaData.getSpdxIdentifier() != null) {
                shortNameMap.putIfAbsent(licenseMetaData.getSpdxIdentifier(), licenseMetaData);
                spdxIdentifierMap.put(licenseMetaData.getSpdxIdentifier(), licenseMetaData);
            }

            final List<String> alternativeNames = licenseMetaData.getAlternativeNames();
            String canonicalName = licenseMetaData.getCanonicalName();
            String category = licenseMetaData.getCategory();

            if (canonicalName != null && alternativeNames != null) {
                canonicalName = canonicalName.trim();
                category = category.trim();

                // report names that are already covered by another metadata set
                for (String alternativeName : alternativeNames) {
                    checkAlternativeName(alternativeName);
                }
                checkAlternativeName(canonicalName);

                // integrate the canonical name and alternative names in map
                for (String alternativeName : alternativeNames) {
                    addAlternativeNameToMap(canonicalName, category, alternativeName);
                }
                addAlternativeNameToMap(canonicalName, category, canonicalName);
                if (licenseMetaData.getSpdxIdentifier() != null) {
                    addAlternativeNameToMap(canonicalName, category, licenseMetaData.getSpdxIdentifier());
                }

                // we consolidate after populating the map
                licenseMetaData.consolidateAlternativeNames();
            } else {
                throw new IllegalStateException("License without a name.");
            }
        } catch (Exception e) {
            throw new IllegalStateException("Cannot read license " + file.getAbsolutePath(), e);
        }
    }
}
/**
 * Registers the trimmed alternative name in the license name map (pointing to the canonical name)
 * and in the category name map (pointing to the category). Names, canonical names and categories
 * that are null, blank or the literal {@code []} are not registered.
 *
 * @param canonicalName The canonical name the alternative name maps to.
 * @param category The category the alternative name maps to.
 * @param alternativeName The name to register.
 */
private void addAlternativeNameToMap(String canonicalName, String category, String alternativeName) {
    // String.valueOf(null) yields the text "null"; a missing name must not be registered as such
    if (alternativeName == null) {
        return;
    }

    final String trimmedName = alternativeName.trim();
    if (!org.springframework.util.StringUtils.hasText(trimmedName) || "[]".equals(trimmedName)) {
        return;
    }

    if (org.springframework.util.StringUtils.hasText(canonicalName) && !"[]".equals(canonicalName)) {
        licenseNameMap.put(trimmedName, canonicalName);
    }
    if (org.springframework.util.StringUtils.hasText(category) && !"[]".equals(category)) {
        categoryNameMap.put(trimmedName, category);
    }
}
/**
 * Logs a warning in case the given name is already registered in the license name map.
 * <p>
 * The map stores names trimmed but in their original case. Therefore, the trimmed name is checked
 * as is; the lower-case variant is checked in addition.
 *
 * @param alternativeName The name to check; null is ignored.
 */
private void checkAlternativeName(String alternativeName) {
    if (alternativeName == null) {
        return;
    }

    final String trimmedName = alternativeName.trim();
    final boolean alreadyCovered = licenseNameMap.containsKey(trimmedName)
            || licenseNameMap.containsKey(trimmedName.toLowerCase(Locale.ROOT));
    if (alreadyCovered) {
        LOG.warn("License '{}' already covered.", alternativeName);
    }
}
/**
 * Normalizes the given license text and analyzes it with combining and ignoring enabled.
 *
 * @param licenseText The license text to analyze.
 *
 * @return The matched terms of the scan result.
 */
public List analyze(String licenseText) {
    return analyze(StringStats.normalize(licenseText, false), true, true);
}
/**
 * Analyzes the given license text stats and returns the matched terms.
 *
 * @param licenseTextStats The license text stats to analyze.
 * @param enableCombine Passed on to {@link #doAnalyze(StringStats, boolean, boolean)}.
 * @param enableIgnore Passed on to {@link #doAnalyze(StringStats, boolean, boolean)}.
 *
 * @return The matched terms of the scan result.
 */
public List analyze(StringStats licenseTextStats, boolean enableCombine, boolean enableIgnore) {
    return doAnalyze(licenseTextStats, enableCombine, enableIgnore).getMatchedTerms();
}
/**
 * Analyzes the given license text stats with all registered terms metadata and merges the
 * individual results. The merged result is not processed any further; this is left to the caller.
 *
 * @param licenseTextStats The license text stats to analyze.
 *
 * @return ScanResultPart instance holding the merged, unprocessed results.
 */
public ScanResultPart doAnalyze(StringStats licenseTextStats) {
    final ScanResultPart merged = new ScanResultPart();
    licenseMetaDataMap.values().forEach(tmd -> merged.merge(tmd.analyze(licenseTextStats)));
    return merged;
}
/**
 * Analyzes the given license text stats with all registered terms metadata, merges the individual
 * results and processes the merged result.
 *
 * @param licenseTextStats The license text stats to analyze.
 * @param enableCombine Passed on to the processing of the scan result.
 * @param enableIgnore Passed on to the processing of the scan result.
 *
 * @return ScanResultPart instance holding the processed result.
 */
public ScanResultPart doAnalyze(StringStats licenseTextStats, boolean enableCombine, boolean enableIgnore) {
    final ScanResultPart consolidated = new ScanResultPart();

    // the terms metadata is processed sequentially; parallel processing is not enabled
    for (TermsMetaData tmd : licenseMetaDataMap.values()) {
        consolidated.merge(tmd.analyze(licenseTextStats));
    }

    // consolidate result
    consolidated.process(this, enableIgnore, enableCombine);
    return consolidated;
}
/**
 * @return The internal map from registered names to categories; not a copy.
 */
public Map getCategoryNameMap() {
    return this.categoryNameMap;
}
/**
 * @return The internal map from registered names to canonical names; not a copy.
 */
public Map getLicenseNameMap() {
    return this.licenseNameMap;
}
/**
 * Looks up terms metadata by its exact canonical name.
 *
 * @param canonicalName The canonical name to look up.
 *
 * @return The registered terms metadata or null if the canonical name is not registered.
 */
public TermsMetaData getTermsMetaData(String canonicalName) {
    return this.licenseMetaDataMap.get(canonicalName);
}
/**
 * Finds terms metadata by canonical name or, alternatively, by any name registered in the license
 * name map.
 *
 * @param name The name to find terms metadata for.
 *
 * @return The terms metadata found or null.
 */
public TermsMetaData findTermsMetaData(String name) {
    // a canonical name match takes precedence
    final TermsMetaData directMatch = getTermsMetaData(name);
    if (directMatch != null) {
        return directMatch;
    }

    // alternatively use the name mapping; without mapping the name itself is used
    final String mappedName = licenseNameMap.get(name);
    return getTermsMetaData(mappedName != null ? mappedName : name);
}
/**
 * @return The internal map of all registered terms metadata keyed by canonical name; not a copy.
 */
public Map getLicenseMetaDataMap() {
    return this.licenseMetaDataMap;
}
/**
 * Replaces all occurrences of the normalized masks in the normalized string of the given stats
 * by {@link #STRING_WHITESPACE} and updates the stats with the result. Each mask is applied
 * repeatedly until the length of the string does not change anymore.
 *
 * @param licenseTextStats The stats to apply the masks to; updated in place.
 */
public void applyMasks(StringStats licenseTextStats) {
    String normalizedLicenseString = licenseTextStats.getNormalizedString();

    for (final String normalizedMask : getNormalizedMasks()) {
        // replacing the empty string inserts a blank between all characters; the string would
        // grow with every pass and the loop below would never terminate
        if (normalizedMask.isEmpty()) {
            continue;
        }

        int lengthBeforeReplace;
        do {
            lengthBeforeReplace = normalizedLicenseString.length();
            normalizedLicenseString = normalizedLicenseString.replace(normalizedMask, STRING_WHITESPACE);
        } while (lengthBeforeReplace != normalizedLicenseString.length());
    }

    licenseTextStats.update(normalizedLicenseString);
}
/**
 * Compiles the keys of the mappings of the given terms metadata into patterns.
 *
 * @param termsMetaData The terms metadata providing the mappings.
 *
 * @return The compiled patterns with their replacements, in the iteration order of the mappings.
 */
public Map compilePatternList(TermsMetaData termsMetaData) {
    final Map<Pattern, String> compiled = new LinkedHashMap<>();
    final Map<String, String> mappings = termsMetaData.getMappings();
    for (final String regex : mappings.keySet()) {
        compiled.put(Pattern.compile(regex), mappings.get(regex));
    }
    return compiled;
}
public String applyMappings(String text) {
// if not existing yet, build the mappingMap
if (this.mappingMap == null) {
final Map> mappingMap = new TreeMap<>();
for (final TermsMetaData termsMetaData : getLicenseMetaDataMap().values()) {
if (termsMetaData.getMappings() != null) {
final List mappingOrder = termsMetaData.getMappingOrder();
if (mappingOrder != null) {
for (String order : mappingOrder) {
// compile patterns; expect several on the same level
final Map compiledPatternList = compilePatternList(termsMetaData);
mappingMap.computeIfAbsent(order, a -> new LinkedHashMap<>()).putAll(compiledPatternList);
}
} else {
final Map compiledPatternList = compilePatternList(termsMetaData);
mappingMap.computeIfAbsent(ORDER_DEFAULT, a -> new LinkedHashMap<>()).putAll(compiledPatternList);
}
}
}
this.mappingMap = mappingMap;
}
if (LOG.isDebugEnabled()) {
LOG.debug("Mapping: {}", text);
}
for (final Map.Entry> entry : mappingMap.entrySet()) {
for (final Map.Entry patternEntry : entry.getValue().entrySet()) {
if (LOG.isDebugEnabled()) {
final String previousText = text;
text = replace(text, patternEntry);
if (!text.equals(previousText)) {
LOG.debug(" Effective mapping: ");
LOG.debug(" pattern: " + patternEntry.getKey().pattern());
LOG.debug(" replacement: " + patternEntry.getValue());
LOG.debug(text);
}
} else {
text = replace(text, patternEntry);
}
}
}
return text;
}
/**
 * Replaces all matches of the pattern (entry key) in the text by the replacement (entry value).
 *
 * @param text The text to apply the replacement to.
 * @param patternEntry The pattern and its replacement.
 *
 * @return The resulting text.
 */
private static String replace(String text, Map.Entry<Pattern, String> patternEntry) {
    final Pattern pattern = patternEntry.getKey();
    final String replacement = patternEntry.getValue();
    return pattern.matcher(text).replaceAll(replacement);
}
/**
 * Converts canonical names into terms metadata. For names that are not registered a new
 * {@link TermsMetaData} instance is created that carries the name and requires an annex notice.
 *
 * @param canonicalTermNames The canonical names to convert; may be null.
 *
 * @return The terms metadata in the order of the given names; empty in case null was passed.
 */
public List convert(final List canonicalTermNames) {
    final List<TermsMetaData> converted = new ArrayList<>();
    if (canonicalTermNames == null) {
        return converted;
    }

    for (final String canonicalName : canonicalTermNames) {
        TermsMetaData termsMetaData = getTermsMetaData(canonicalName);
        if (termsMetaData == null) {
            // Terms that are not configured need to be explained; otherwise a TermsMetaData
            // object should be configured for them.
            termsMetaData = new TermsMetaData();
            termsMetaData.setCanonicalName(canonicalName);
            termsMetaData.setRequiresAnnexNotice(true);
        }
        converted.add(termsMetaData);
    }
    return converted;
}
/**
 * Removes the given terms metadata from this instance:
 * <ul>
 *   <li>all names mapped to its canonical name, together with the category mapping registered
 *       for the very same names; mappings of other terms sharing the category are kept,</li>
 *   <li>the registration by canonical name, short names and SPDX identifier,</li>
 *   <li>its canonical name from the partial and excluded matches of the remaining terms.</li>
 * </ul>
 * The lazily built mapping, mask and canonical name history caches are discarded.
 *
 * @param tmd The terms metadata to remove.
 */
public void remove(TermsMetaData tmd) {
    final String canonicalName = tmd.getCanonicalName();
    LOG.info("Removing term metadata: " + canonicalName);

    // remove all names mapping to the tmd canonical name and their category mapping
    final Iterator<Map.Entry<String, String>> names = licenseNameMap.entrySet().iterator();
    while (names.hasNext()) {
        final Map.Entry<String, String> entry = names.next();
        if (!canonicalName.equals(entry.getValue())) {
            continue;
        }

        // read the key before the entry is removed
        final String name = entry.getKey();
        LOG.info("Removing alternative name: " + name);
        names.remove();

        if (categoryNameMap.remove(name) != null) {
            LOG.info(" Removing category: " + tmd.getCategory());
        }
    }

    // remove mapping license metadata map and the derived lookups
    licenseMetaDataMap.remove(canonicalName);
    removeByCanonicalName(shortNameMap, canonicalName);
    removeByCanonicalName(spdxIdentifierMap, canonicalName);

    // discard caches that may still contain contributions of the removed terms
    synchronized (this) {
        historicalCanonicalNameMap = null;
        normalizedMasks = null;
    }
    mappingMap = null;

    for (TermsMetaData otherTmd : licenseMetaDataMap.values()) {
        final List<?> partialMatches = otherTmd.getPartialMatches();
        if (partialMatches != null && partialMatches.remove(canonicalName)) {
            LOG.info(" Removing partial match registered for: " + canonicalName);
        }

        final List<?> excludedMatches = otherTmd.getExcludedMatches();
        if (excludedMatches != null && excludedMatches.remove(canonicalName)) {
            LOG.info(" Removing excluded match registered for: " + canonicalName);
        }
    }
}

/**
 * Removes all entries from the lookup whose terms metadata carries the given canonical name.
 */
private static void removeByCanonicalName(Map<String, TermsMetaData> lookup, String canonicalName) {
    if (lookup == null) {
        return;
    }
    lookup.values().removeIf(candidate -> canonicalName.equals(candidate.getCanonicalName()));
}
/**
 * Looks up terms metadata in the short name map.
 *
 * @param licenseId The short name, alternative short name or SPDX identifier to look up.
 *
 * @return The registered terms metadata or null.
 */
public TermsMetaData findByShortName(String licenseId) {
    return this.shortNameMap.get(licenseId);
}
/**
 * Looks up terms metadata in the SPDX identifier map.
 *
 * @param licenseId The SPDX identifier to look up.
 *
 * @return The registered terms metadata or null.
 */
public TermsMetaData findBySpdxIdentifier(String licenseId) {
    return this.spdxIdentifierMap.get(licenseId);
}
/**
 * Finds terms metadata for a license name that has been renamed.
 *
 * @param license The (historical) license name.
 *
 * @return The terms metadata registered for the updated canonical name; null in case the name
 *         has not been renamed or the updated name is not registered.
 */
public TermsMetaData findUsingCanonicalNameInHistory(String license) {
    final String updatedName = getUpdatedCanonicalName(license);
    return license.equals(updatedName) ? null : getTermsMetaData(updatedName);
}
/**
 * Follows the canonical name history starting at the given name until a name is reached that
 * is not renamed any further.
 *
 * @param license The name to start with.
 *
 * @return The current canonical name; the given name in case it has not been renamed.
 *
 * @throws IllegalStateException In case the history contains a cycle. Every name visited is
 *         tracked, so cycles that do not lead back to the first renamed name are detected as well.
 */
public String getUpdatedCanonicalName(String license) {
    String current = findCanonicalNameInHistory(license);
    if (current.equals(license)) {
        return current;
    }

    final Set<String> visited = new HashSet<>();
    visited.add(license);
    visited.add(current);

    while (true) {
        final String next = findCanonicalNameInHistory(current);

        // no further rename; the end of the chain is reached
        if (next.equals(current)) {
            return current;
        }

        // a name seen before means the chain never ends
        if (!visited.add(next)) {
            throw new IllegalStateException("Circular reference detected while processing " + license + ".");
        }
        current = next;
    }
}
/**
 * Performs a single lookup in the canonical name histories of all registered terms metadata.
 *
 * @param license The name to search for.
 *
 * @return The canonical name of the first terms metadata whose history contains the given name;
 *         the given name in case no history contains it.
 */
private String findCanonicalNameInHistory(String license) {
    // FIXME: needs optimization
    for (TermsMetaData tmd : getLicenseMetaDataMap().values()) {
        if (tmd == null || tmd.getCanonicalNameHistory() == null) {
            continue;
        }
        for (String historicalName : tmd.getCanonicalNameHistory()) {
            if (historicalName.equals(license)) {
                return tmd.getCanonicalName();
            }
        }
    }
    return license;
}
/**
 * Provides the map from all historical canonical names to their updated canonical names. The map
 * is built on first access and cached.
 *
 * @return The cached map; not a copy.
 *
 * @throws IllegalStateException In case a name history is circular. Nothing is cached in this
 *         case, so that a later call does not return an incomplete map.
 */
public synchronized Map getHistoricalCanonicalNameMap() {
    if (historicalCanonicalNameMap == null) {
        // build locally; the field is only assigned once the map is complete
        final Map<String, String> historyMap = new HashMap<>();
        for (TermsMetaData tmd : getLicenseMetaDataMap().values()) {
            if (tmd.getCanonicalNameHistory() == null) {
                continue;
            }
            for (String historicalName : tmd.getCanonicalNameHistory()) {
                historyMap.put(historicalName, getUpdatedCanonicalName(historicalName));
            }
        }
        historicalCanonicalNameMap = historyMap;
    }
    return historicalCanonicalNameMap;
}
/**
 * Provides the normalized mask strings of all registered terms metadata in iteration order. The
 * set is built on first access and cached.
 *
 * @return The cached set of normalized masks.
 *
 * @throws IllegalStateException In case a mask is empty. Nothing is cached in this case, so that
 *         a later call does not return an incomplete set.
 */
private synchronized Set<String> getNormalizedMasks() {
    if (normalizedMasks == null) {
        // build locally; the field is only assigned once the set is complete
        final Set<String> masks = new LinkedHashSet<>();
        for (final TermsMetaData lmd : licenseMetaDataMap.values()) {
            if (lmd.getMasks() == null) {
                continue;
            }
            for (final String mask : lmd.getMasks().getMatches()) {
                if (mask.isEmpty()) throw new IllegalStateException("Mask matches must be longer than 1 character.");

                // NOTE: currently masks are only applied on mapped/normalized strings.
                //  The mask is normalized only; mappings are applied to content, not to masks.
                final StringStats normalizedMask = StringStats.normalize(mask, false);
                masks.add(normalizedMask.getNormalizedString());
            }
        }
        normalizedMasks = masks;
    }
    return normalizedMasks;
}
/**
 * @return The wordlist; empty unless {@link #generateAndSetWordlist()} has been invoked.
 */
public Collection getWordlist() {
    return this.wordlist;
}
/**
 * Resolves the {@link TermsMetaData} instance for the given license. The canonical name history
 * is used implicitly to anticipate license renames. In case the name cannot be resolved as given,
 * it is resolved once more with the suffix {@code " (or any later version)"} removed.
 *
 * @param canonicalName The canonicalName of the license to resolve.
 *
 * @return The resolved {@link TermsMetaData} instance or null in case the name could not be
 *         resolved.
 */
public TermsMetaData resolveTermsMetaData(String canonicalName) {
    // resolve with original name
    final TermsMetaData resolved = resolveTermsMetaDataCurrentAndHistory(canonicalName);
    if (resolved != null) {
        return resolved;
    }

    // apply additional license transforms to resolve license with modulated name
    final String modulatedLicense = canonicalName.replace(" (or any later version)", "");
    return resolveTermsMetaDataCurrentAndHistory(modulatedLicense);
}
/**
 * Resolves the {@link TermsMetaData} instance for the given license. The canonical name history
 * is used implicitly to anticipate license renames; general license name transformations are
 * not applied.
 *
 * @param canonicalName The canonicalName of the license to resolve.
 *
 * @return The resolved {@link TermsMetaData} instance or null in case the name could not be
 *         resolved.
 */
public TermsMetaData resolveTermsMetaDataCurrentAndHistory(String canonicalName) {
    final TermsMetaData current = findTermsMetaData(canonicalName);
    if (current != null) {
        return current;
    }
    return findUsingCanonicalNameInHistory(canonicalName);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy