All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.artifact.terms.model.NormalizationMetaData Maven / Gradle / Ivy

There is a newer version: 0.132.0
Show newest version
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.artifact.terms.model;

import com.metaeffekt.artifact.analysis.preprocess.filter.wordlist.WordlistGenerator;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.StringStats;
import com.metaeffekt.artifact.analysis.utils.StringUtils;
import org.apache.tools.ant.DirectoryScanner;
import org.metaeffekt.core.inventory.processor.model.Inventory;
import org.metaeffekt.core.inventory.processor.model.LicenseData;
import org.metaeffekt.core.inventory.processor.reader.InventoryReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Created by kklein on 16/01/2017.
 */
public class NormalizationMetaData implements Serializable {

    private static final long serialVersionUID = -1;

    private static final Logger LOG = LoggerFactory.getLogger(NormalizationMetaData.class);

    public static final String STRING_WHITESPACE = " ";
    public static final String PREFIX_LICENSE_REF = "LicenseRef-";
    public static final String ORDER_DEFAULT = "999";

    private final Map licenseNameMap = new HashMap<>();
    private final Map categoryNameMap = new HashMap<>();
    private final Map licenseMetaDataMap = new HashMap<>();
    private transient Map shortNameMap = new HashMap<>();
    private transient Map spdxIdentifierMap = new HashMap<>();

    // these transient maps are implicitly recreated
    private transient Map> mappingMap;
    private transient Map historicalCanonicalNameMap = null;
    private transient Set normalizedMasks = null;

    /**
     * Dummy NormalizationMetaData instance to support processes without access to a terms database.
     */
    public static final NormalizationMetaData EMPTY_NORMALIZATION_METADATA = new NormalizationMetaData();

    /**
     * A list of "words" generated using text metadata and license texts.
     */
    private List wordlist = new ArrayList<>();

    public NormalizationMetaData(File baseDir) {
        LOG.info("Reading Terms Database [" + getCanonicalPath(baseDir) + "]...");

        readFromFileSystem(baseDir);
        processExternalMetaData(baseDir);
        preprocess();

        // NOTE:
        //  We do not initialize the wordlist as this point. Currently
        //  the wordlist is only initialized for building the encrypted
        //  TMD packages.

        LOG.info("Reading Terms Database [" + getCanonicalPath(baseDir) + "] completed.");
    }

    /**
     * Private default constructor used for construction of EMPTY_NORMALIZATION_METADATA.
     */
    private NormalizationMetaData() {
    }

    private String getCanonicalPath(File baseDir) {
        try {
            return baseDir.getCanonicalFile().getAbsolutePath();
        } catch (IOException e) {
            return baseDir.getAbsolutePath();
        }
    }

    /**
     * Generates and sets the wordlist in this NormalizationMetaData object.
* Note that this needs a fresh NormalizationMetaData object, read from an existing source repo on disk. */ public void generateAndSetWordlist() { // need to prefilter licenseMetaDataMap to remove customer metadata which should never be included final Map filteredTermsMetaDataMap = new HashMap<>(getLicenseMetaDataMap()); for (Map.Entry e : getLicenseMetaDataMap().entrySet()) { if (e.getValue().isCustomerMetaData()) { filteredTermsMetaDataMap.remove(e.getKey()); } } try { // pass the filtered licenseMetaDataMap to avoid inclusion of customer licenses this.wordlist = WordlistGenerator.createWordlist(filteredTermsMetaDataMap); } catch (IOException e) { throw new IllegalStateException("Failed to generate wordlist for termsMetaData."); } } private void processExternalMetaData(File baseDir) { final File openCodeConfigDir = new File(baseDir, "_external/opencode"); if (openCodeConfigDir.exists()) { // load official OpenCoDE approved list processOpenCoDEApprovedInputFile(new File(openCodeConfigDir, "approved-licenses.txt"), false); // load unofficial similarity based approvals processOpenCoDEApprovedInputFile(new File(openCodeConfigDir, "approved-licenses_pwc.txt"), true); // load unofficial similarity based approvals processOpenCoDEApprovedInputFile(new File(openCodeConfigDir, "approved-licenses_ae.txt"), true); // load not approved license list processOpenCoDENotApprovedInputFile(new File(openCodeConfigDir, "not-approved-licenses.txt")); } // apply postprocessing inheriting open code status along baseTerms relationship for (TermsMetaData tmd : licenseMetaDataMap.values()) { final String baseTerms = tmd.getBaseTerms(); if (baseTerms != null) { if (tmd.getOpenCoDEStatus() == null) { final TermsMetaData representedAsTmd = findTermsMetaData(baseTerms); if (representedAsTmd != null) { tmd.setOpenCoDEStatus(representedAsTmd.getOpenCoDEStatus()); } } } } final File osiConfigDir = new File(baseDir, "_external/osi"); if (osiConfigDir.exists()) { final File osiLicenseStatus = new File(osiConfigDir, "osi-license-status.xls"); if (osiLicenseStatus.exists()) { try { processOsiLicenseStatus(osiLicenseStatus); } catch (IOException exception) { LOG.error("Cannot parse OSI license status information.", exception); } } else { LOG.warn("Cannot parse OSI license status information. File [{}] does not exist.", osiLicenseStatus); } } } private void processOpenCoDENotApprovedInputFile(File openCodeNotApprovedLicenses) { if (openCodeNotApprovedLicenses.exists()) { try { processOpenCoDENotApproved(openCodeNotApprovedLicenses); } catch (IOException exception) { LOG.error("Cannot parse Open CoDE not approved licenses.", exception); } } else { LOG.warn("Cannot parse Open CoDE not approved licenses. File [{}] does not exist.", openCodeNotApprovedLicenses); } } private void processOpenCoDEApprovedInputFile(File openCodeApprovedLicenses, boolean similarLicenseMapping) { if (openCodeApprovedLicenses.exists()) { try { processOpenCoDEApproved(openCodeApprovedLicenses, similarLicenseMapping); } catch (IOException exception) { LOG.error("Cannot parse Open CoDE approved licenses.", exception); } } else { LOG.warn("Cannot parse Open CoDE approved licenses. File [{}] does not exist.", openCodeApprovedLicenses); } } private void processOsiLicenseStatus(File osiLicenseStatus) throws IOException { Inventory inventory = new InventoryReader().readInventory(osiLicenseStatus); for (LicenseData licenseData : inventory.getLicenseData()) { final String canonicalName = licenseData.get(LicenseData.Attribute.CANONICAL_NAME); final TermsMetaData termsMetaData = getTermsMetaData(canonicalName); if (termsMetaData == null) { if (!"x".equalsIgnoreCase(licenseData.get("License Missing"))) { LOG.warn("Cannot find TermsMetaData for [" + canonicalName + "]."); } continue; } final String osiLicenseId = licenseData.get("OSI License Id"); final String osiSupersededBy = licenseData.get("OSI Superseded-by Id"); final String osiStatus = licenseData.get("OSI Status"); final String osiCategory = licenseData.get("OSI Category"); final String osiRationale = licenseData.get("OSI Rationale"); // validate license ids are in sync final String tmdOsiId = termsMetaData.getOtherId("osi"); if (StringUtils.notEmpty(tmdOsiId)) { if (!tmdOsiId.equals(osiLicenseId)) { LOG.warn("OSI license ids inconsistent for [" + canonicalName + "]."); } } // approved license must have an OSI id. if ("approved".equalsIgnoreCase(osiStatus) && StringUtils.isEmpty(tmdOsiId)) { LOG.warn("Approved OSI licenses must carry an OSI id. Check [" + canonicalName + "]."); } termsMetaData.setOsiStatus(osiStatus); termsMetaData.setOsiRationale(osiRationale); termsMetaData.setOsiCategory(osiCategory); termsMetaData.setOsiSupersededBy(osiSupersededBy); } } private void processOpenCoDEApproved(File openCodeApprovedLicenses, boolean similarLicenseMapping) throws IOException { final List lines = FileUtils.readLines(openCodeApprovedLicenses, FileUtils.ENCODING_UTF_8); for (String line : lines) { if (line.startsWith("#")) continue; final String[] split = line.split("\t"); if (!similarLicenseMapping) { if (split.length == 4) { // the license name may not always be provided final String licenseName = split[0]; // the short id is either an SPDX license identifier or a LicenseRef-- combination final String licenseShortId = split[1]; final TermsMetaData termsMetaData = findByOpenCodeLicenseId(licenseShortId); if (termsMetaData != null) { termsMetaData.setOpenCoDEStatus(TermsMetaData.STATUS_APPROVED); } else { LOG.warn("No terms metadata for Open CoDE license ['{}' ({})].", licenseName, licenseShortId); } } else { LOG.error("Cannot parse Open CoDE license data: " + line); } } else { if (split.length == 2) { // expecting Canonical Name and Similar License (SPDX or ScanCode) // the license name may not always be provided final String licenseName = split[0]; // the short id is either an SPDX license identifier or a LicenseRef-- combination final String licenseShortId = split[1]; final TermsMetaData termsMetaData = getTermsMetaData(licenseName); if (termsMetaData != null) { // also try to resolve similar license final TermsMetaData similarTmd = findByOpenCodeLicenseId(licenseShortId); if (similarTmd == null) { LOG.warn("Cannot find similar license [{}].", licenseShortId); } if (!StringUtils.hasText(termsMetaData.getOpenCoDEStatus())) { termsMetaData.setOpenCoDEStatus(TermsMetaData.STATUS_APPROVED_IMPLICIT); termsMetaData.setOpenCoDESimilarLicenseId(licenseShortId); if (!similarTmd.isOpenCodeApproved()) { LOG.warn("Inconsistency detected. Using similar license to derive implicit Open CoDE approval for [{}], but similar license [{}] not approved.", licenseName, licenseShortId); } } } else { LOG.warn("No terms metadata for canonical names [{}].", licenseName); } } else { LOG.error("Cannot parse Open CoDE license data: " + line); } } } } private TermsMetaData findByOpenCodeLicenseId(String openCodeLicenseId) { if (openCodeLicenseId.startsWith("LicenseRef-")) { final String otherId = openCodeLicenseId .replace(PREFIX_LICENSE_REF, "") .replaceFirst("-", ":"); return findByOtherId(otherId); } else { // license id does not use SPDX TermsMetaData termsMetaData = this.findBySpdxIdentifier(openCodeLicenseId); // alternatively find by shortname to also support concluded approved list if (termsMetaData == null) { termsMetaData = findByShortName(openCodeLicenseId); } return termsMetaData; } } private TermsMetaData findByOtherId(String otherId) { TermsMetaData tmd = null; for (TermsMetaData termsMetaData : licenseMetaDataMap.values()) { final List otherIds = termsMetaData.getOtherIds(); if (otherIds != null) { if (otherIds.contains(otherId)) { tmd = termsMetaData; break; } } } return tmd; } public void processOpenCoDENotApproved(File openCodeNotApprovedLicenses) throws IOException { final List lines = FileUtils.readLines(openCodeNotApprovedLicenses, FileUtils.ENCODING_UTF_8); for (String line : lines) { if (line.startsWith("#")) continue; final String[] split = line.split("\t"); if (split.length == 4) { final String licenseName = split[0]; final String licenseShortId = split[1]; final TermsMetaData termsMetaData = findByOpenCodeLicenseId(licenseShortId); if (termsMetaData != null) { termsMetaData.setOpenCoDEStatus(TermsMetaData.STATUS_NOT_APPROVED); } else { LOG.warn("No terms metadata for Open CoDE license [{} ({})].", licenseName, licenseShortId); } } else { LOG.error("Cannot parse Open CoDE license data: " + line); } } } private void preprocess() { // check for dependencies between the license meta data instances. for (TermsMetaData licenseMetaData : licenseMetaDataMap.values()) { final Map references = licenseMetaData.getReferences(); if (references != null) { for (String licenseReference : references.keySet()) { for (TermsMetaData candidate : licenseMetaDataMap.values()) { if (candidate.getCategory().equals(licenseReference) || candidate.getCanonicalName().equals(licenseReference)) { Reference reference = references.get(licenseReference); candidate.addReference(reference); } } } } } } private void readFromFileSystem(File baseDir) { final DirectoryScanner scanner = new DirectoryScanner(); scanner.setBasedir(baseDir); scanner.setIncludes(new String[]{"**/license.meta.yaml"}); scanner.setExcludes(new String[]{"**/.meta/**/*"}); scanner.scan(); for (String path : scanner.getIncludedFiles()) { File file = new File(baseDir, path); try { // compatibility String yamlContent = FileUtils.readFileToString(file, FileUtils.ENCODING_UTF_8); Yaml yaml = new Yaml(); TermsMetaData licenseMetaData = yaml.loadAs(yamlContent, TermsMetaData.class); licenseMetaData.setNormalizationMetaData(this); licenseMetaData.setFile(file); licenseMetaData.mergeExternalMetaData(); licenseMetaData.readPartialMatches(); licenseMetaData.readMatchedMarkers(); // scan for license folder File licenseDir = new File(file.getParentFile(), "license"); if (licenseDir.exists()) { String[] licenses = FileUtils.scanDirectoryForFiles(licenseDir, true, "*.*"); if (licenses.length == 1) { licenseMetaData.setLicenseFile(new File(licenseDir, licenses[0]).getAbsolutePath()); } else if (licenses.length > 1) { throw new IllegalStateException(String.format("%s: more that one license file detected." + Arrays.asList(licenses), licenseMetaData.getCanonicalName())); } } // scan for readme folder File readmeDir = new File(file.getParentFile(), "readme"); if (readmeDir.exists()) { String[] files = FileUtils.scanDirectoryForFiles(readmeDir, true, "*.*"); if (files.length == 1) { licenseMetaData.setReadmeFile(new File(readmeDir, files[0]).getAbsolutePath()); } else if (files.length > 1) { throw new IllegalStateException(String.format("%s: more that one readme file supported." + Arrays.asList(files), licenseMetaData.getCanonicalName())); } } if (licenseMetaDataMap.containsKey(licenseMetaData.getCanonicalName())) { throw new IllegalStateException("Canonical name " + licenseMetaData.getCanonicalName() + " already registered." + " Duplicate use detected in " + path); } if (LOG.isTraceEnabled()) { LOG.trace("Registering license meta data [{}].", licenseMetaData.getCanonicalName()); } licenseMetaDataMap.put(licenseMetaData.getCanonicalName(), licenseMetaData); // contribute shortnames to shortname map if (licenseMetaData.getShortName() != null) { shortNameMap.put(licenseMetaData.getShortName(), licenseMetaData); } // contribute alternative names to shortname map; do not replace existing if (licenseMetaData.getAlternativeShortNames() != null) { licenseMetaData.getAlternativeShortNames().stream().forEach(s -> shortNameMap.putIfAbsent(s, licenseMetaData)); } // contribute SPDX identifiers to shortname map; do not replace existing if (licenseMetaData.getSpdxIdentifier() != null) { shortNameMap.putIfAbsent(licenseMetaData.getSpdxIdentifier(), licenseMetaData); spdxIdentifierMap.put(licenseMetaData.getSpdxIdentifier(), licenseMetaData); } List alternativeNames = licenseMetaData.getAlternativeNames(); String canonicalName = licenseMetaData.getCanonicalName(); String category = licenseMetaData.getCategory(); if (canonicalName != null && alternativeNames != null) { canonicalName = canonicalName.trim(); category = category.trim(); // check whether the string is already covered in another metadata set; compared by lower case for (String alternativeName : alternativeNames) { checkAlternativeName(alternativeName); } checkAlternativeName(canonicalName); // integrate the canonical name and alternative names in map for (String alternativeName : alternativeNames) { addAlternativeNameToMap(canonicalName, category, alternativeName); } addAlternativeNameToMap(canonicalName, category, canonicalName); if (licenseMetaData.getSpdxIdentifier() != null) { addAlternativeNameToMap(canonicalName, category, licenseMetaData.getSpdxIdentifier()); } // we consolidate after populating the map licenseMetaData.consolidateAlternativeNames(); } else { throw new IllegalStateException("License without a name."); } } catch (Exception e) { throw new IllegalStateException("Cannot read license " + file.getAbsolutePath(), e); } } } private void addAlternativeNameToMap(String canonicalName, String category, String alternativeName) { String trim = String.valueOf(alternativeName).trim(); if (org.springframework.util.StringUtils.hasText(trim) && !"[]".equals(trim)) { if (org.springframework.util.StringUtils.hasText(canonicalName) && !"[]".equals(canonicalName)) { licenseNameMap.put(trim, canonicalName); } if (org.springframework.util.StringUtils.hasText(category) && !"[]".equals(category)) { categoryNameMap.put(trim, category); } } } private void checkAlternativeName(String alternativeName) { String trim = String.valueOf(alternativeName).trim().toLowerCase(); if (licenseNameMap.containsKey(trim)) { LOG.warn("License '" + alternativeName + "' already covered."); } } public List analyze(String licenseText) { final StringStats licenseTextStats = StringStats.normalize(licenseText, false); return analyze(licenseTextStats, true, true); } public List analyze(StringStats licenseTextStats, boolean enableCombine, boolean enableIgnore) { final ScanResultPart scanResultPart = doAnalyze(licenseTextStats, enableCombine, enableIgnore); return scanResultPart.getMatchedTerms(); } /** * Analyze the given license (text stats) without processing. Callee takes responsibility. * * @param licenseTextStats The license text stats to analyze. * @return ScanResultPart instance. */ public ScanResultPart doAnalyze(StringStats licenseTextStats) { final ScanResultPart scanResult = new ScanResultPart(); for (TermsMetaData licenseMetaData : licenseMetaDataMap.values()) { scanResult.merge(licenseMetaData.analyze(licenseTextStats)); } return scanResult; } public ScanResultPart doAnalyze(StringStats licenseTextStats, boolean enableCombine, boolean enableIgnore) { final ScanResultPart scanResultPart = new ScanResultPart(); // process the TMDs in parallel licenseMetaDataMap.values().stream() // .parallel() .map(tmd -> tmd.analyze(licenseTextStats)) .forEach(r -> scanResultPart.merge(r)); // consolidate result scanResultPart.process(this, enableIgnore, enableCombine); return scanResultPart; } public Map getCategoryNameMap() { return categoryNameMap; } public Map getLicenseNameMap() { return licenseNameMap; } public TermsMetaData getTermsMetaData(String canonicalName) { return licenseMetaDataMap.get(canonicalName); } public TermsMetaData findTermsMetaData(String name) { // check canonical name match first TermsMetaData termsMetaData = getTermsMetaData(name); if (termsMetaData != null) { return termsMetaData; } // alternatively use the name mapping String mappedName = licenseNameMap.get(name); if (mappedName == null) mappedName = name; return getTermsMetaData(mappedName); } public Map getLicenseMetaDataMap() { return licenseMetaDataMap; } public void applyMasks(StringStats licenseTextStats) { String normalizedLicenseString = licenseTextStats.getNormalizedString(); for (final String normalizedMask : getNormalizedMasks()) { int normalizedStringLength; do { normalizedStringLength = normalizedLicenseString.length(); normalizedLicenseString = normalizedLicenseString.replace(normalizedMask, STRING_WHITESPACE); } while (normalizedStringLength != normalizedLicenseString.length()); } licenseTextStats.update(normalizedLicenseString); } public Map compilePatternList(TermsMetaData termsMetaData) { final Map patterns = new LinkedHashMap<>(); for (final Map.Entry mappings : termsMetaData.getMappings().entrySet()) { patterns.put(Pattern.compile(mappings.getKey()), mappings.getValue()); } return patterns; } public String applyMappings(String text) { // if not existing yet, build the mappingMap if (this.mappingMap == null) { final Map> mappingMap = new TreeMap<>(); for (final TermsMetaData termsMetaData : getLicenseMetaDataMap().values()) { if (termsMetaData.getMappings() != null) { final List mappingOrder = termsMetaData.getMappingOrder(); if (mappingOrder != null) { for (String order : mappingOrder) { // compile patterns; expect several on the same level final Map compiledPatternList = compilePatternList(termsMetaData); mappingMap.computeIfAbsent(order, a -> new LinkedHashMap<>()).putAll(compiledPatternList); } } else { final Map compiledPatternList = compilePatternList(termsMetaData); mappingMap.computeIfAbsent(ORDER_DEFAULT, a -> new LinkedHashMap<>()).putAll(compiledPatternList); } } } this.mappingMap = mappingMap; } if (LOG.isDebugEnabled()) { LOG.debug("Mapping: {}", text); } for (final Map.Entry> entry : mappingMap.entrySet()) { for (final Map.Entry patternEntry : entry.getValue().entrySet()) { if (LOG.isDebugEnabled()) { final String previousText = text; text = replace(text, patternEntry); if (!text.equals(previousText)) { LOG.debug(" Effective mapping: "); LOG.debug(" pattern: " + patternEntry.getKey().pattern()); LOG.debug(" replacement: " + patternEntry.getValue()); LOG.debug(text); } } else { text = replace(text, patternEntry); } } } return text; } private static String replace(String text, Map.Entry patternEntry) { final Matcher matcher = patternEntry.getKey().matcher(text); text = matcher.replaceAll(patternEntry.getValue()); return text; } public List convert(final List canonicalTermNames) { final List termsList = new ArrayList<>(); if (canonicalTermNames != null) { for (final String canonicalName : canonicalTermNames) { TermsMetaData termsMetaData = getTermsMetaData(canonicalName); if (termsMetaData == null) { termsMetaData = new TermsMetaData(); termsMetaData.setCanonicalName(canonicalName); // unknown (not configured terms) require to be explained. Otherwise, a TermsMetaData object // should be configured termsMetaData.setRequiresAnnexNotice(true); } termsList.add(termsMetaData); } } return termsList; } public void remove(TermsMetaData tmd) { LOG.info("Removing term metadata: " + tmd.getCanonicalName()); // remove all names mapping to the tmd canonical name for (Map.Entry entry : new HashSet<>(licenseNameMap.entrySet())) { if (tmd.getCanonicalName().equals(entry.getValue())) { LOG.info("Removing alternative name: " + entry.getKey()); licenseNameMap.remove(entry.getKey()); } } // remove mapping in category map for (Map.Entry entry : new HashSet<>(categoryNameMap.entrySet())) { if (tmd.getCategory().equals(entry.getValue())) { LOG.info(" Removing category: " + tmd.getCategory()); categoryNameMap.remove(entry.getKey()); } } // remove mapping license metadata map licenseMetaDataMap.remove(tmd.getCanonicalName()); for (TermsMetaData otherTmd : licenseMetaDataMap.values()) { final List partialMatches = otherTmd.getPartialMatches(); if (partialMatches != null) { final boolean remove = partialMatches.remove(tmd.getCanonicalName()); if (remove) { LOG.info(" Removing partial match registered for: " + tmd.getCanonicalName()); } } final List excludedMatches = otherTmd.getExcludedMatches(); if (excludedMatches != null) { final boolean remove = excludedMatches.remove(tmd.getCanonicalName()); if (remove) { LOG.info(" Removing excluded match registered for: " + tmd.getCanonicalName()); } } } } public TermsMetaData findByShortName(String licenseId) { return shortNameMap.get(licenseId); } public TermsMetaData findBySpdxIdentifier(String licenseId) { return spdxIdentifierMap.get(licenseId); } public TermsMetaData findUsingCanonicalNameInHistory(String license) { String canonicalName = getUpdatedCanonicalName(license); if (license.equals(canonicalName)) return null; return getTermsMetaData(canonicalName); } public String getUpdatedCanonicalName(String license) { String canonicalName = findCanonicalNameInHistory(license); if (canonicalName.equals(license)) return canonicalName; String firstFinding = canonicalName; String licenseCheck; int count = 0; do { licenseCheck = canonicalName; canonicalName = findCanonicalNameInHistory(canonicalName); count++; } while (!licenseCheck.equals(canonicalName) && !canonicalName.equals(firstFinding)); if (canonicalName.equals(firstFinding) && count > 1) { throw new IllegalStateException("Circular reference detected while processing " + license + "."); } return canonicalName; } private String findCanonicalNameInHistory(String license) { // FIXME: needs optimization for (TermsMetaData tmd : getLicenseMetaDataMap().values()) { if (tmd != null) { if (tmd.getCanonicalNameHistory() != null) { for (String historicalName : tmd.getCanonicalNameHistory()) { if (historicalName.equals(license)) return tmd.getCanonicalName(); } } } } return license; } public synchronized Map getHistoricalCanonicalNameMap() { if (historicalCanonicalNameMap == null) { historicalCanonicalNameMap = new HashMap<>(); for (TermsMetaData tmd : getLicenseMetaDataMap().values()) { if (tmd.getCanonicalNameHistory() != null) { for (String s : tmd.getCanonicalNameHistory()) { historicalCanonicalNameMap.put(s, getUpdatedCanonicalName(s)); } } } } return historicalCanonicalNameMap; } private synchronized Set getNormalizedMasks() { if (normalizedMasks == null) { normalizedMasks = new LinkedHashSet<>(); for (final TermsMetaData lmd : licenseMetaDataMap.values()) { if (lmd.getMasks() != null) { for (final String mask : lmd.getMasks().getMatches()) { if (mask.isEmpty()) throw new IllegalStateException("Mask matches must be longer than 1 character."); // NOTE: currently masks are only applied on mapped/normalized strings. // Therefore, only the mapped versions are required to be processed // normalize mask (not applying mappings anymore; mappings are applied only to content) final StringStats normalizedMask = StringStats.normalize(mask, false); normalizedMasks.add(normalizedMask.getNormalizedString()); } } } } return normalizedMasks; } public Collection getWordlist() { return wordlist; } /** * Resolves the {@link TermsMetaData} instance for the given license. Used implicitly the canonicalNameHistory to * anticipate license renames and applies general license name transformations. * * @param canonicalName The canonicalName of the license to resolve. * * @return The resolved {@link TermsMetaData} instance or null in case the name could not be resolved. */ public TermsMetaData resolveTermsMetaData(String canonicalName) { // resolve with original name TermsMetaData termsMetaData = resolveTermsMetaDataCurrentAndHistory(canonicalName); // apply additional license transforms to resolve license with modulated name if (termsMetaData == null) { final String modulatedLicense = canonicalName.replace(" (or any later version)", ""); termsMetaData = resolveTermsMetaDataCurrentAndHistory(modulatedLicense); } return termsMetaData; } /** * Resolves the {@link TermsMetaData} instance for the given license. Used implicitly the canonicalNameHistory to * anticipate license renames, but does not apply general license name transformations. * * @param canonicalName The canonicalName of the license to resolve. * * @return The resolved {@link TermsMetaData} instance or null in case the name could not be resolved. */ public TermsMetaData resolveTermsMetaDataCurrentAndHistory(String canonicalName) { TermsMetaData termsMetaData = findTermsMetaData(canonicalName); if (termsMetaData == null) { termsMetaData = findUsingCanonicalNameInHistory(canonicalName); } return termsMetaData; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy