All downloads are free. The search and download functionality uses the official Maven repository.

com.metaeffekt.artifact.analysis.metascan.SourceSegmentationSupport Maven / Gradle / Ivy

/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.artifact.analysis.metascan;

import com.metaeffekt.artifact.analysis.model.PropertyProvider;
import com.metaeffekt.artifact.analysis.utils.*;
import com.metaeffekt.artifact.terms.model.NormalizationMetaData;
import org.json.JSONArray;
import org.json.JSONObject;
import org.metaeffekt.core.inventory.processor.model.Artifact;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.*;

public class SourceSegmentationSupport extends AbstractScanSupport {

    public SourceSegmentationSupport(NormalizationMetaData normalizationMetaData, PropertyProvider propertyProvider) {
        super(normalizationMetaData, propertyProvider);
    }

    public boolean runSegmentation(Artifact artifact, File sourceDir) throws IOException {
        final File analysisDir = sourceDir.getParentFile();
        final String sourceFolderName = sourceDir.getName();

        final File targetFolder = new File(analysisDir, sourceFolderName + "-analysis");

        final String filename = artifact.getId().replace("/", "_");
        final File metascanResultJson = new File(targetFolder, filename + "_metascan.json");
        final File scancodeResultJson = new File(targetFolder, filename + "_scancode.json");

        final boolean applySourceSegmentation = getPropertyProvider().
                isProperty("analyze.scan.segmentation.enabled", "true", "false");

        if (!applySourceSegmentation) return false;

        final File resultFile = new File(targetFolder, filename + "_license-segmentation.properties");
        final File resultLogFile = new File(targetFolder, filename + "_license-segmentation.txt");
        final File resultJsonFile = new File(targetFolder, filename + "_license-segmentation.json");

        // apply file path filters
        final String[] filterPatterns = getPropertyProvider().
                getPropertyArray("analyze.scan.segmentation.filter.includes", "-none-", ",");

        // apply file path filters
        final String[] ignorePatterns = getPropertyProvider().
                getPropertyArray("analyze.scan.segmentation.ignore.includes", "-none-", ",");

        validatePatterns(filterPatterns, "analyze.scan.segmentation.filter.includes");
        validatePatterns(ignorePatterns, "analyze.scan.segmentation.ignore.includes");

        long metascanResultJsonTimestamp = metascanResultJson.lastModified();
        long scancodeResultJsonTimestamp = scancodeResultJson.lastModified();
        long resultFileTimestamp = resultFile.lastModified();

        boolean outdatedResult =
                metascanResultJsonTimestamp > resultFileTimestamp ||
                scancodeResultJsonTimestamp > resultFileTimestamp;

        final boolean overwrite = outdatedResult || getPropertyProvider().
                isProperty("analyze.scan.segmentation.overwrite", "true", "false");

        // FIXME: what if scancodeResultJson was created lateron; need to overwrite

        if (!overwrite && resultFile.exists()) {
            Properties p = PropertyUtils.loadProperties(resultFile);
            applyToArtifact(artifact, p);
            return false;
        }

        // exit in case no pre-requisite result files exist
        if (!metascanResultJson.exists()) return false;

        // validate all input folders available
        FileUtils.validateExists(analysisDir);
        FileUtils.validateExists(metascanResultJson);

        // NOTE: scancodeResultJson must be treated as optional

        init(resultLogFile, "License Segmentation");

        final ParseResult parseResult = parseMetascanResult(metascanResultJson, scancodeResultJson);

        // inherit incomplete match from meta scan result
        inheritIncompleteMatchMarkerFromMetaScan(analysisDir, sourceFolderName, parseResult);

        final Map> licenseGroups = parseResult.licenseFileGroups;
        final Set scancodeLicenseExpressions = parseResult.scancodeExpressionList;

        final JSONObject resultJson = new JSONObject();

        logGroups(licenseGroups, "License Overview", false, resultLogFile);
        resultJson.put("license.overview", new JSONObject(licenseGroups));

        logGroups(parseResult.markerFileGroups, "Marker Overview", false, resultLogFile);
        resultJson.put("marker.overview", new JSONObject(parseResult.markerFileGroups));

        // memorize all licenses (to compute the ignored list later)
        final List allLicensesIncludingIgnored = new ArrayList<>(licenseGroups.keySet());

        // apply the ignore filters
        applyFileFilters(licenseGroups, ignorePatterns);

        final List licenses = new ArrayList<>(licenseGroups.keySet());
        applyFileFilters(licenseGroups, filterPatterns);

        final List filteredLicenses = new ArrayList<>(licenseGroups.keySet());
        final List removedLicenses = new ArrayList<>(licenses);
        removedLicenses.removeAll(filteredLicenses);

        final List ignoredLicenses = new ArrayList<>(allLicensesIncludingIgnored);
        ignoredLicenses.removeAll(licenses);

        InventoryUtils.normalize(licenses);
        InventoryUtils.normalize(removedLicenses);

        Collections.sort(filteredLicenses);
        Collections.sort(removedLicenses);

        logLicenses(resultLogFile, "> Licenses <", licenses);
        logLicenses(resultLogFile, "> Filtered Licenses <", filteredLicenses);
        logLicenses(resultLogFile, "> Removed Licenses <", removedLicenses);
        logLicenses(resultLogFile, "> Ignored Licenses <", ignoredLicenses);

        // NOTE-KKL: had some trouble with this code in the workbench, there seems to be something wrong with the
        //   java erasures. The runtime code was not able to bind the String, Collection<> signature anymore. So
        //   looking at the code I choose the more explicit way.
        resultJson.put("licenses", new JSONArray(licenses));
        resultJson.put("licenses.filtered", new JSONArray(filteredLicenses));
        resultJson.put("licenses.removed", new JSONArray(removedLicenses));
        resultJson.put("licenses.ignored", new JSONArray(ignoredLicenses));

        final Properties properties = new Properties();
        licenses.sort(String::compareToIgnoreCase);
        filteredLicenses.sort(String::compareToIgnoreCase);
        removedLicenses.sort(String::compareToIgnoreCase);
        properties.setProperty("licenses", InventoryUtils.joinLicenses(licenses));
        properties.setProperty("licenses.filtered", InventoryUtils.joinLicenses(filteredLicenses));
        properties.setProperty("licenses.removed", InventoryUtils.joinLicenses(removedLicenses));
        properties.setProperty("licenses.ignored", InventoryUtils.joinLicenses(ignoredLicenses));
        properties.setProperty("scancode.license.expressions", InventoryUtils.joinLicenses(scancodeLicenseExpressions));

        // FIXME: transfer incomplete match to marker
        if ("x".equals(artifact.get("Incomplete Match"))) {
            parseResult.markerList.add("Incomplete Match");
        }

        properties.setProperty("markers", InventoryUtils.joinLicenses(parseResult.markerList));
        properties.setProperty("scan.dir", new File(analysisDir, sourceFolderName).getAbsolutePath());

        applyToArtifact(artifact, properties);

        FileUtils.write(resultJsonFile, resultJson.toString(), StandardCharsets.UTF_8);
        PropertyUtils.saveProperties(resultFile, properties);

        return true;
    }

    private void validatePatterns(String[] patterns, String property) {
        for (String pattern : patterns) {
            if (StringUtils.isEmpty(pattern)) {
                throw new IllegalStateException("Pattern [" + property + "] may not include empty strings.");
            }
            if (pattern.contains("//")) {
                throw new IllegalStateException("Pattern [" + property + "] may not include '//'.");
            }
        }
    }

    protected void inheritIncompleteMatchMarkerFromMetaScan(File analysisDir, String sourceFolderName, ParseResult parseResult) {
        final File metaScanPropertiesFile = new File(analysisDir, sourceFolderName + "_license.properties");
        final Properties p = PropertyUtils.loadProperties(metaScanPropertiesFile);
        if (p.getProperty("derived.licenses", "").contains("Incomplete Match")) {
            parseResult.markerList.add("Incomplete Match");
        }
    }

    protected void logLicenses(File resultLogFile, String context, List licenses) throws IOException {
        log(resultLogFile, String.format("%n%s", context));
        for (String license : licenses) {
            log(resultLogFile, String.format("  %s", license));
        }
    }

    private void applyToArtifact(Artifact artifact, Properties p) {
        artifact.set(Constants.KEY_IDENTIFIED_TERMS, p.getProperty("licenses"));
        artifact.set(Constants.KEY_FILTERED_TERMS, p.getProperty("licenses.removed"));
        artifact.set(Constants.KEY_IGNORED_TERMS, p.getProperty("licenses.ignored"));
        artifact.set(Constants.KEY_DERIVED_MARKERS, p.getProperty("markers"));
        artifact.set(Constants.KEY_SCAN_CODE_LICENSE_EXPRESSIONS, p.getProperty("scancode.license.expressions"));

        artifact.set(Constants.KEY_DERIVED_LICENSES, p.getProperty("licenses.filtered"));

        artifact.set("Analysis Path", p.getProperty("scan.dir"));
    }

    protected static class ParseResult {
        /**
         * The file groups map the license name to a list of files the license was found in.
         */
        final Map> licenseFileGroups;

        //TODO: Should we segment this?
        /**
         * The list of markers aggregated by the scan. These are not segmented at this moment.
         */
        final Set markerList;


        /**
         * The file groups map the markers name to a list of files the marker was found in.
         */
        final Map> markerFileGroups;

        /**
         * The list of scancode expressions aggregated by the scan.
         */
        final Set scancodeExpressionList;


        public ParseResult(Map> licenseFileGroups, Map> markerFileGroups, Set scancodeExpressionList, Set markerList) {
            this.licenseFileGroups = licenseFileGroups;
            this.markerFileGroups = markerFileGroups;
            this.markerList = markerList;
            this.scancodeExpressionList = scancodeExpressionList;

        }
    }

    protected ParseResult parseMetascanResult(File metascanResultJson, File scancodeResultJson) throws IOException {
        final Map> licenseGroups;
        final Set scancodeLicenseExpressions = new HashSet<>();

        SegmentationUtils segmentationUtils = new SegmentationUtils();
        licenseGroups = segmentationUtils.getLicenseFileMap(metascanResultJson, true);

        if (scancodeResultJson.exists()) {
            final JSONObject jsonObject = new JSONObject(FileUtils.readFileToString(scancodeResultJson, StandardCharsets.UTF_8));
            JSONArray scancodeSegments = jsonObject.optJSONArray("files");
            for (int i = 0; i < scancodeSegments.length(); i++) {
                final JSONObject segment = scancodeSegments.optJSONObject(i);
                JSONArray licenseExpressions = segment.optJSONArray("license_expressions");
                if (licenseExpressions != null) {
                    for (int j = 0; j < licenseExpressions.length(); j++) {
                        scancodeLicenseExpressions.add(licenseExpressions.optString(j));
                    }
                } else {
                    JSONArray licenseDetections = segment.optJSONArray("license_detections");
                    if (licenseDetections == null) {
                        licenseDetections = segment.getJSONArray("license_expressions");
                    }

                }
            }
        }

        List licenseList = new ArrayList<>(licenseGroups.keySet());
        final Map> markerList = new HashMap<>(licenseGroups);
        InventoryUtils.removeMarkers(licenseList, getNormalizationMetaData());

        // markerList is the delta
        licenseList.forEach(markerList.keySet()::remove);

        // contribute to overall list
        markerList.keySet().forEach(licenseGroups::remove);

        return new ParseResult(licenseGroups, markerList, scancodeLicenseExpressions, new HashSet<>(markerList.keySet()));
    }

    private void logGroups(Map> licenseGroups, String context, boolean collapse, File resultFile) throws IOException {
        List licenseList = new ArrayList<>(licenseGroups.keySet());
        Collections.sort(licenseList);
        log(resultFile, String.format("%n>>>> %s <<<<", context));
        for (String license : licenseList) {
            log(resultFile, String.format("%n  >>> %s <<<", license));
            List filesInGroup = new ArrayList<>(licenseGroups.get(license));
            Collection discriminators = filesInGroup;
            if (collapse) {
                discriminators = collapse(discriminators);
                discriminators = collapse(discriminators);
            }
            for (String file : discriminators) {
                log(resultFile, String.format("    %s", file));
            }
        }
    }

    private void applyFileFilters(Map> licenseGroups, String[] filterPatterns) {
        // remove files matching patterns
        for (List files : licenseGroups.values()) {
            List toBeDeleted = new ArrayList<>();
            for (String file : files) {
                for (String pattern : filterPatterns)
                    if (file.contains(pattern)) {
                        toBeDeleted.add(file);
                        continue;
                    }
            }
            files.removeAll(toBeDeleted);
        }

        // remove empty groups
        for (Map.Entry> entry : new ArrayList<>(licenseGroups.entrySet())) {
            if (entry.getValue().isEmpty()) {
                licenseGroups.remove(entry.getKey());
            }
        }
    }

    private Set collapse(Collection filesInGroup) {
        if (filesInGroup.isEmpty()) {
            throw new IllegalStateException("Group may not be empty");
        }
        if (filesInGroup.size() == 1) {
            return new HashSet<>(filesInGroup);
        }

        Set collapsed = new LinkedHashSet<>();
        for (String file : filesInGroup) {
            collapsed.add(new File(file).getParent());
        }
        return collapsed;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy