// Source: com.metaeffekt.artifact.analysis.metascan.SourceSegmentationSupport (Maven / Gradle / Ivy)
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.artifact.analysis.metascan;
import com.metaeffekt.artifact.analysis.model.PropertyProvider;
import com.metaeffekt.artifact.analysis.utils.*;
import com.metaeffekt.artifact.terms.model.NormalizationMetaData;
import org.json.JSONArray;
import org.json.JSONObject;
import org.metaeffekt.core.inventory.processor.model.Artifact;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.*;
public class SourceSegmentationSupport extends AbstractScanSupport {
/**
 * Creates the segmentation support; both arguments are handed to the {@link AbstractScanSupport} constructor.
 *
 * @param normalizationMetaData the normalization meta data passed to the superclass.
 * @param propertyProvider the property provider passed to the superclass.
 */
public SourceSegmentationSupport(NormalizationMetaData normalizationMetaData, PropertyProvider propertyProvider) {
super(normalizationMetaData, propertyProvider);
}
public boolean runSegmentation(Artifact artifact, File sourceDir) throws IOException {
final File analysisDir = sourceDir.getParentFile();
final String sourceFolderName = sourceDir.getName();
final File targetFolder = new File(analysisDir, sourceFolderName + "-analysis");
final String filename = artifact.getId().replace("/", "_");
final File metascanResultJson = new File(targetFolder, filename + "_metascan.json");
final File scancodeResultJson = new File(targetFolder, filename + "_scancode.json");
final boolean applySourceSegmentation = getPropertyProvider().
isProperty("analyze.scan.segmentation.enabled", "true", "false");
if (!applySourceSegmentation) return false;
final File resultFile = new File(targetFolder, filename + "_license-segmentation.properties");
final File resultLogFile = new File(targetFolder, filename + "_license-segmentation.txt");
final File resultJsonFile = new File(targetFolder, filename + "_license-segmentation.json");
// apply file path filters
final String[] filterPatterns = getPropertyProvider().
getPropertyArray("analyze.scan.segmentation.filter.includes", "-none-", ",");
// apply file path filters
final String[] ignorePatterns = getPropertyProvider().
getPropertyArray("analyze.scan.segmentation.ignore.includes", "-none-", ",");
validatePatterns(filterPatterns, "analyze.scan.segmentation.filter.includes");
validatePatterns(ignorePatterns, "analyze.scan.segmentation.ignore.includes");
long metascanResultJsonTimestamp = metascanResultJson.lastModified();
long scancodeResultJsonTimestamp = scancodeResultJson.lastModified();
long resultFileTimestamp = resultFile.lastModified();
boolean outdatedResult =
metascanResultJsonTimestamp > resultFileTimestamp ||
scancodeResultJsonTimestamp > resultFileTimestamp;
final boolean overwrite = outdatedResult || getPropertyProvider().
isProperty("analyze.scan.segmentation.overwrite", "true", "false");
// FIXME: what if scancodeResultJson was created lateron; need to overwrite
if (!overwrite && resultFile.exists()) {
Properties p = PropertyUtils.loadProperties(resultFile);
applyToArtifact(artifact, p);
return false;
}
// exit in case no pre-requisite result files exist
if (!metascanResultJson.exists()) return false;
// validate all input folders available
FileUtils.validateExists(analysisDir);
FileUtils.validateExists(metascanResultJson);
// NOTE: scancodeResultJson must be treated as optional
init(resultLogFile, "License Segmentation");
final ParseResult parseResult = parseMetascanResult(metascanResultJson, scancodeResultJson);
// inherit incomplete match from meta scan result
inheritIncompleteMatchMarkerFromMetaScan(analysisDir, sourceFolderName, parseResult);
final Map> licenseGroups = parseResult.licenseFileGroups;
final Set scancodeLicenseExpressions = parseResult.scancodeExpressionList;
final JSONObject resultJson = new JSONObject();
logGroups(licenseGroups, "License Overview", false, resultLogFile);
resultJson.put("license.overview", new JSONObject(licenseGroups));
logGroups(parseResult.markerFileGroups, "Marker Overview", false, resultLogFile);
resultJson.put("marker.overview", new JSONObject(parseResult.markerFileGroups));
// memorize all licenses (to compute the ignored list later)
final List allLicensesIncludingIgnored = new ArrayList<>(licenseGroups.keySet());
// apply the ignore filters
applyFileFilters(licenseGroups, ignorePatterns);
final List licenses = new ArrayList<>(licenseGroups.keySet());
applyFileFilters(licenseGroups, filterPatterns);
final List filteredLicenses = new ArrayList<>(licenseGroups.keySet());
final List removedLicenses = new ArrayList<>(licenses);
removedLicenses.removeAll(filteredLicenses);
final List ignoredLicenses = new ArrayList<>(allLicensesIncludingIgnored);
ignoredLicenses.removeAll(licenses);
InventoryUtils.normalize(licenses);
InventoryUtils.normalize(removedLicenses);
Collections.sort(filteredLicenses);
Collections.sort(removedLicenses);
logLicenses(resultLogFile, "> Licenses <", licenses);
logLicenses(resultLogFile, "> Filtered Licenses <", filteredLicenses);
logLicenses(resultLogFile, "> Removed Licenses <", removedLicenses);
logLicenses(resultLogFile, "> Ignored Licenses <", ignoredLicenses);
// NOTE-KKL: had some trouble with this code in the workbench, there seems to be something wrong with the
// java erasures. The runtime code was not able to bind the String, Collection<> signature anymore. So
// looking at the code I choose the more explicit way.
resultJson.put("licenses", new JSONArray(licenses));
resultJson.put("licenses.filtered", new JSONArray(filteredLicenses));
resultJson.put("licenses.removed", new JSONArray(removedLicenses));
resultJson.put("licenses.ignored", new JSONArray(ignoredLicenses));
final Properties properties = new Properties();
licenses.sort(String::compareToIgnoreCase);
filteredLicenses.sort(String::compareToIgnoreCase);
removedLicenses.sort(String::compareToIgnoreCase);
properties.setProperty("licenses", InventoryUtils.joinLicenses(licenses));
properties.setProperty("licenses.filtered", InventoryUtils.joinLicenses(filteredLicenses));
properties.setProperty("licenses.removed", InventoryUtils.joinLicenses(removedLicenses));
properties.setProperty("licenses.ignored", InventoryUtils.joinLicenses(ignoredLicenses));
properties.setProperty("scancode.license.expressions", InventoryUtils.joinLicenses(scancodeLicenseExpressions));
// FIXME: transfer incomplete match to marker
if ("x".equals(artifact.get("Incomplete Match"))) {
parseResult.markerList.add("Incomplete Match");
}
properties.setProperty("markers", InventoryUtils.joinLicenses(parseResult.markerList));
properties.setProperty("scan.dir", new File(analysisDir, sourceFolderName).getAbsolutePath());
applyToArtifact(artifact, properties);
FileUtils.write(resultJsonFile, resultJson.toString(), StandardCharsets.UTF_8);
PropertyUtils.saveProperties(resultFile, properties);
return true;
}
/**
 * Rejects pattern configurations that contain empty entries or entries including {@code //}.
 *
 * @param patterns the patterns to check.
 * @param property the name of the property the patterns were read from; only used in the error message.
 *
 * @throws IllegalStateException on the first pattern violating one of the two rules.
 */
private void validatePatterns(String[] patterns, String property) {
    for (int index = 0; index < patterns.length; index++) {
        final String candidate = patterns[index];

        final boolean isEmptyPattern = StringUtils.isEmpty(candidate);
        if (isEmptyPattern) {
            throw new IllegalStateException("Pattern [" + property + "] may not include empty strings.");
        }

        final boolean hasDoubleSlash = candidate.contains("//");
        if (hasDoubleSlash) {
            throw new IllegalStateException("Pattern [" + property + "] may not include '//'.");
        }
    }
}
/**
 * Adds the marker {@code Incomplete Match} to the parse result in case the {@code derived.licenses} property of
 * the file {@code <sourceFolderName>_license.properties} in the analysis folder contains that text.
 *
 * @param analysisDir the folder containing the properties file.
 * @param sourceFolderName the name of the source folder; prefix of the properties file name.
 * @param parseResult the parse result whose marker list is extended.
 */
protected void inheritIncompleteMatchMarkerFromMetaScan(File analysisDir, String sourceFolderName, ParseResult parseResult) {
    final String propertiesFileName = sourceFolderName + "_license.properties";
    final Properties metaScanProperties = PropertyUtils.loadProperties(new File(analysisDir, propertiesFileName));

    final String derivedLicenses = metaScanProperties.getProperty("derived.licenses", "");
    final boolean incompleteMatch = derivedLicenses.contains("Incomplete Match");
    if (!incompleteMatch) {
        return;
    }

    parseResult.markerList.add("Incomplete Match");
}
/**
 * Writes a headline followed by one indented line per license to the log file.
 *
 * @param resultLogFile the log file to write to.
 * @param context the headline; it is preceded by a line break.
 * @param licenses the licenses to list; they are logged in the given order.
 *
 * @throws IOException propagated from the underlying log call.
 */
protected void logLicenses(File resultLogFile, String context, List<String> licenses) throws IOException {
    log(resultLogFile, String.format("%n%s", context));
    for (String license : licenses) {
        log(resultLogFile, String.format("  %s", license));
    }
}
/**
 * Copies the segmentation result from the given properties into the attributes of the artifact. Properties that
 * are not contained are passed on as {@code null}.
 *
 * @param artifact the artifact to modify.
 * @param p the segmentation result properties.
 */
private void applyToArtifact(Artifact artifact, Properties p) {
    // read the complete result first
    final String identifiedTerms = p.getProperty("licenses");
    final String filteredTerms = p.getProperty("licenses.removed");
    final String ignoredTerms = p.getProperty("licenses.ignored");
    final String derivedMarkers = p.getProperty("markers");
    final String scancodeExpressions = p.getProperty("scancode.license.expressions");
    final String derivedLicenses = p.getProperty("licenses.filtered");
    final String analysisPath = p.getProperty("scan.dir");

    // then transfer it attribute by attribute
    artifact.set(Constants.KEY_IDENTIFIED_TERMS, identifiedTerms);
    artifact.set(Constants.KEY_FILTERED_TERMS, filteredTerms);
    artifact.set(Constants.KEY_IGNORED_TERMS, ignoredTerms);
    artifact.set(Constants.KEY_DERIVED_MARKERS, derivedMarkers);
    artifact.set(Constants.KEY_SCAN_CODE_LICENSE_EXPRESSIONS, scancodeExpressions);
    artifact.set(Constants.KEY_DERIVED_LICENSES, derivedLicenses);
    artifact.set("Analysis Path", analysisPath);
}
protected static class ParseResult {
/**
* The file groups map the license name to a list of files the license was found in.
*/
final Map> licenseFileGroups;
//TODO: Should we segment this?
/**
* The list of markers aggregated by the scan. These are not segmented at this moment.
*/
final Set markerList;
/**
* The file groups map the markers name to a list of files the marker was found in.
*/
final Map> markerFileGroups;
/**
* The list of scancode expressions aggregated by the scan.
*/
final Set scancodeExpressionList;
public ParseResult(Map> licenseFileGroups, Map> markerFileGroups, Set scancodeExpressionList, Set markerList) {
this.licenseFileGroups = licenseFileGroups;
this.markerFileGroups = markerFileGroups;
this.markerList = markerList;
this.scancodeExpressionList = scancodeExpressionList;
}
}
protected ParseResult parseMetascanResult(File metascanResultJson, File scancodeResultJson) throws IOException {
final Map> licenseGroups;
final Set scancodeLicenseExpressions = new HashSet<>();
SegmentationUtils segmentationUtils = new SegmentationUtils();
licenseGroups = segmentationUtils.getLicenseFileMap(metascanResultJson, true);
if (scancodeResultJson.exists()) {
final JSONObject jsonObject = new JSONObject(FileUtils.readFileToString(scancodeResultJson, StandardCharsets.UTF_8));
JSONArray scancodeSegments = jsonObject.optJSONArray("files");
for (int i = 0; i < scancodeSegments.length(); i++) {
final JSONObject segment = scancodeSegments.optJSONObject(i);
JSONArray licenseExpressions = segment.optJSONArray("license_expressions");
if (licenseExpressions != null) {
for (int j = 0; j < licenseExpressions.length(); j++) {
scancodeLicenseExpressions.add(licenseExpressions.optString(j));
}
} else {
JSONArray licenseDetections = segment.optJSONArray("license_detections");
if (licenseDetections == null) {
licenseDetections = segment.getJSONArray("license_expressions");
}
}
}
}
List licenseList = new ArrayList<>(licenseGroups.keySet());
final Map> markerList = new HashMap<>(licenseGroups);
InventoryUtils.removeMarkers(licenseList, getNormalizationMetaData());
// markerList is the delta
licenseList.forEach(markerList.keySet()::remove);
// contribute to overall list
markerList.keySet().forEach(licenseGroups::remove);
return new ParseResult(licenseGroups, markerList, scancodeLicenseExpressions, new HashSet<>(markerList.keySet()));
}
private void logGroups(Map> licenseGroups, String context, boolean collapse, File resultFile) throws IOException {
List licenseList = new ArrayList<>(licenseGroups.keySet());
Collections.sort(licenseList);
log(resultFile, String.format("%n>>>> %s <<<<", context));
for (String license : licenseList) {
log(resultFile, String.format("%n >>> %s <<<", license));
List filesInGroup = new ArrayList<>(licenseGroups.get(license));
Collection discriminators = filesInGroup;
if (collapse) {
discriminators = collapse(discriminators);
discriminators = collapse(discriminators);
}
for (String file : discriminators) {
log(resultFile, String.format(" %s", file));
}
}
}
private void applyFileFilters(Map> licenseGroups, String[] filterPatterns) {
// remove files matching patterns
for (List files : licenseGroups.values()) {
List toBeDeleted = new ArrayList<>();
for (String file : files) {
for (String pattern : filterPatterns)
if (file.contains(pattern)) {
toBeDeleted.add(file);
continue;
}
}
files.removeAll(toBeDeleted);
}
// remove empty groups
for (Map.Entry> entry : new ArrayList<>(licenseGroups.entrySet())) {
if (entry.getValue().isEmpty()) {
licenseGroups.remove(entry.getKey());
}
}
}
/**
 * Replaces the given paths by their parent paths and removes duplicates. A group with a single entry is
 * returned unchanged. A path without parent is kept as is.
 *
 * @param filesInGroup the paths to collapse; must not be empty.
 *
 * @return the collapsed paths; for more than one input path in order of first appearance.
 *
 * @throws IllegalStateException if the group is empty.
 */
private Set<String> collapse(Collection<String> filesInGroup) {
    if (filesInGroup.isEmpty()) {
        throw new IllegalStateException("Group may not be empty");
    }
    if (filesInGroup.size() == 1) {
        return new HashSet<>(filesInGroup);
    }

    final Set<String> collapsed = new LinkedHashSet<>();
    for (String file : filesInGroup) {
        // getParent() returns null for a path without parent; a null entry would break a subsequent pass
        final String parent = new File(file).getParent();
        collapsed.add(parent != null ? parent : file);
    }
    return collapsed;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy