// com.metaeffekt.artifact.analysis.metascan.MetaScanSupport (Maven / Gradle / Ivy)
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.artifact.analysis.metascan;
import com.metaeffekt.artifact.analysis.model.PropertyProvider;
import com.metaeffekt.artifact.analysis.preprocess.filter.TextSieve;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.InventoryUtils;
import com.metaeffekt.artifact.analysis.utils.PropertyUtils;
import com.metaeffekt.artifact.analysis.utils.StringStats;
import com.metaeffekt.artifact.analysis.utils.StringUtils;
import com.metaeffekt.artifact.terms.model.FileSegment;
import com.metaeffekt.artifact.terms.model.FileSegmentation;
import com.metaeffekt.artifact.terms.model.NormalizationMetaData;
import com.metaeffekt.artifact.terms.model.ScanResultPart;
import com.metaeffekt.artifact.terms.model.TermsMetaData;
import org.apache.tools.ant.DirectoryScanner;
import org.json.JSONArray;
import org.json.JSONObject;
import org.metaeffekt.core.inventory.processor.model.Artifact;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import static com.metaeffekt.artifact.analysis.metascan.Constants.KEY_IDENTIFIED_TERMS;
public class MetaScanSupport extends AbstractScanSupport {
/** Logger used for all progress and diagnostic output of the meta scan. */
private static final Logger LOG = LoggerFactory.getLogger(MetaScanSupport.class);
// Folder-name suffixes for the issue categories reported by execute(...). Each suffix is appended to
// "<artifact-id>-" inside the analysis folder; the "-files" variants receive copies of the affected files.
public static final String FOLDER_INCOMPLETE_MATCH = "incomplete-match";
public static final String FOLDER_INCOMPLETE_MATCH_FILES = FOLDER_INCOMPLETE_MATCH + "-files";
public static final String FOLDER_INDICATED_EXCEPTION = "indicated-exception";
public static final String FOLDER_INDICATED_EXCEPTIONS_FILES = FOLDER_INDICATED_EXCEPTION + "-files";
public static final String FOLDER_LICENSING_OPTION = "licensing-option";
public static final String FOLDER_LICENSING_OPTION_FILES = FOLDER_LICENSING_OPTION + "-files";
public static final String FOLDER_UNSPECIFIC_LICENSE = "unspecific-license";
public static final String FOLDER_UNSPECIFIC_LICENSES_FILES = FOLDER_UNSPECIFIC_LICENSE + "-files";
public static final String FOLDER_INSUFFICIENT_SEGMENTATION = "insufficient-segmentation";
public static final String FOLDER_INSUFFICIENT_SEGMENTATION_FILES = FOLDER_INSUFFICIENT_SEGMENTATION + "-files";
// FIXME: this is an as yet undocumented issue type. We need to decide whether we keep it.
// Folder receiving the list of licenses whose terms metadata lacks license-text / copyright requirement details.
public static final String FOLDER_INSUFFICIENT_LICENSE_DETAILS = "insufficient-license-details";
// Optional text filter built from the wordlist in the constructor; stays null when no wordlist is available,
// in which case execute(...) reads file content unfiltered.
private transient TextSieve textSieve;
public MetaScanSupport(NormalizationMetaData normalizationMetaData, PropertyProvider propertyProvider) {
super(normalizationMetaData, propertyProvider);
// produce / retrieve wordlist if not available (works only with fully available tmd)
try {
if (normalizationMetaData.getWordlist() == null || normalizationMetaData.getWordlist().isEmpty()) {
normalizationMetaData.generateAndSetWordlist();
}
} catch (Exception e) {
LOG.warn("Failure while generating wordlist: [{}]", e.getMessage(), e);
}
if (normalizationMetaData.getWordlist() != null && !normalizationMetaData.getWordlist().isEmpty()) {
textSieve = TextSieve.builder()
.wordlist(normalizationMetaData.getWordlist())
.build();
}
}
/**
 * Runs the license meta scan for the given artifact using the placeholder log context "no context".
 *
 * @param artifact the artifact to scan; receives the scan result
 * @param unpackedDir the directory containing the unpacked artifact content
 * @return the result of {@link #execute(Artifact, File, String)}
 * @throws IOException if reading the content or writing the results fails
 */
public boolean execute(Artifact artifact, File unpackedDir) throws IOException {
    final String defaultContext = "no context";
    return execute(artifact, unpackedDir, defaultContext);
}
/**
 * Runs the license meta scan for a single artifact and stores the outcome next to the unpacked content.
 *
 * <p>Outline:
 * <ol>
 *   <li>Decide whether existing results can be reused. A rescan is forced by the overwrite properties,
 *       an outdated or unparsable result file, an earlier incomplete match or an unknown license.</li>
 *   <li>When results are reused they are applied to the artifact and the method returns {@code false}.</li>
 *   <li>Otherwise all previous output is removed, every included file is segmented and analyzed, issues
 *       (incomplete match, indicated exception, unspecific license, segmentation issue, licensing option)
 *       are logged, optionally reported as HTML and the affected files are copied to issue folders.</li>
 *   <li>The aggregated license set is written to the result properties file and applied to the
 *       artifact.</li>
 * </ol>
 *
 * <p>Failures while processing an individual file are logged ("EM1") and do not abort the scan.
 *
 * @param artifact the artifact to scan; receives the identified terms
 * @param unpackedDir the directory containing the unpacked artifact content
 * @param context a prefix used in log messages to identify the caller's context
 * @return {@code true} if a scan was performed; {@code false} if cached results were applied or the
 *         scan is disabled
 * @throws IOException if result files or folders cannot be read, written or cleaned
 */
public boolean execute(Artifact artifact, File unpackedDir, String context) throws IOException {
    final File targetFolder = deriveAnalysisFolder(unpackedDir);
    final File intermediateFolder = deriveIntermediateFolder(unpackedDir);
    final File scratchFolder = deriveScratchFolder(unpackedDir);

    final String filename = artifact.getId().replace("/", "_");
    final File resultPropertiesFile = new File(targetFolder, filename + "_license.properties");
    final File resultJsonFile = new File(targetFolder, filename + "_metascan.json");
    final File logFile = new File(targetFolder, filename + "_license-scan.txt");
    final File segmentFile = new File(targetFolder, filename + "_license-scan-segments.txt");
    final File segmentDebugFile = new File(targetFolder, filename + "_license-scan-segments_debug.txt");

    final boolean metaScanEnabled = getPropertyProvider().isProperty("analyze.scan.license.enabled", "true", "false");

    // collect licenses per artifact
    final Set<String> derivedLicenses = new HashSet<>();

    String artifactScanConfiguration = artifact.get("Scan Configuration");
    boolean artifactScanOverwrite = false;
    if (artifactScanConfiguration != null) {
        artifactScanOverwrite = artifactScanConfiguration.contains("analyze.scan.license.overwrite=true");
    }

    long resultFileTimestamp = resultPropertiesFile.lastModified();

    // use a reference timestamp to determine whether a new scan is required
    long overwriteResultsOlderThan = Long.parseLong(getPropertyProvider().
            getProperty("analyze.scan.license.overwrite.timestamp", "0"));
    boolean outdatedResult = resultFileTimestamp < overwriteResultsOlderThan;

    boolean overwrite = outdatedResult || artifactScanOverwrite || getPropertyProvider().
            isProperty("analyze.scan.license.overwrite", "true", "false");
    boolean overwriteOnUnknown = getPropertyProvider().isProperty("analyze.scan.license.overwrite.unknown", "true", "false");
    boolean overwriteOnIncompleteMatch = getPropertyProvider().isProperty("analyze.scan.license.overwrite.incomplete", "true", "false");

    // check existing results to determine whether scan needs to be redone
    if (!overwrite && resultPropertiesFile.exists() && (overwriteOnUnknown || overwriteOnIncompleteMatch)) {
        final Properties p = PropertyUtils.loadProperties(resultPropertiesFile);

        final String oldIncompleteMatch = p.getProperty("incomplete.match");
        if (overwriteOnIncompleteMatch && "true".equalsIgnoreCase(oldIncompleteMatch)) {
            LOG.info("{} Rescanning due to incomplete match.", context);
            overwrite = true;
        }

        final String oldDerivedLicenses = p.getProperty("derived.licenses");

        // decompose into atomic licenses
        final List<String> licenses = InventoryUtils.tokenizeLicense(oldDerivedLicenses, false, false);
        boolean detectedUnknown = false;
        for (String license : licenses) {
            final TermsMetaData termsMetaData = InventoryUtils.getNormalizationMetaData().getTermsMetaData(license);
            if (termsMetaData == null) {
                String updatedCanonicalName = InventoryUtils.getNormalizationMetaData().getUpdatedCanonicalName(license);
                if (updatedCanonicalName.equalsIgnoreCase(license)) {
                    LOG.info("{} Rescanning due to unknown license [{}].", context, license);
                    detectedUnknown = true;
                    break;
                }
            }
        }
        if (overwriteOnUnknown && detectedUnknown) {
            overwrite = true;
        }
    }

    // attempt parsing the results file; if not possible manage overwrite
    if (resultJsonFile.exists()) {
        try {
            new JSONArray(FileUtils.readFileToString(resultJsonFile, StandardCharsets.UTF_8));
        } catch (Exception e) {
            // cannot parse; manage overwrite
            overwrite = true;
            LOG.info("{} Rescanning due to incomplete result file [{}].", context, resultJsonFile.getAbsolutePath());
        }
    }

    // in case overwrite is not set and a result file exists, we apply the already evaluated result and return fast.
    if (!overwrite && resultPropertiesFile.exists() && intermediateFolder.exists()) {
        final Properties p = PropertyUtils.loadProperties(resultPropertiesFile);
        applyToArtifact(artifact, p);
        return false;
    }

    // in case scan is not enabled we skip all further processing
    if (!metaScanEnabled) {
        return false;
    }

    // the properties file serves as semaphore
    FileUtils.deleteQuietly(resultPropertiesFile);
    FileUtils.deleteQuietly(resultJsonFile);

    final File incompleteMatchesFolder = new File(targetFolder, filename + "-" + FOLDER_INCOMPLETE_MATCH);
    final File incompleteMatchesFileFolder = new File(targetFolder, filename + "-" + FOLDER_INCOMPLETE_MATCH_FILES);
    final File indicatedExceptionsFolder = new File(targetFolder, filename + "-" + FOLDER_INDICATED_EXCEPTION);
    final File indicatedExceptionsFileFolder = new File(targetFolder, filename + "-" + FOLDER_INDICATED_EXCEPTIONS_FILES);
    final File licenseOptionFolder = new File(targetFolder, filename + "-" + FOLDER_LICENSING_OPTION);
    final File licenseOptionFileFolder = new File(targetFolder, filename + "-" + FOLDER_LICENSING_OPTION_FILES);
    final File unspecificLicenseFolder = new File(targetFolder, filename + "-" + FOLDER_UNSPECIFIC_LICENSE);
    final File unspecificLicenseFileFolder = new File(targetFolder, filename + "-" + FOLDER_UNSPECIFIC_LICENSES_FILES);
    final File unsufficientSegmentationFolder = new File(targetFolder, filename + "-" + FOLDER_INSUFFICIENT_SEGMENTATION);
    final File unsufficientSegmentationFileFolder = new File(targetFolder, filename + "-" + FOLDER_INSUFFICIENT_SEGMENTATION_FILES);
    final File insufficientLicenseDetailsFolder = new File(targetFolder, filename + "-" + FOLDER_INSUFFICIENT_LICENSE_DETAILS);
    final File reportFolder = new File(targetFolder, filename + "-reports");

    // remove the output of any previous scan
    if (incompleteMatchesFolder.exists()) FileUtils.deleteDir(incompleteMatchesFolder);
    if (incompleteMatchesFileFolder.exists()) FileUtils.deleteDir(incompleteMatchesFileFolder);
    if (licenseOptionFolder.exists()) FileUtils.deleteDir(licenseOptionFolder);
    if (licenseOptionFileFolder.exists()) FileUtils.deleteDir(licenseOptionFileFolder);
    if (unspecificLicenseFolder.exists()) FileUtils.deleteDir(unspecificLicenseFolder);
    if (unspecificLicenseFileFolder.exists()) FileUtils.deleteDir(unspecificLicenseFileFolder);
    if (indicatedExceptionsFolder.exists()) FileUtils.deleteDir(indicatedExceptionsFolder);
    if (indicatedExceptionsFileFolder.exists()) FileUtils.deleteDir(indicatedExceptionsFileFolder);
    if (unsufficientSegmentationFolder.exists()) FileUtils.deleteDir(unsufficientSegmentationFolder);
    if (unsufficientSegmentationFileFolder.exists()) FileUtils.deleteDir(unsufficientSegmentationFileFolder);
    if (insufficientLicenseDetailsFolder.exists()) FileUtils.deleteDir(insufficientLicenseDetailsFolder);
    if (reportFolder.exists()) FileUtils.deleteDir(reportFolder);

    if (intermediateFolder.exists()) {
        FileUtils.cleanDirectory(intermediateFolder);
    }

    final String[] scanIncludes = getPropertyProvider().getProperty("analyze.metascan.license.includes", "**/*").split(",");
    final String[] scanExcludes = getPropertyProvider().getProperty("analyze.metascan.license.excludes", "**/.git/**/*").split(",");

    final boolean debugSegments = getPropertyProvider().isProperty("analyze.metascan.license.debug.enabled", "true", "false");

    boolean enableReport = getPropertyProvider().isProperty("analyze.metascan.report.enable", "true", "false");
    boolean forceReport = getPropertyProvider().isProperty("analyze.metascan.report.force", "true", "false");

    // evaluate sieve support; currently disabled by default
    boolean useTextSieve = getPropertyProvider().isProperty("analyze.sieve.enabled", "true", "false");

    final NormalizationMetaData normalizationMetaData = getNormalizationMetaData();

    final DirectoryScanner scanner = new DirectoryScanner();
    scanner.setBasedir(unpackedDir);
    scanner.setIncludes(scanIncludes);
    scanner.setExcludes(scanExcludes);
    scanner.scan();
    final String[] filesToScan = scanner.getIncludedFiles();

    init(logFile, unpackedDir.getName());
    init(segmentFile, unpackedDir.getName());
    if (debugSegments) {
        init(segmentDebugFile, unpackedDir.getName());
    }

    // single-element array used as mutable flag: true until the first JSON entry has been written
    boolean[] resultJsonFileSemaphore = new boolean[1];
    resultJsonFileSemaphore[0] = true;

    FileUtils.forceMkDirQuietly(intermediateFolder);

    // Begin JSON file
    FileUtils.write(resultJsonFile, "[", StandardCharsets.UTF_8);

    int size = filesToScan.length;
    int i = 0;
    for (String fileToScan : filesToScan) {
        i++;

        // collect licenses per file
        Set<String> derivedLicensesForFile = new HashSet<>();

        final File file = new File(unpackedDir, fileToScan);

        // don't care for symlinks
        if (FileUtils.isSymlink(file)) {
            continue;
        }

        if (FileUtils.matches(file.getAbsolutePath(), scanExcludes)) {
            continue;
        }

        LOG.info("{} ({}/{}) Analyzing file [{}]...", context, i, size, file.getAbsolutePath());

        final String relativeFilePath = extractRelativePath(unpackedDir, file);

        try {
            // detect encoding once; the result is reused for all read attempts below
            final String detectedEncoding = FileUtils.detectEncoding(file);

            // read content (applying textSieve or fallback to non-sieve loading)
            String fileContent;
            if (useTextSieve && textSieve != null) {
                try {
                    final Charset detectedCharset = Charset.forName(detectedEncoding);
                    fileContent = textSieve.loadFiltered(file, detectedCharset, scratchFolder).toString();
                } catch (Exception e) {
                    LOG.warn("Could not use TextSieve due to exception: [{}]", e.getMessage(), e);

                    // in case of an exception with encoding, charset or loading we return to the non-sieve mode
                    fileContent = FileUtils.readFileToString(file, detectedEncoding);
                }
            } else {
                // non-sieve mode
                fileContent = FileUtils.readFileToString(file, detectedEncoding);
            }

            // FIXME: isolate the whole segmentation aspect in a separate SegmentationSupport class.
            final FileSegmentation fileSegmentation = new FileSegmentation(fileContent, normalizationMetaData);

            // TODO log debug information for segments
            if (debugSegments) {
                log(segmentDebugFile, fileSegmentation.getMarkedSegmentsString());
            }

            log(segmentFile, String.format("%n>>>> [%s] analysis START:", relativeFilePath));

            // name under which the file is copied to issue folders; identical for all segments of
            // the file and therefore computed only once
            final String fileCopyName = FileUtils.computeChecksum(file) + "-" + file.getName();

            // process the individual segments
            for (int j = 0; j < fileSegmentation.getSegmentCount(); j++) {
                final StringBuilder resultSummary = new StringBuilder();

                final FileSegment fileSegment = fileSegmentation.getFileSegment(j);
                final String segmentContent = fileSegment.getContent();
                final String id = relativeFilePath + "/" + j;

                log(segmentFile, String.format("%n>>> Segment %d [%s] analysis:%n", j, relativeFilePath));

                final StringStats licenseTextStats = fileSegment.getNormalizedContent();
                final ScanResultPart normalizedLicensesSRP = normalizationMetaData.doAnalyze(licenseTextStats);

                final List<String> matchedLicenses = normalizedLicensesSRP.getMatchedTerms();
                for (String license : matchedLicenses) {
                    if (org.springframework.util.StringUtils.hasText(license) && !"[]".equals(license)) {
                        final String message = String.format("> Matched license [%s] in file [%s/%s]", license, relativeFilePath, j);
                        log(segmentFile, message);
                        resultSummary.append(message).append("\n");
                    }
                }

                // TODO: filter name matches that are equal to "represented as" of evidence matched results

                // process
                normalizedLicensesSRP.process(normalizationMetaData, true, true);
                fileSegment.setNormalizedSRP(normalizedLicensesSRP);

                final List<String> matchedTerms = normalizedLicensesSRP.getMatchedTerms();

                // extract variable-content for all licenses with variables
                try {
                    if (segmentHasVariableLicense(matchedTerms)) {
                        fileSegment.setLicenseVariables(getVariablesPerLicenseInSegment(matchedTerms, licenseTextStats));
                    }
                } catch (Exception e) {
                    LOG.warn("Variable extraction failed: {}. Execution continued.", e.getMessage(), e);
                }

                if (matchedTerms.isEmpty()) {
                    final String message = String.format("> No terms resolved in file [%s/%s]", relativeFilePath, j);
                    log(segmentFile, message);
                    resultSummary.append(message).append("\n");
                } else {
                    for (String license : new LinkedHashSet<>(matchedTerms)) {
                        final String message = String.format("> Resolved terms [%s] in file [%s/%s]", license, relativeFilePath, j);
                        log(segmentFile, message);
                        resultSummary.append(message).append("\n");
                        derivedLicensesForFile.add(license);
                    }
                }

                // diff the partial matches to detect not fully matched licenses

                // iterate through matched licenses and resolve the pre-computed partial matches
                final Set<String> aggregatedPartialMatches = new HashSet<>();
                final Set<String> aggregatedExcludeMatches = new HashSet<>();
                for (String matchedLicense : matchedLicenses) {
                    final TermsMetaData lmd = normalizationMetaData.getTermsMetaData(matchedLicense);
                    if (lmd == null) {
                        // no metadata for this term: nothing to aggregate. Without this guard a single
                        // unresolvable term would abort the analysis of the whole file via the catch below.
                        continue;
                    }
                    List<String> partialMatches = lmd.getPartialMatches();
                    List<String> excludeMatches = lmd.getExcludedMatches();
                    if (partialMatches != null) aggregatedPartialMatches.addAll(partialMatches);
                    if (excludeMatches != null) aggregatedExcludeMatches.addAll(excludeMatches);
                }

                final List<String> retainedPartialMatches = normalizedLicensesSRP.getPartialMatchedTerms();
                retainedPartialMatches.removeAll(aggregatedPartialMatches);
                retainedPartialMatches.removeAll(aggregatedExcludeMatches);

                // also remove those identified (by name)
                retainedPartialMatches.removeAll(matchedTerms);

                // markers do not contribute to incomplete matches
                InventoryUtils.removeMarkers(retainedPartialMatches, normalizationMetaData);

                if (LOG.isDebugEnabled()) {
                    LOG.debug(" Aggregates PMs: {}", aggregatedPartialMatches);
                    LOG.debug(" Excluded PMs: {}", aggregatedExcludeMatches);
                    LOG.debug(" Matched PMs: {}", normalizedLicensesSRP.getPartialMatchedTerms());
                    LOG.debug(" Retained PMs: {}", retainedPartialMatches);
                }

                final boolean hasIncompleteMatches = !retainedPartialMatches.isEmpty();

                if (hasIncompleteMatches) {
                    final String message = String.format("> Incomplete license identification in file [%s/%s]: individual matches indicate one of %s", relativeFilePath, j, retainedPartialMatches);
                    log(logFile, message);
                    LOG.info("{} ({}/{}) {}", context, i, size, message);
                    log(segmentFile, message);
                    resultSummary.append(message).append("\n");
                    derivedLicensesForFile.add("Incomplete Match");

                    if (enableReport) {
                        // create a html report when an incomplete match was detected
                        createHtmlReport("Incomplete Match " + relativeFilePath, normalizedLicensesSRP,
                                incompleteMatchesFolder, licenseTextStats, id, retainedPartialMatches,
                                "incomplete-match", filename);
                    }
                    FileUtils.copyFile(file, new File(incompleteMatchesFileFolder, fileCopyName));
                }

                boolean hasIndicatedExceptions = isIndicatedExceptionWithoutReference(normalizedLicensesSRP.getMatches());
                if (hasIndicatedExceptions) {
                    String message = String.format("> Indicated exception without reference detected in file [%s/%s].", relativeFilePath, j);
                    log(logFile, message);
                    LOG.info("{} ({}/{}) {}", context, i, size, message);
                    log(segmentFile, message);
                    resultSummary.append(message).append("\n");
                    derivedLicensesForFile.add("Indicated Exception");

                    if (enableReport) {
                        // create a html report when an indicated exception was detected
                        createHtmlReport("Indicated Exception " + relativeFilePath, normalizedLicensesSRP,
                                indicatedExceptionsFolder, licenseTextStats, id, retainedPartialMatches,
                                "indicated-exceptions", filename);
                    }
                    FileUtils.copyFile(file, new File(indicatedExceptionsFileFolder, fileCopyName));
                }

                boolean hasUnspecificLicenses = containsUnspecificLicenses(normalizedLicensesSRP.getMatches());
                if (hasUnspecificLicenses) {
                    String message = String.format("> Unspecific license detected in file [%s/%s].", relativeFilePath, j);
                    log(logFile, message);
                    LOG.info("{} ({}/{}) {}", context, i, size, message);
                    log(segmentFile, message);
                    resultSummary.append(message).append("\n");

                    if (enableReport) {
                        if (ReportController.getInstance().createReportFor(normalizedLicensesSRP, licenseTextStats)) {
                            createHtmlReport("Unspecific License " + relativeFilePath, normalizedLicensesSRP,
                                    unspecificLicenseFolder, licenseTextStats, id, retainedPartialMatches,
                                    "unspecific-licenses", filename);
                        }
                    }
                    FileUtils.copyFile(file, new File(unspecificLicenseFileFolder, fileCopyName));
                }

                boolean hasSegmentationIssue = hasSegmentationIssue(normalizedLicensesSRP.getTextMatchedTerms(), matchedTerms);
                if (hasSegmentationIssue) {
                    String message = String.format("> Segmentation Issue detected in file [%s/%s].", relativeFilePath, j);
                    log(logFile, message);
                    LOG.info("{} ({}/{}) {}", context, i, size, message);
                    log(segmentFile, message);
                    resultSummary.append(message).append("\n");

                    if (enableReport) {
                        createHtmlReport("Segmentation Issue " + relativeFilePath, normalizedLicensesSRP,
                                unsufficientSegmentationFolder, licenseTextStats, id, retainedPartialMatches,
                                "segmentation-issue", filename);
                    }
                    FileUtils.copyFile(file, new File(unsufficientSegmentationFileFolder, fileCopyName));
                }

                boolean hasLicensingOption =
                        // legacy marker name
                        matchedTerms.contains("Licensing Option") ||
                        // current marker name; FIXME: some markers are hardcoded; here we need to be careful
                        matchedTerms.contains("License Option Marker");

                if (hasLicensingOption) {
                    String message = String.format("> License options detected in file [%s/%s].", relativeFilePath, j);
                    log(logFile, message);
                    LOG.info("{} ({}/{}) {}", context, i, size, message);
                    log(segmentFile, message);
                    resultSummary.append(message).append("\n");

                    if (enableReport) {
                        createHtmlReport("Licensing Option " + relativeFilePath, normalizedLicensesSRP,
                                licenseOptionFolder, licenseTextStats, id, retainedPartialMatches,
                                "licensing-option", filename);
                    }
                    FileUtils.copyFile(file, new File(licenseOptionFileFolder, fileCopyName));
                }

                if (forceReport) {
                    createHtmlReport("Scan Report " + relativeFilePath, normalizedLicensesSRP,
                            reportFolder, licenseTextStats, id, retainedPartialMatches,
                            "scan-report", filename);
                }

                // log the content
                log(segmentFile, String.format("%n>>> Segment %d [%s] content START: >>>%n%n%s%n%n<<< Segment %d [%s] content END <<<%n", j, relativeFilePath, segmentContent, j, relativeFilePath));

                // log summary (below content)
                log(segmentFile, resultSummary);

                derivedLicenses.addAll(derivedLicensesForFile);
            }

            writeIntermediateFileStructure(unpackedDir, file, fileSegmentation, intermediateFolder, resultJsonFile, resultJsonFileSemaphore);
        } catch (Exception e) {
            LOG.error("EM1: {}", e.getMessage(), e);
        }

        // insufficientLicenseDetails issue; evaluated on the licenses aggregated so far, so the
        // output file is rewritten after every scanned file
        final Set<String> insufficientLicenseDetails = new HashSet<>();
        for (String license : derivedLicenses) {
            TermsMetaData tmd = normalizationMetaData.getTermsMetaData(license);
            if (tmd != null) {
                if (tmd.isException() || tmd.isExpression() || tmd.isMarker()) continue;
                if (tmd.isUnspecific()) continue;
                if (tmd.allowLaterVersions()) continue;
                if (tmd.getRequiresLicenseText() == null || tmd.getRequiresCopyright() == null) {
                    insufficientLicenseDetails.add(license);
                }
            }
        }
        if (!insufficientLicenseDetails.isEmpty()) {
            FileUtils.forceMkdir(insufficientLicenseDetailsFolder);
            final File output = new File(insufficientLicenseDetailsFolder, "insufficientLicenseDetails.txt");
            // try-with-resources: the writer is closed even when a write fails
            try (FileWriter writer = new FileWriter(output)) {
                for (String s : insufficientLicenseDetails) {
                    writer.write(s + System.lineSeparator());
                }
            }
        }

        // FIXME-2020: here would be the place to consolidate derivedLicensesPerFile
        final Set<String> removableLicenses = InventoryUtils.collectCoveredRemovableLicenses(derivedLicensesForFile);
        derivedLicensesForFile.removeAll(removableLicenses);

        // log summary to segment file
        if (!derivedLicensesForFile.isEmpty()) {
            log(segmentFile, String.format("<<<< Resolved license set for [%s]:%n %s%n", fileToScan, derivedLicensesForFile));
            log(logFile, String.format("<<<< Resolved license set for [%s]:%n %s", fileToScan, derivedLicensesForFile));
        }
        if (!removableLicenses.isEmpty()) {
            log(segmentFile, String.format("<<<< Removed license set for [%s]:%n %s%n", fileToScan, removableLicenses));
            log(logFile, String.format("<<<< Removed license set for [%s]:%n %s", fileToScan, removableLicenses));
        }
        log(segmentFile, String.format("<<<< [%s] analysis END <<<<", relativeFilePath));

        if (!derivedLicensesForFile.isEmpty()) {
            LOG.info("{} ({}/{}) Analyzing file [{}] resolved {}", context, i, size, file.getAbsolutePath(), derivedLicensesForFile);
        } else {
            LOG.info("{} ({}/{}) Analyzing file [{}].", context, i, size, file.getAbsolutePath());
        }
        if (!removableLicenses.isEmpty()) {
            LOG.info("{} ({}/{}) Analyzing file [{}] removed [{}]", context, i, size, file.getAbsolutePath(), removableLicenses);
        }
    }

    // complete resultJsonFile
    FileUtils.write(resultJsonFile, "]", StandardCharsets.UTF_8, true);

    String deriveLicenseResult = "";
    if (!derivedLicenses.isEmpty()) {
        ArrayList<String> orderedList = new ArrayList<>(derivedLicenses);
        Collections.sort(orderedList, String.CASE_INSENSITIVE_ORDER);
        deriveLicenseResult = StringUtils.toString(orderedList);
    }

    // store the derived licenses in a property file serving as cache
    Properties result = new Properties();
    result.setProperty("derived.licenses", deriveLicenseResult);
    result.setProperty("incomplete.match", String.valueOf(derivedLicenses.contains("Incomplete Match")));

    applyToArtifact(artifact, result);

    PropertyUtils.saveProperties(resultPropertiesFile, result);

    return true;
}
/**
 * Derives the folder holding intermediate scan results: a sibling of the given directory named
 * {@code <name>-intermediate}.
 *
 * @param analysisDir the directory the folder is derived from
 * @return the sibling folder with the suffix {@code -intermediate}
 */
public static File deriveIntermediateFolder(File analysisDir) {
    final File parent = analysisDir.getParentFile();
    final String siblingName = analysisDir.getName() + "-intermediate";
    return new File(parent, siblingName);
}
/**
 * Derives the folder holding the analysis output: a sibling of the given directory named
 * {@code <name>-analysis}.
 *
 * @param analysisDir the directory the folder is derived from
 * @return the sibling folder with the suffix {@code -analysis}
 */
public static File deriveAnalysisFolder(File analysisDir) {
    final File parent = analysisDir.getParentFile();
    final String siblingName = analysisDir.getName() + "-analysis";
    return new File(parent, siblingName);
}
/**
 * Derives the scratch folder: a sibling of the given directory named {@code <name>-scratch}.
 *
 * @param analysisDir the directory the folder is derived from
 * @return the sibling folder with the suffix {@code -scratch}
 */
public static File deriveScratchFolder(File analysisDir) {
    final File parent = analysisDir.getParentFile();
    final String siblingName = analysisDir.getName() + "-scratch";
    return new File(parent, siblingName);
}
/**
 * Determines whether a segmentation issue occurred: more than one of the given licenses remains after
 * dropping those not contained in the relevant matches and removing markers.
 *
 * <p>The check works on a copy; the list passed in by the caller is left untouched.
 *
 * @param licenses The licenses which need to be checked
 * @param relevantMatches The relevant matches for checking if a license has issues
 * @return {@code true} if more than one relevant, non-marker license remains
 */
private boolean hasSegmentationIssue(List<String> licenses, List<String> relevantMatches) {
    // defensive copy: a predicate must not filter the caller's (possibly shared) list as a side effect
    final List<String> candidates = new ArrayList<>(licenses);
    candidates.removeIf(license -> !relevantMatches.contains(license));
    InventoryUtils.removeMarkers(candidates, getNormalizationMetaData());
    return candidates.size() > 1;
}
/**
 * Writes the scan result of one file into the intermediate folder and appends the file's JSON entry to
 * the result JSON file.
 *
 * <p>Failures are logged as warnings and do not interrupt the scan.
 *
 * @param unpackBaseDir The base directory the scanned file is located in
 * @param file The scanned file
 * @param fileSegmentation The FileSegmentation of the scanned file
 * @param intermediateFolder The folder in which the intermediate results of a license scan are saved
 * @param resultJsonFile The JSON file containing the results of a scan
 * @param resultJsonFileSemaphore Single-element flag; {@code true} while no entry has been written yet,
 *         set to {@code false} after the first entry so that subsequent entries are comma-separated
 */
private void writeIntermediateFileStructure(File unpackBaseDir, File file, FileSegmentation fileSegmentation,
        File intermediateFolder, File resultJsonFile, boolean[] resultJsonFileSemaphore) {
    final String filePath = FileUtils.asRelativePath(unpackBaseDir, file);
    final File intermediateFile = new File(intermediateFolder, filePath);
    try {
        final JSONObject jsonFileObject = new JSONObject();
        final JSONObject jsonSegmentsObject = new JSONObject();

        jsonFileObject.put("file", filePath);

        final List<FileSegment> segmentFoldersForScancode = fileSegmentation.combineSegmentsAndWriteFoldersForScancode(intermediateFile);
        jsonFileObject.put("segmentCount", segmentFoldersForScancode.size());

        for (int j = 0; j < segmentFoldersForScancode.size(); j++) {
            final FileSegment fileSegment = segmentFoldersForScancode.get(j);
            final ScanResultPart normalizedSRP = fileSegment.getNormalizedSRP();
            if (normalizedSRP == null) {
                continue;
            }

            final JSONObject segmentResult = new JSONObject();
            final List<String> nameMatchedLicenses = normalizedSRP.getNameMatchedTerms();
            final List<String> textMatchedLicenses = normalizedSRP.getTextMatchedTerms();
            final List<String> resolvedLicenses = normalizedSRP.getMatchedTerms();

            // empty lists are omitted from the segment result
            if (!nameMatchedLicenses.isEmpty()) {
                segmentResult.put("nameMatches", nameMatchedLicenses);
            }
            if (!textMatchedLicenses.isEmpty()) {
                segmentResult.put("textMatches", textMatchedLicenses);
            }
            if (!resolvedLicenses.isEmpty()) {
                segmentResult.put("resolvedLicenses", resolvedLicenses);
            }
            if (fileSegment.getLicenseVariables() != null) {
                segmentResult.put("variables", fileSegment.getLicenseVariables());
            }
            jsonSegmentsObject.put("segment-" + j, segmentResult);
        }

        // attach the segments once; the key stays absent for files without segments, which keeps the
        // JSON shape produced so far
        if (!segmentFoldersForScancode.isEmpty()) {
            jsonFileObject.put("segments", jsonSegmentsObject);
        }

        // separator and entry go out in one append so a failing write cannot leave a dangling comma
        final String separator = resultJsonFileSemaphore[0] ? "" : ",";
        FileUtils.write(resultJsonFile, separator + jsonFileObject.toString(), StandardCharsets.UTF_8, true);
        resultJsonFileSemaphore[0] = false;
    } catch (Exception e) {
        LOG.warn("Creating folder for scancode failed for {}: {}. Execution continued.", file.getName(), e.getMessage(), e);
    }
}
/**
 * Determines whether at least one of the given terms metadata entries is flagged as unspecific.
 * {@code null} entries are skipped.
 *
 * @param termsMetaData The license terms meta data
 * @return {@code true} if any non-null entry reports {@code isUnspecific()}
 */
private boolean containsUnspecificLicenses(List<? extends TermsMetaData> termsMetaData) {
    for (TermsMetaData tmd : termsMetaData) {
        if (tmd != null && tmd.isUnspecific()) {
            return true;
        }
    }
    return false;
}
/**
 * Transfers a scan result from the given properties to the artifact.
 *
 * <p>The terms are read from {@code derived.licenses}, falling back to {@code identified.terms} and
 * finally to the empty string. If the terms mention "Incomplete Match", the value of
 * {@code incomplete.match} (default "false") is set on the artifact as well.
 *
 * @param artifact The artifact receiving the result
 * @param p The properties holding the scan result
 */
protected void applyToArtifact(Artifact artifact, Properties p) {
    String identifiedTerms = p.getProperty("derived.licenses");
    if (identifiedTerms == null) {
        // fall back to the alternative key; default to the empty string when neither key is present
        identifiedTerms = p.getProperty("identified.terms", "");
    }

    final boolean mentionsIncompleteMatch = identifiedTerms.contains("Incomplete Match");
    if (mentionsIncompleteMatch) {
        artifact.set("Incomplete Match", p.getProperty("incomplete.match", "false"));
    }

    artifact.set(KEY_IDENTIFIED_TERMS, identifiedTerms);
}
/**
 * Creates an HTML match report for a segment.
 *
 * <p>The report file is named {@code <id>_<type><segmentId>.html}, with every "/" of the segment id
 * replaced by "_". Any failure, including errors, is logged and swallowed so that report generation
 * cannot break the scan.
 *
 * @param htmlReportTitle Title of the report; used as canonical name of the temporary terms metadata
 * @param scanResultPart The scan result of the segment; handed to the report generation
 * @param targetDir Directory for saving the report
 * @param textStats The normalized text of the segment; handed to the report generation
 * @param segmentId ID of the segment the report is created for
 * @param retainedPartialMatches The partial matches retained for the segment; handed to the report generation
 * @param type The type of the report; part of the report file name
 * @param id The ID of the report; prefix of the report file name
 */
protected void createHtmlReport(String htmlReportTitle, ScanResultPart scanResultPart, File targetDir, StringStats textStats, String segmentId, List retainedPartialMatches, String type, String id) {
    try {
        final String reportFileName = id + "_" + type + segmentId.replace("/", "_") + ".html";
        final File reportFile = new File(targetDir, reportFileName);

        // temporary metadata instance that only carries the title and renders the report
        final TermsMetaData reportMetaData = new TermsMetaData();
        reportMetaData.setCanonicalName(htmlReportTitle);
        reportMetaData.createMatchReportHtml(textStats, scanResultPart, reportFile, retainedPartialMatches);
    } catch (Throwable e) {
        // rationale for throwable: we may get an out of memory error on binaries (based on string size
        // limits); the error must not break the process
        LOG.error("Cannot generate HTML Report!", e);
    }
}
/**
 * Extracts the path of a file relative to a base directory.
 *
 * <p>The base directory path is stripped from the front of the file path; the remainder keeps its
 * leading separator (for example {@code /lib/a.txt}). A file that is not located below the base
 * directory is returned with its unmodified path. This includes siblings that merely share a name
 * prefix with the base directory, such as {@code base-other/a.txt} for the base {@code base}.
 *
 * @param baseDir The base directory of a given file for comparison.
 * @param file The file from which the relative path is being extracted.
 *
 * @return The relative path from baseDir to file, or the file's path if it is not below baseDir.
 */
public String extractRelativePath(File baseDir, File file) {
    final String filePath = file.getPath();
    final String baseDirPath = baseDir.getPath();
    if (!filePath.startsWith(baseDirPath)) {
        return filePath;
    }

    final String remainder = filePath.substring(baseDirPath.length());

    // a plain prefix match is not sufficient: the prefix has to end on a path boundary
    final boolean endsOnPathBoundary = remainder.isEmpty()
            || baseDirPath.isEmpty()
            || baseDirPath.endsWith(File.separator)
            || remainder.startsWith(File.separator);

    return endsOnPathBoundary ? remainder : filePath;
}
// FIXME: use dedicated java class to capture the variables
/**
 * Collects the template variables of all given licenses that define variables.
 *
 * @param normalizedLicenses the licenses matched in the segment
 * @param licenseTextStats the normalized text of the segment
 * @return an array with one object per license with variables; each object maps the license to the
 *         result of {@code getVariableKeyValuePerLicense}
 */
private JSONArray getVariablesPerLicenseInSegment(List<String> normalizedLicenses, StringStats licenseTextStats) {
    final JSONArray licenseVariablesArray = new JSONArray();
    for (String license : normalizedLicenses) {
        if (!licenseHasVariable(license)) {
            continue;
        }
        // the holder object is only created for licenses that actually contribute variables
        final JSONObject licenseVariables = new JSONObject();
        licenseVariables.put(license, getVariableKeyValuePerLicense(license, licenseTextStats));
        licenseVariablesArray.put(licenseVariables);
    }
    return licenseVariablesArray;
}
/**
 * Determines if a segment has variable licenses.
 *
 * @param matchedLicenses The licenses to be checked
 * @return {@code true} if at least one of the licenses has a template with variables
 */
private boolean segmentHasVariableLicense(List<String> matchedLicenses) {
    for (String license : matchedLicenses) {
        if (licenseHasVariable(license)) {
            return true;
        }
    }
    return false;
}
/** Matches a template variable of the form <code>{{name}}</code>; compiled once and reused. */
private static final java.util.regex.Pattern LICENSE_TEMPLATE_VARIABLE_PATTERN =
        java.util.regex.Pattern.compile("\\{\\{[^}]+}}");

/**
 * Determines if a license has variables, i.e. whether its license template contains at least one
 * <code>{{...}}</code> placeholder.
 *
 * <p>The template is searched for the placeholder instead of being matched as a whole: a whole-string
 * match with {@code .*} does not cross line terminators and therefore misses placeholders in
 * multi-line templates.
 *
 * @param license The license to be checked
 * @return {@code true} if terms metadata with a license template containing a placeholder exists
 */
private boolean licenseHasVariable(String license) {
    final TermsMetaData termsMetaData = getNormalizationMetaData().getTermsMetaData(license);
    if (termsMetaData == null) {
        return false;
    }
    final String licenseTemplate = termsMetaData.getLicenseTemplate();
    if (licenseTemplate == null) {
        return false;
    }
    return LICENSE_TEMPLATE_VARIABLE_PATTERN.matcher(licenseTemplate).find();
}
/**
 * Extracts the values of the {{placeholder}} variables of a license template from a given text.
 * <p>
 * The license template is normalized and split into words. For every distinct placeholder word, the
 * template words directly before and directly after the placeholder are looked up in licenseTextStats.
 * The number of context words grows by one per attempt until a decision can be made.
 *
 * @param license The license whose template is evaluated. The terms metadata looked up for the license
 * is dereferenced without a null check.
 * @param licenseTextStats The text in which the values of the variables are looked up.
 *
 * @return A JSON object that maps each placeholder name (without the curly braces) to the extracted
 * value wrapped in double quotes, to a newline if the context words were found in reverse order, or to
 * an empty string if no value could be determined.
 */
protected JSONObject getVariableKeyValuePerLicense(String license, StringStats licenseTextStats) {
final JSONObject keyValuePairs = new JSONObject();
// placeholder words (including the braces) that have already been handled; each one is evaluated only once
final List processedKeys = new ArrayList<>();
final String licenseTemplateOriginal = getNormalizationMetaData().getTermsMetaData(license).getLicenseTemplate();
final StringStats normalizedLicenseTemplate = StringStats.normalize(licenseTemplateOriginal, false);
String licenseTemplate = normalizedLicenseTemplate.getNormalizedString();
// re-join placeholder braces written as "{ { " and " } }" (presumably split up by the normalization; TODO confirm)
licenseTemplate = licenseTemplate.replaceAll("\\{ \\{ ", "{{").replaceAll(" } }", "}}");
// FIXME: Normalize licenseTemplate
// remove <...> sequences and double quotes, then collapse runs of spaces into a single space
licenseTemplate = licenseTemplate.replaceAll("\\<.*?\\>", "");
licenseTemplate = licenseTemplate.replaceAll("\"", "");
licenseTemplate = licenseTemplate.replaceAll(" {2,}", " ");
// enclose every placeholder in the marker character '˜' (replacing one adjacent space, if present) such that
// the split below isolates each placeholder as a word of its own
licenseTemplate = licenseTemplate.replaceAll(" ?\\{\\{", "˜{{").replaceAll("}} ?", "}}˜");
String[] licenseTemplateWord = licenseTemplate.split((" |˜"));
// FIXME: DO WITH LOOKAROUNDS
// iterating through every word of the licenseTemplate
for (int i = 0; i < licenseTemplateWord.length; i++) {
// process the word only if it is a placeholder ({{...}}) that has not been handled yet
if (licenseTemplateWord[i].matches("\\{\\{.*?}}") && !processedKeys.contains(licenseTemplateWord[i])) {
boolean matched = false;
boolean failed = false;
// number of template words used as context on each side of the placeholder
int index = 1;
// the loop ends at the latest once index exceeds i, because the before-context then fails
while (!matched) {
String before = "";
String after = "";
// find the word before and after the variableKey (licenseTemplate)
// before-context: the index words preceding the placeholder, joined by single spaces. The attempt fails if
// fewer than index words precede the placeholder. Only the first pass (b == index) does any work: it always
// ends in a break, since the words produced by split are never null.
for (int b = index; b > 0; b--) {
if ((i - b) < 0) {
failed = true;
break;
}
if (licenseTemplateWord[i - b] != null) {
for (int a = b; a > 0; a--) {
before = (before + " " + licenseTemplateWord[i - a]).trim();
}
break;
}
}
// after-context: up to index words following the placeholder; fewer words are used close to the end of
// the template and none (empty string) if the placeholder is the last word
for (int b = index; b > 0; b--) {
if (i + b < licenseTemplateWord.length) {
for (int a = 1; a <= b; a++) {
after = (after + " " + licenseTemplateWord[i + a]).trim();
}
break;
}
}
// no context at all, or not enough preceding words: record an empty value and stop processing this placeholder
if ((before.equals("")) && (after.equals("")) || failed) {
keyValuePairs.put(licenseTemplateWord[i].replace("{{", "").replace("}}", ""), "");
processedKeys.add(licenseTemplateWord[i]);
matched = true;
break;
}
StringStats stringStatsBefore = StringStats.normalize(before, true);
StringStats stringStatsAfter = StringStats.normalize(after, true);
// all matches in the actual SegmentText of the words before and after the variableKey
int[] beforeMatches = licenseTextStats.allMatchesOriginalString(stringStatsBefore);
int[] afterMatches = licenseTextStats.allMatchesOriginalString(stringStatsAfter);
// FIXME: Safe index of allMatches if its unique
// FIXME: BSD 4-Clause Problem
// TODO: Enable content extraction if matches are not all unique
// if matches unique then extract content
if (beforeMatches.length == 1 && afterMatches.length == 1 && beforeMatches[0] < afterMatches[0]) {
// the value is the text between the end of the before-context and the start of the after-context
// NOTE(review): the positions are returned by allMatchesOriginalString but applied to getNormalizedString(),
// and before.length() is the length of the template context, not of the matched text; confirm that these
// index spaces agree. substring throws if the computed begin index exceeds the end index.
String content = licenseTextStats.getNormalizedString().substring(beforeMatches[0] + before.length(), afterMatches[0] - 1);
// the content is trimmed unless the original template has no separator directly before the placeholder
// and the content starts with a space
if (!(noSpaceBeforePlaceholder(licenseTemplateOriginal, licenseTemplateWord[i]) && content.startsWith(" "))) {
content = content.trim();
}
// TODO: re-normalize Content
keyValuePairs.put(licenseTemplateWord[i].replace("{{", "").replace("}}", ""), "\"" + content + "\"");
matched = true;
processedKeys.add(licenseTemplateWord[i]);
} else if (beforeMatches.length == 1 && afterMatches.length == 1 && beforeMatches[0] > afterMatches[0]) {
// both contexts were found exactly once, but in reverse order: a single newline is stored as value
keyValuePairs.put(licenseTemplateWord[i].replace("{{", "").replace("}}", ""), "\n" + "");
matched = true;
processedKeys.add(licenseTemplateWord[i]);
} else if (beforeMatches.length == 0 | afterMatches.length == 0) {
// at least one context was not found: an empty value is stored
// NOTE(review): '|' is the non-short-circuit OR; both operands are free of side effects here
keyValuePairs.put(licenseTemplateWord[i].replace("{{", "").replace("}}", ""), "");
matched = true;
processedKeys.add(licenseTemplateWord[i]);
} else {
// ambiguous result (several matches, or both contexts at the same position): retry with one more context word.
// The fill calls do not influence the next attempt, as both arrays are obtained anew in every iteration.
Arrays.fill(beforeMatches, -1);
Arrays.fill(afterMatches, -1);
index++;
}
}
}
}
return keyValuePairs;
}
/**
 * Determines whether a placeholder is not directly preceded by a separator in the license template.
 * A space and the character '&gt;' count as separators. Only the first occurrence of the placeholder
 * is inspected.
 *
 * @param licenseTemplate The given license template
 * @param placeholder A placeholder within the license template
 *
 * @return true if the character directly before the placeholder is neither a space nor '&gt;', or if
 * the placeholder is located at the very beginning of the template; false if a space or '&gt;' precedes
 * the placeholder, or if the template does not contain the placeholder at all.
 */
private boolean noSpaceBeforePlaceholder(String licenseTemplate, String placeholder) {
    final int placeholderIndex = licenseTemplate.indexOf(placeholder);
    if (placeholderIndex < 0) {
        // placeholder not present (e.g. spelled differently in the template): nothing can be said about the
        // preceding character; report false, for which the caller trims the extracted content
        return false;
    }
    if (placeholderIndex == 0) {
        // no preceding character exists, hence no separator either
        return true;
    }

    final char precedingChar = licenseTemplate.charAt(placeholderIndex - 1);
    return precedingChar != ' ' && precedingChar != '>';
}
/**
 * Determines whether at least one of the given terms is an exception.
 *
 * @param terms license terms meta data
 *
 * @return true if {@code isException()} holds for at least one element; false otherwise (also for an
 * empty list).
 */
private boolean isIndicatedExceptionWithoutReference(List<TermsMetaData> terms) {
    for (TermsMetaData tmd : terms) {
        if (tmd.isException()) {
            return true;
        }
    }
    return false;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy