// com.metaeffekt.artifact.analysis.flow.ScanFlow (Maven / Gradle / Ivy)
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.artifact.analysis.flow;
import com.metaeffekt.artifact.analysis.flow.notice.GenerateNoticeParameterFlow;
import com.metaeffekt.artifact.analysis.flow.notice.GenerateNoticeParameterFlowParam;
import com.metaeffekt.artifact.analysis.metascan.Constants;
import com.metaeffekt.artifact.analysis.metascan.CopyrightSegmentationSupport;
import com.metaeffekt.artifact.analysis.metascan.MetaScanSupport;
import com.metaeffekt.artifact.analysis.metascan.SourceSegmentationSupport;
import com.metaeffekt.artifact.analysis.model.PropertyProvider;
import com.metaeffekt.artifact.analysis.scancode.ScanCodeSupport;
import com.metaeffekt.artifact.analysis.utils.ArchiveUtils;
import com.metaeffekt.artifact.analysis.utils.ConcurrentUtils;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.StringUtils;
import com.metaeffekt.artifact.terms.model.NormalizationMetaData;
import com.metaeffekt.flow.common.AbstractFlow;
import com.metaeffekt.resource.InventoryResource;
import org.joda.time.DateTime;
import org.metaeffekt.core.inventory.processor.model.Artifact;
import org.metaeffekt.core.inventory.processor.model.Inventory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import static com.metaeffekt.artifact.analysis.metascan.Constants.KEY_ARCHIVE_PATH;
import static com.metaeffekt.artifact.analysis.metascan.Constants.KEY_CONTENT_CHECKSUM;
public class ScanFlow extends AbstractFlow {
private static final Logger LOG = LoggerFactory.getLogger(ScanFlow.class);
/**
 * Entry point of the scan flow: creates the {@link ScanSupport} for the given parameters and
 * delegates to the internal processing method.
 *
 * @param scanFlowParam the parameters of this flow run.
 * @return the result of the scan flow.
 * @throws IOException declared by the delegated processing method.
 */
public ScanFlowResult process(ScanFlowParam scanFlowParam) throws IOException {
    return process(scanFlowParam, new ScanSupport(scanFlowParam));
}
/**
 * Submits one scan task per inventory artifact to a fixed thread pool, waits for completion,
 * derives the notice parameters and optionally writes the resulting inventory to disk.
 *
 * @param scanFlowParam the parameters of this flow run.
 * @param scanSupport the support instance performing the individual scans.
 * @return a result wrapping the (modified) input inventory.
 * @throws IOException declared for the invoked operations.
 */
private ScanFlowResult process(ScanFlowParam scanFlowParam, ScanSupport scanSupport) throws IOException {
    final Inventory inventory = scanFlowParam.getInventoryResource().getInventory();

    // use at most one thread per artifact, but never less than one thread
    final int artifactCount = inventory.getArtifacts().size();
    final int threadCount = Math.max(Math.min(scanFlowParam.getScanThreads(), artifactCount), 1);
    final ExecutorService executor = Executors.newFixedThreadPool(threadCount);

    // handed over to ConcurrentUtils below; this method itself never adds entries
    final Map exceptions = new ConcurrentHashMap<>();

    // scan the provided artifacts; the context "[position/total]" is used as log prefix
    int position = 0;
    for (final Artifact artifact : inventory.getArtifacts()) {
        position++;
        final String context = String.format("[%s/%s]", position, artifactCount);
        executor.submit(() -> scanArtifact(scanFlowParam, scanSupport, artifact, context));
    }

    ConcurrentUtils.awaitTerminationOrCancelOnException(executor, exceptions);

    final InventoryResource inventoryResource = InventoryResource.fromInventory(inventory);
    generateNoticeParameters(scanFlowParam, inventoryResource);

    // the result inventory is only written when a target directory is configured
    final File resultTargetDir = scanFlowParam.getResultTargetDir();
    if (resultTargetDir != null) {
        FileUtils.forceMkDirQuietly(resultTargetDir);
        final String resultFileName = DateTime.now().getMillis() + ".xlsx";
        inventoryResource.sync(new File(resultTargetDir, resultFileName));
    }

    return new ScanFlowResult(inventory);
}
/**
 * Runs the {@link GenerateNoticeParameterFlow} on the given inventory resource unless the property
 * <code>analyze.noticeparameter.generate</code> disables it.
 *
 * @param scanFlowParam provides the property provider and the normalization metadata.
 * @param inventoryResource the inventory resource holding the scan results.
 */
private void generateNoticeParameters(ScanFlowParam scanFlowParam, InventoryResource inventoryResource) {
    // NOTE(review): the third argument of isProperty is presumably the default value; confirm
    final boolean generate = scanFlowParam.getPropertyProvider()
            .isProperty("analyze.noticeparameter.generate", "true", "true");
    if (!generate) {
        return;
    }

    // NOTE: since we have access to the scan results, we derive the notice parameter here
    final GenerateNoticeParameterFlowParam flowParam = GenerateNoticeParameterFlowParam.builder()
            .inventoryResource(inventoryResource)
            .normalizationMetaData(scanFlowParam.getNormalizationMetaData())
            .build();

    new GenerateNoticeParameterFlow().process(flowParam);
}
/**
 * Scans all enabled parts of a single artifact and merges the collected results back into the
 * provided artifact instance.
 * <p>
 * Each part (binary artifact, source artifact, source archive, descriptor, observed artifact) is
 * controlled by its own <code>analyze.*</code> property; the property <code>analyze</code> acts as
 * global switch. Exceptions are logged and do not abort the flow; whatever was collected up to
 * that point is still merged.
 *
 * @param scanFlowParam the parameters of this flow run.
 * @param scanSupport the support instance performing the scans.
 * @param inputArtifact the artifact to scan; receives the merged results.
 * @param context log prefix identifying the artifact within the current run.
 */
private void scanArtifact(ScanFlowParam scanFlowParam, ScanSupport scanSupport, Artifact inputArtifact, String context) {
    // copy input artifact to isolate modifications from the provided instance
    final Artifact artifact = new Artifact(inputArtifact);
    try {
        final PropertyProvider properties = scanFlowParam.getPropertyProvider();

        // analyze is currently a global switch
        if (properties.isProperty("analyze", "true", "true")) {
            if (properties.isProperty("analyze.binary.artifact", "true", "true")) {
                scanSupport.cloneAndScan(scanFlowParam, inputArtifact, "Binary Artifact", artifact, context);
            }
            if (properties.isProperty("analyze.source.artifact", "true", "true")) {
                scanSupport.cloneAndScan(scanFlowParam, inputArtifact, "Source Artifact", artifact, context);
            }
            if (properties.isProperty("analyze.source.archive", "true", "true")) {
                scanSupport.cloneAndScan(scanFlowParam, inputArtifact, "Source Archive", artifact, context);
            }
            if (properties.isProperty("analyze.descriptor", "true", "true")) {
                scanSupport.cloneAndScan(scanFlowParam, inputArtifact, "Descriptor", artifact, context);
            }
            if (properties.isProperty("analyze.observed.artifact", "true", "true")) {
                scanSupport.scanObservedArtifact(scanFlowParam, inputArtifact, artifact, context);
            }
        }
    } catch (Exception e) {
        // identify the failing artifact in the log (consistent with ScanSupport.cloneAndScan)
        LOG.error("Exception while executing scan of [{}].", inputArtifact.deriveQualifier(), e);
    }

    // we merge everything that was produced
    inputArtifact.merge(artifact);
}
// NOTE: consider moving this to a separate class
public static class ScanSupport {
/**
 * Attribute keys that are cleared on the clone before a part scan and transferred (prefixed with
 * the part name) to the result artifact afterwards. Declared final; the reference must not be
 * reassigned.
 */
private static final String[] TRANSFER_KEY_SET = new String[] {
    Constants.KEY_DERIVED_LICENSES,
    Constants.KEY_DERIVED_MARKERS,
    Constants.KEY_DERIVED_NOTICE_PARAMETER,
    Constants.KEY_EXTRACTED_COPYRIGHTS_SCANCODE,
    Constants.KEY_EXTRACTED_AUTHORS_SCANCODE,
    Constants.KEY_ANALYSIS_PATH
};
// Scan stages; all are created in the constructor and invoked by scan(...) in this order:
// metaScanSupport, scanCodeSupport, sourceSegmentationSupport, copyrightSegmentationSupport.
final MetaScanSupport metaScanSupport;
final SourceSegmentationSupport sourceSegmentationSupport;
final ScanCodeSupport scanCodeSupport;
final CopyrightSegmentationSupport copyrightSegmentationSupport;
/**
 * Creates the individual scan stages from the normalization metadata and the property provider
 * of the given flow parameters.
 *
 * @param scanFlowParam the parameters of the flow run.
 */
public ScanSupport(ScanFlowParam scanFlowParam) {
    final NormalizationMetaData metaData = scanFlowParam.getNormalizationMetaData();
    final PropertyProvider properties = scanFlowParam.getPropertyProvider();

    this.metaScanSupport = new MetaScanSupport(metaData, properties);
    this.sourceSegmentationSupport = new SourceSegmentationSupport(metaData, properties);
    this.scanCodeSupport = new ScanCodeSupport(properties);
    this.copyrightSegmentationSupport = new CopyrightSegmentationSupport(metaData, properties);
}
/**
 * Runs all scan stages on the given artifact in a fixed order: meta scan, ScanCode, source
 * segmentation and finally copyright segmentation.
 *
 * @param artifact the artifact being scanned.
 * @param artifactAnalysisDir the analysis folder of the artifact.
 * @param context log prefix; only passed to the meta scan stage.
 * @throws IOException declared for the invoked stages.
 */
public void scan(Artifact artifact, File artifactAnalysisDir, String context) throws IOException {
    // stage 1: meta scan
    this.metaScanSupport.execute(artifact, artifactAnalysisDir, context);

    // stage 2: ScanCode
    this.scanCodeSupport.execute(artifact, artifactAnalysisDir);

    // stages 3 and 4: segmentation of sources and copyrights
    this.sourceSegmentationSupport.runSegmentation(artifact, artifactAnalysisDir);
    this.copyrightSegmentationSupport.runSegmentation(artifact, artifactAnalysisDir);
}
/**
 * Scans a single part of the input artifact on an isolated clone and transfers the resulting
 * attributes, prefixed with the part name, to the given result artifact.
 * <p>
 * The path of the part is read from the attribute <code>partPrefix + " - Path"</code> of the input
 * artifact. Exceptions are logged and not propagated.
 *
 * @param scanFlowParam the parameters of the flow run.
 * @param inputArtifact the artifact providing the part path.
 * @param partPrefix name of the part, e.g. "Binary Artifact".
 * @param artifact the artifact receiving the prefixed scan results.
 * @param context log prefix.
 */
private void cloneAndScan(ScanFlowParam scanFlowParam, Artifact inputArtifact, String partPrefix, Artifact artifact, String context) {
    try {
        // isolate substream scan
        final Artifact isolated = new Artifact(inputArtifact);

        // clear the transfer attributes so that pre-existing values are not implicitly copied;
        // the values are transferred explicitly after the scan
        for (final String key : TRANSFER_KEY_SET) {
            isolated.set(key, null);
        }

        final String partPath = inputArtifact.get(partPrefix + " - Path");
        final Artifact scanned = scanArtifactPath(scanFlowParam, isolated, partPath, null, partPrefix, context);

        for (final String key : TRANSFER_KEY_SET) {
            transferAttributes(scanned, artifact, partPrefix, key);
        }
    } catch (Exception e) {
        LOG.error("Exception during scan of [{}].", inputArtifact.deriveQualifier(), e);
    }
}
/**
 * Copies the value of <code>key</code> from the clone to the attribute
 * <code>partPrefix + " - " + key</code> of the target artifact.
 *
 * @param clone the artifact the value is read from.
 * @param artifact the artifact the prefixed value is written to.
 * @param partPrefix name of the scanned part.
 * @param key the attribute key to transfer.
 */
private void transferAttributes(Artifact clone, Artifact artifact, String partPrefix, String key) {
    final String prefixedKey = partPrefix + " - " + key;
    final String value = clone.get(key);
    artifact.set(prefixedKey, value);
}
/**
 * Scans the file or archive at the given path and returns a copy of the input artifact enriched
 * with the scan results.
 * <p>
 * If <code>archivePath</code> is empty the part is regarded as not present and the unmodified
 * copy is returned. Failures during unpack or scan do not propagate: they are logged and appended
 * to the "Errors" attribute of the returned artifact.
 *
 * @param scanFlowParam provides the analysis base directory.
 * @param inputArtifact the artifact to copy; not modified by this method.
 * @param archivePath path of the file or archive to scan; may be empty.
 * @param archiveContentCheckSumIfAvailable checksum used to derive the analysis folder; when it has
 *         no text the checksum is computed from the file.
 * @param part name of the scanned part; used for logging only.
 * @param context log prefix.
 * @return the enriched copy of the input artifact.
 */
private Artifact scanArtifactPath(final ScanFlowParam scanFlowParam,
                                  final Artifact inputArtifact,
                                  final String archivePath,
                                  final String archiveContentCheckSumIfAvailable, final String part, String context) {
    final Artifact artifact = new Artifact(inputArtifact);

    // the archive path is a mandatory input to perform scanning; if the archive part does not exist the part
    // is regarded not present
    if (StringUtils.notEmpty(archivePath)) {

        // NOTE: the archivePath may not exist (synchronized analysis folders); so we continue ... [*]
        final File archiveFile = new File(archivePath);
        final String name = archiveFile.getName();

        // NOTE: the checksum is used from the inventory; not from the file itself
        final File artifactAnalysisDir = deriveAnalysisFolder(
                scanFlowParam.getAnalysisBaseDir(), archiveFile, archiveContentCheckSumIfAvailable);

        // manage attributes before scan (the invoked scan uses the KEY_ARCHIVE_PATH as input)
        artifact.setId(name);
        artifact.set(KEY_ARCHIVE_PATH, archivePath);
        artifact.set(Constants.KEY_ANALYSIS_PATH, artifactAnalysisDir.getAbsolutePath());

        LOG.info("{} Scanning ({}) [{}]", context, part, artifact.get(KEY_ARCHIVE_PATH));
        LOG.info("{} Scanning ({}) in [{}]", context, part, artifact.get(Constants.KEY_ANALYSIS_PATH));

        // the scan part operates on shared resources; we synchronize on context + analysis path
        // NOTE(review): interned strings are JVM-wide objects; a dedicated lock registry may be preferable
        final String semaphore = getClass().getName() + artifactAnalysisDir.getAbsolutePath();
        synchronized (semaphore.intern()) {
            // ensure the artifact was extracted or copied to the analysis folder
            try {
                // NOTE: validation of existence is responsibility of the unpackArchiveOrCopyFile() method
                ArchiveUtils.unpackArchiveOrCopyFile(archiveFile, artifactAnalysisDir);

                // [*] ... after all the artifactAnalysisDir is expected; if not we cannot scan
                FileUtils.validateExists(artifactAnalysisDir);

                scan(artifact, artifactAnalysisDir, context);
            } catch (Exception e) {
                // exceptions may carry no message; fall back to the exception type so that the
                // recorded error is never null
                final String message = e.getMessage() != null ? e.getMessage() : e.getClass().getName();

                // keep the stack trace in the log; the artifact only carries the message
                LOG.warn("{} Scanning ({}) of [{}] failed: {}", context, part, archivePath, message, e);
                artifact.append("Errors", message, "\n");
            }
        }

        // manage "Analysis Path"; set again after the scan so it reflects the folder used here
        artifact.set(Constants.KEY_ANALYSIS_PATH, artifactAnalysisDir.getAbsolutePath());
    }
    return artifact;
}
/**
 * Derives the analysis folder for the given part file: the organized folder derived from the
 * file name, extended by a subfolder built from file name and checksum.
 *
 * @param analysisBaseDir the base directory of all analysis folders.
 * @param partFile the file or archive to be analyzed.
 * @param archiveContentChecksumIfAvailable checksum to use; when it has no text the checksum is
 *         computed from the part file.
 * @return the analysis folder for the part file.
 */
private File deriveAnalysisFolder(File analysisBaseDir, File partFile, String archiveContentChecksumIfAvailable) {
    final String partName = partFile.getName();
    final File organizedFolder = deriveAnalysisFolder(analysisBaseDir, partName);

    // prefer the provided checksum; only compute one from the file when none is available
    final String effectiveChecksum;
    if (StringUtils.hasText(archiveContentChecksumIfAvailable)) {
        effectiveChecksum = archiveContentChecksumIfAvailable;
    } else {
        effectiveChecksum = FileUtils.computeChecksum(partFile);
    }

    return new File(organizedFolder, deriveUnpackTargetFolder(partName, effectiveChecksum));
}
/**
 * Derives a three-level folder below the target base directory from a file name:
 * <code>[first char]/[first two chars]/[name without version]</code>.
 * <p>
 * The file name is lower-cased and trimmed, everything starting at the first
 * <code>-&lt;digits&gt;</code> sequence is removed and slashes are replaced by underscores. If
 * removing the version leaves nothing, the complete lower-cased name is used.
 * <p>
 * Lower-casing uses {@link Locale#ROOT} so that the derived folder does not depend on the default
 * locale of the JVM (e.g. "I" maps to a dotless "ı" under a Turkish default locale).
 *
 * @param targetBaseDir the base directory.
 * @param filename the file name to derive the folder from.
 * @return the derived folder; nothing is created on disk.
 */
public File deriveAnalysisFolder(File targetBaseDir, String filename) {
    // convert everything to lowercase (locale-independent)
    String filenameLowerCase = filename.toLowerCase(Locale.ROOT).trim();

    // we use a regular expression to strip off everything that looks like a version (and the remaining string)
    String noVersionFilename = filenameLowerCase.replaceFirst("-[0-9]+.*", "");
    if (noVersionFilename.isEmpty()) {
        noVersionFilename = filenameLowerCase;
    }
    noVersionFilename = noVersionFilename.replace("/", "_");

    // names shorter than two characters reuse the available prefix
    int length = noVersionFilename.length();
    String level1 = "[" + noVersionFilename.substring(0, Math.min(length, 1)) + "]";
    String level2 = "[" + noVersionFilename.substring(0, Math.min(length, 2)) + "]";
    String level3 = "[" + noVersionFilename + "]";

    return new File(new File(new File(targetBaseDir, level1), level2), level3);
}
/**
 * Builds the name of the unpack target folder as <code>[filename-checksum]</code>; slashes in the
 * file name are replaced by underscores.
 *
 * @param filename the file name (may contain slashes).
 * @param checksum the checksum appended to the file name.
 * @return the folder name enclosed in square brackets.
 */
public static String deriveUnpackTargetFolder(String filename, String checksum) {
    final String flattenedName = filename.replace('/', '_');
    return String.format("[%s-%s]", flattenedName, checksum);
}
/**
 * Scans the observed artifact itself (the file referenced by the archive path attribute of the
 * input artifact) and merges the scan result into the given artifact.
 * <p>
 * Exceptions are logged and not propagated.
 *
 * @param scanFlowParam the parameters of the flow run.
 * @param inputArtifact provides the archive path and the content checksum.
 * @param artifact the artifact the scan is based on and the result is merged into.
 * @param context log prefix.
 */
private void scanObservedArtifact(ScanFlowParam scanFlowParam, Artifact inputArtifact, Artifact artifact, String context) {
    try {
        // for a composite Archive Path the content checksum is provided instead of the checksum of the "artificial"
        // compressed artifact; scanArtifactPath operates on a copy of artifact, therefore the result is merged
        // back below
        final Artifact result = scanArtifactPath(scanFlowParam, artifact,
                inputArtifact.get(KEY_ARCHIVE_PATH), inputArtifact.get(KEY_CONTENT_CHECKSUM), "observed", context);

        // NOTE: we need to ensure that we do not mix up cases and are duplicating scans due to different
        // analysis paths and checksums used. Observed artifacts and original artifacts have to be separated.
        artifact.merge(result);
    } catch (Exception e) {
        // identify the failing artifact in the log (consistent with cloneAndScan)
        LOG.error("Exception while executing scan of [{}].", inputArtifact.deriveQualifier(), e);
    }
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy