All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.artifact.analysis.flow.ScanFlow Maven / Gradle / Ivy

There is a newer version: 0.126.0
Show newest version
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.artifact.analysis.flow;

import com.metaeffekt.artifact.analysis.flow.notice.GenerateNoticeParameterFlow;
import com.metaeffekt.artifact.analysis.flow.notice.GenerateNoticeParameterFlowParam;
import com.metaeffekt.artifact.analysis.metascan.Constants;
import com.metaeffekt.artifact.analysis.metascan.CopyrightSegmentationSupport;
import com.metaeffekt.artifact.analysis.metascan.MetaScanSupport;
import com.metaeffekt.artifact.analysis.metascan.SourceSegmentationSupport;
import com.metaeffekt.artifact.analysis.model.PropertyProvider;
import com.metaeffekt.artifact.analysis.scancode.ScanCodeSupport;
import com.metaeffekt.artifact.analysis.utils.ArchiveUtils;
import com.metaeffekt.artifact.analysis.utils.ConcurrentUtils;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.StringUtils;
import com.metaeffekt.artifact.terms.model.NormalizationMetaData;
import com.metaeffekt.flow.common.AbstractFlow;
import com.metaeffekt.resource.InventoryResource;
import org.joda.time.DateTime;
import org.metaeffekt.core.inventory.processor.model.Artifact;
import org.metaeffekt.core.inventory.processor.model.Inventory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import static com.metaeffekt.artifact.analysis.metascan.Constants.KEY_ARCHIVE_PATH;
import static com.metaeffekt.artifact.analysis.metascan.Constants.KEY_CONTENT_CHECKSUM;

public class ScanFlow extends AbstractFlow {

    private static final Logger LOG = LoggerFactory.getLogger(ScanFlow.class);

    /**
     * Runs the scan flow for the inventory referenced by the given parameters.
     * <p>
     * A new {@link ScanSupport} is created from the parameters and passed to the internal processing routine.
     *
     * @param scanFlowParam the parameters controlling the scan
     * @return the result of the internal processing routine
     * @throws IOException declared by the internal processing routine
     */
    public ScanFlowResult process(ScanFlowParam scanFlowParam) throws IOException {
        return process(scanFlowParam, new ScanSupport(scanFlowParam));
    }

    /**
     * Scans all artifacts of the inventory in parallel, derives notice parameters and optionally writes the
     * resulting inventory to the result target directory.
     *
     * @param scanFlowParam the parameters controlling the scan (inventory, thread count, result directory)
     * @param scanSupport the support instance used to scan the individual artifacts
     * @return a result wrapping the inventory that was scanned
     * @throws IOException declared for the invoked operations
     */
    private ScanFlowResult process(ScanFlowParam scanFlowParam, ScanSupport scanSupport) throws IOException {
        final Inventory inventory = scanFlowParam.getInventoryResource().getInventory();
        final int total = inventory.getArtifacts().size();

        // never use more threads than there are artifacts, but always at least one
        final int threadCount = Math.max(Math.min(scanFlowParam.getScanThreads(), total), 1);
        final ExecutorService executor = Executors.newFixedThreadPool(threadCount);

        final Map exceptions = new ConcurrentHashMap<>();

        // submit one scan task per artifact; the context shows the 1-based position for log output
        int position = 0;
        for (final Artifact artifact : inventory.getArtifacts()) {
            position++;
            final String context = String.format("[%s/%s]", position, total);
            executor.submit(() -> scanArtifact(scanFlowParam, scanSupport, artifact, context));
        }

        ConcurrentUtils.awaitTerminationOrCancelOnException(executor, exceptions);

        final InventoryResource inventoryResource = InventoryResource.fromInventory(inventory);
        generateNoticeParameters(scanFlowParam, inventoryResource);

        // the result inventory is only written when a target directory is configured
        final File resultTargetDir = scanFlowParam.getResultTargetDir();
        if (resultTargetDir != null) {
            FileUtils.forceMkDirQuietly(resultTargetDir);
            final String resultFileName = DateTime.now().getMillis() + ".xlsx";
            inventoryResource.sync(new File(resultTargetDir, resultFileName));
        }

        return new ScanFlowResult(inventory);
    }

    /**
     * Derives the notice parameters for the given inventory resource, unless the property
     * {@code analyze.noticeparameter.generate} disables this step.
     *
     * @param scanFlowParam provides the property provider and the normalization metadata
     * @param inventoryResource the inventory resource passed to the notice parameter flow
     */
    private void generateNoticeParameters(ScanFlowParam scanFlowParam, InventoryResource inventoryResource) {
        // NOTE(review): the third isProperty argument presumably is the default value — confirm
        final boolean generate = scanFlowParam.getPropertyProvider()
                .isProperty("analyze.noticeparameter.generate", "true", "true");
        if (!generate) {
            return;
        }

        // NOTE: since we have access to the scan results, we derive the notice parameter here
        final GenerateNoticeParameterFlowParam flowParam = GenerateNoticeParameterFlowParam.builder()
                .inventoryResource(inventoryResource)
                .normalizationMetaData(scanFlowParam.getNormalizationMetaData())
                .build();

        new GenerateNoticeParameterFlow().process(flowParam);
    }

    /**
     * Scans a single artifact and merges the collected results back into the provided instance.
     * <p>
     * All scan steps operate on a copy of {@code inputArtifact}. The copy is merged into {@code inputArtifact}
     * at the end, also when a scan step raised an exception; such exceptions are logged and not propagated.
     *
     * @param scanFlowParam the parameters; the property provider decides which parts are scanned
     * @param scanSupport the support instance performing the individual scans
     * @param inputArtifact the artifact to scan; receives the merged results
     * @param context prefix used in log output to identify the artifact being processed
     */
    private void scanArtifact(ScanFlowParam scanFlowParam, ScanSupport scanSupport, Artifact inputArtifact, String context) {

        // copy input artifact to isolate modifications from the provided instance
        final Artifact artifact = new Artifact(inputArtifact);

        // property switch -> artifact part that is scanned when the switch is enabled; order is significant
        final String[][] partSwitches = {
                {"analyze.binary.artifact", "Binary Artifact"},
                {"analyze.source.artifact", "Source Artifact"},
                {"analyze.source.archive", "Source Archive"},
                {"analyze.descriptor", "Descriptor"}
        };

        try {
            final PropertyProvider properties = scanFlowParam.getPropertyProvider();

            // analyze is currently a global switch
            if (properties.isProperty("analyze", "true", "true")) {

                for (final String[] partSwitch : partSwitches) {
                    if (properties.isProperty(partSwitch[0], "true", "true")) {
                        scanSupport.cloneAndScan(scanFlowParam, inputArtifact, partSwitch[1], artifact, context);
                    }
                }

                if (properties.isProperty("analyze.observed.artifact", "true", "true")) {
                    scanSupport.scanObservedArtifact(scanFlowParam, inputArtifact, artifact, context);
                }
            }

        } catch (Exception e) {
            // the context identifies the artifact in the log; the exception is passed last to keep the stack trace
            LOG.error("{} Exception while executing scan.", context, e);
        }

        // we merge everything that was produced
        inputArtifact.merge(artifact);
    }

    // NOTE: consider moving this to a separate class
    public static class ScanSupport {

        /**
         * Keys of the attributes that are cleared on the clone before a part scan and that are copied from the
         * scan result to the target artifact (prefixed with the part name) after the scan.
         */
        private static final String[] TRANSFER_KEY_SET = new String[] {
            Constants.KEY_DERIVED_LICENSES,
            Constants.KEY_DERIVED_MARKERS,
            Constants.KEY_DERIVED_NOTICE_PARAMETER,

            Constants.KEY_EXTRACTED_COPYRIGHTS_SCANCODE,
            Constants.KEY_EXTRACTED_AUTHORS_SCANCODE,

            Constants.KEY_ANALYSIS_PATH
        };

        // the scan steps; all are created in the constructor and invoked in sequence by scan(...)
        final MetaScanSupport metaScanSupport;
        final SourceSegmentationSupport sourceSegmentationSupport;
        final ScanCodeSupport scanCodeSupport;
        final CopyrightSegmentationSupport copyrightSegmentationSupport;

        /**
         * Creates the individual scan steps from the normalization metadata and the property provider of the
         * given parameters.
         *
         * @param scanFlowParam the parameters providing normalization metadata and properties
         */
        public ScanSupport(ScanFlowParam scanFlowParam) {
            final NormalizationMetaData metaData = scanFlowParam.getNormalizationMetaData();
            final PropertyProvider properties = scanFlowParam.getPropertyProvider();

            this.metaScanSupport = new MetaScanSupport(metaData, properties);
            this.sourceSegmentationSupport = new SourceSegmentationSupport(metaData, properties);
            this.scanCodeSupport = new ScanCodeSupport(properties);
            this.copyrightSegmentationSupport = new CopyrightSegmentationSupport(metaData, properties);
        }

        /**
         * Runs all scan steps on the given artifact in a fixed order: meta scan, ScanCode, source segmentation
         * and copyright segmentation.
         *
         * @param artifact the artifact passed to every scan step
         * @param artifactAnalysisDir the analysis directory passed to every scan step
         * @param context prefix for log output; only passed to the meta scan
         * @throws IOException declared for the invoked scan steps
         */
        public void scan(Artifact artifact, File artifactAnalysisDir, String context) throws IOException {
            // the steps run strictly in this order
            this.metaScanSupport.execute(artifact, artifactAnalysisDir, context);
            this.scanCodeSupport.execute(artifact, artifactAnalysisDir);
            this.sourceSegmentationSupport.runSegmentation(artifact, artifactAnalysisDir);
            this.copyrightSegmentationSupport.runSegmentation(artifact, artifactAnalysisDir);
        }

        /**
         * Scans one part of the input artifact on an isolated copy and transfers the resulting attributes to the
         * target artifact using part-prefixed keys.
         * <p>
         * The path of the part is read from the attribute {@code <partPrefix> - Path} of the input artifact.
         * Exceptions are logged and not propagated.
         *
         * @param scanFlowParam the parameters passed on to the path scan
         * @param inputArtifact the artifact providing the part path; it is copied, not modified
         * @param partPrefix the name of the part, used as key prefix and passed on as part label
         * @param artifact the target artifact receiving the prefixed attributes
         * @param context prefix for log output
         */
        private void cloneAndScan(ScanFlowParam scanFlowParam, Artifact inputArtifact, String partPrefix, Artifact artifact, String context) {
            try {
                // isolate substream scan
                final Artifact isolated = new Artifact(inputArtifact);

                // clear the transfer attributes so existing values are not copied implicitly after the scan
                for (final String key : TRANSFER_KEY_SET) {
                    isolated.set(key, null);
                }

                final String partPath = inputArtifact.get(partPrefix + " - Path");
                final Artifact scanned = scanArtifactPath(scanFlowParam, isolated, partPath, null, partPrefix, context);

                for (final String key : TRANSFER_KEY_SET) {
                    transferAttributes(scanned, artifact, partPrefix, key);
                }
            } catch (Exception e) {
                LOG.error("Exception during scan of [{}].", inputArtifact.deriveQualifier(), e);
            }
        }

        /**
         * Copies the value of {@code key} from the scanned clone to the target artifact, stored under the key
         * {@code <partPrefix> - <key>}.
         *
         * @param clone the artifact to read the value from
         * @param artifact the artifact to write the prefixed value to
         * @param partPrefix the part name used as key prefix
         * @param key the attribute key to transfer
         */
        private void transferAttributes(Artifact clone, Artifact artifact, String partPrefix, String key) {
            final String prefixedKey = partPrefix + " - " + key;
            final String value = clone.get(key);
            artifact.set(prefixedKey, value);
        }

        /**
         * Scans the file at the given archive path and returns a copy of the input artifact carrying the results.
         * <p>
         * When the archive path is empty the part is regarded as not present and the unmodified copy is returned.
         * Failures while unpacking or scanning do not abort the flow: they are logged and recorded in the
         * {@code Errors} attribute of the returned artifact.
         *
         * @param scanFlowParam provides the analysis base directory
         * @param inputArtifact the artifact to copy; it is not modified
         * @param archivePath the path of the file to scan; may be empty
         * @param archiveContentCheckSumIfAvailable checksum used to derive the analysis folder; when it has no text
         *                                          the checksum is computed from the file
         * @param part label of the scanned part, used for log output
         * @param context prefix for log output
         * @return the copy of the input artifact, enriched with the scan results when a path was given
         */
        private Artifact scanArtifactPath(final ScanFlowParam scanFlowParam,
                                          final Artifact inputArtifact,
                                          final String archivePath,
                                          final String archiveContentCheckSumIfAvailable, final String part, String context) {

            final Artifact artifact = new Artifact(inputArtifact);

            // the archive path is a mandatory input to perform scanning; if the archive path is not set the part
            // is regarded not present
            if (StringUtils.notEmpty(archivePath)) {

                // NOTE: the archivePath may not exist (synchronized analysis folders); so we continue ... [*]

                final File archiveFile = new File(archivePath);
                final String name = archiveFile.getName();

                // NOTE: the checksum is used from the inventory; not from the file itself
                final File artifactAnalysisDir = deriveAnalysisFolder(
                        scanFlowParam.getAnalysisBaseDir(), archiveFile, archiveContentCheckSumIfAvailable);

                // manage attributes before scan (the invoked scan uses the KEY_ARCHIVE_PATH as input)
                artifact.setId(name);
                artifact.set(KEY_ARCHIVE_PATH, archivePath);
                artifact.set(Constants.KEY_ANALYSIS_PATH, artifactAnalysisDir.getAbsolutePath());

                LOG.info("{} Scanning ({}) [{}]", context, part, artifact.get(KEY_ARCHIVE_PATH));
                LOG.info("{} Scanning ({}) in [{}]", context, part, artifact.get(Constants.KEY_ANALYSIS_PATH));

                // the scan part operates on shared resources; we synchronize on class name + analysis path
                // NOTE(review): the monitor is an interned string and therefore JVM-wide; any other code locking
                // on an equal interned string would contend with this block
                final String semaphore = getClass().getName() + artifactAnalysisDir.getAbsolutePath();
                synchronized (semaphore.intern()) {
                    // ensure the artifact was extracted or copied to the analysis folder
                    try {
                        // NOTE: validation of existence is responsibility of the unpackArchiveOrCopyFile() method
                        ArchiveUtils.unpackArchiveOrCopyFile(archiveFile, artifactAnalysisDir);

                        // [*] ... after all the artifactAnalysisDir is expected; if not we cannot scan
                        FileUtils.validateExists(artifactAnalysisDir);

                        scan(artifact, artifactAnalysisDir, context);
                    } catch (Exception e) {
                        // the scan stays best-effort, but the stack trace must not be lost
                        LOG.warn("{} Scanning ({}) of [{}] failed.", context, part, archivePath, e);

                        // exceptions without message would otherwise leave no usable trace in the artifact
                        final String message = e.getMessage() != null ? e.getMessage() : e.toString();
                        artifact.append("Errors", message, "\n");
                    }
                }

                // manage "Analysis Path"; set again after the scan
                artifact.set(Constants.KEY_ANALYSIS_PATH, artifactAnalysisDir.getAbsolutePath());
            }

            return artifact;
        }

        /**
         * Derives the analysis folder for a part file: the organized folder for the file name, extended by a
         * sub-folder built from file name and checksum.
         *
         * @param analysisBaseDir the base directory for all analysis folders
         * @param partFile the file to derive the folder for
         * @param archiveContentChecksumIfAvailable checksum to use; when it has no text the checksum is computed
         *                                          from {@code partFile}
         * @return the analysis folder for the part file
         */
        private File deriveAnalysisFolder(File analysisBaseDir, File partFile, String archiveContentChecksumIfAvailable) {
            final String partName = partFile.getName();
            final File organizedTargetDir = deriveAnalysisFolder(analysisBaseDir, partName);

            // prefer the provided checksum; only compute one from the file as fallback
            final String checksum;
            if (StringUtils.hasText(archiveContentChecksumIfAvailable)) {
                checksum = archiveContentChecksumIfAvailable;
            } else {
                checksum = FileUtils.computeChecksum(partFile);
            }

            return new File(organizedTargetDir, deriveUnpackTargetFolder(partName, checksum));
        }

        /**
         * Derives a three-level folder below the base directory from a file name.
         * <p>
         * The file name is lower-cased (locale-independent), trimmed and stripped of the first
         * {@code -<digits>} sequence and everything following it. The levels are the first character, the first
         * two characters and the complete remaining name, each enclosed in square brackets; e.g.
         * {@code Commons-Lang3-3.12.0.jar} results in {@code [c]/[co]/[commons-lang3]}.
         *
         * @param targetBaseDir the directory the folder levels are created below
         * @param filename the file name to derive the folder from
         * @return the derived folder; this method does not create it on disk
         */
        public File deriveAnalysisFolder(File targetBaseDir, String filename) {
            // convert everything to lowercase; Locale.ROOT keeps the folder name independent of the JVM default
            // locale (e.g. a Turkish locale would otherwise map 'I' to a dotless 'ı')
            String filenameLowerCase = filename.toLowerCase(Locale.ROOT).trim();

            // we use a regular expression to strip off everything that looks like a version (and the remaining string)
            String noVersionFilename = filenameLowerCase.replaceFirst("-[0-9]+.*", "");

            // a name starting with the version pattern would become empty; fall back to the complete name
            if (noVersionFilename.isEmpty()) {
                noVersionFilename = filenameLowerCase;
            }

            noVersionFilename = noVersionFilename.replace("/", "_");

            // Math.min guards against names shorter than the prefix length
            int length = noVersionFilename.length();
            String level1 = "[" + noVersionFilename.substring(0, Math.min(length, 1)) + "]";
            String level2 = "[" + noVersionFilename.substring(0, Math.min(length, 2)) + "]";
            String level3 = "[" + noVersionFilename + "]";

            return new File(new File(new File(targetBaseDir, level1), level2), level3);
        }

        /**
         * Builds the name of the unpack target folder in the form {@code [<filename>-<checksum>]}; slashes in
         * the file name are replaced by underscores.
         *
         * @param filename the file name to use
         * @param checksum the checksum to append
         * @return the folder name enclosed in square brackets
         */
        public static String deriveUnpackTargetFolder(String filename, String checksum) {
            final String flattenedName = filename.replace("/", "_");
            return new StringBuilder("[")
                    .append(flattenedName)
                    .append("-")
                    .append(checksum)
                    .append("]")
                    .toString();
        }

        /**
         * Scans the observed artifact itself, using the archive path and content checksum of the input artifact,
         * and merges the result into the target artifact. Exceptions are logged and not propagated.
         *
         * @param scanFlowParam the parameters passed on to the path scan
         * @param inputArtifact provides the archive path and the content checksum
         * @param artifact the artifact that is scanned (as a copy) and receives the merged result
         * @param context prefix for log output
         */
        private void scanObservedArtifact(ScanFlowParam scanFlowParam, Artifact inputArtifact, Artifact artifact, String context) {
            try {
                // for a composite Archive Path the content checksum is provided instead of the checksum of the "artificial"
                // compressed artifact; the scan works on a copy of artifact, which is merged back below
                final Artifact result = scanArtifactPath(scanFlowParam, artifact,
                        inputArtifact.get(KEY_ARCHIVE_PATH), inputArtifact.get(KEY_CONTENT_CHECKSUM), "observed", context);

                // NOTE: we need to ensure that we do not mix up cases and are duplicating scans due to different
                // analysis paths and checksums used. Observed artifacts and original artifacts have to be separated.

                artifact.merge(result);
            } catch (Exception e) {
                // the context identifies the artifact in the log; the exception is passed last to keep the stack trace
                LOG.error("{} Exception while executing scan of observed artifact.", context, e);
            }
        }

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy