All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.artifact.extractors.configuration.DirectoryScanExtractorConfiguration Maven / Gradle / Ivy

/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.artifact.extractors.configuration;

import com.metaeffekt.artifact.analysis.metascan.Constants;
import com.metaeffekt.artifact.analysis.utils.*;
import org.apache.commons.lang3.Validate;
import org.metaeffekt.core.inventory.processor.filescan.FileSystemScanConstants;
import org.metaeffekt.core.inventory.processor.model.Artifact;
import org.metaeffekt.core.inventory.processor.model.ComponentPatternData;
import org.metaeffekt.core.inventory.processor.model.Inventory;
import org.metaeffekt.core.inventory.processor.patterns.ComponentPatternProducer;
import org.metaeffekt.core.inventory.processor.reader.InventoryReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.util.*;

import static com.metaeffekt.artifact.analysis.metascan.Constants.KEY_CONTENT_CHECKSUM;

public class DirectoryScanExtractorConfiguration extends AbstractExtractorConfiguration {

    private static final Logger LOG = LoggerFactory.getLogger(DirectoryScanExtractorConfiguration.class);

    // FIXME: create InventoryResource that hides the file/object

    /**
     * Reference inventory location (file or directory); <code>null</code> when the configuration was created
     * with an in-memory reference inventory.
     */
    private final File referenceInventoryFile;

    /**
     * In-memory reference inventory; <code>null</code> when the configuration was created with a reference
     * inventory file.
     */
    private final Inventory referenceInventory;

    /** Base directory against which the relative paths recorded on the artifacts are resolved. */
    private final File scanBaseDir;

    /**
     * Creates a configuration that reads the reference inventory from the file system when required.
     *
     * @param id                      The id passed on to the super class.
     * @param referenceInventoryFile  The reference inventory; either a single inventory file or a directory
     *                                containing inventory files.
     * @param scanResultInventoryFile The inventory file produced by the scan; passed on to the super class.
     * @param scanBaseDir             The base directory the relative artifact paths are resolved against.
     */
    public DirectoryScanExtractorConfiguration(String id, File referenceInventoryFile, File scanResultInventoryFile, File scanBaseDir) {
        super(id, scanResultInventoryFile);

        // file-based variant; the in-memory reference inventory remains unset
        this.referenceInventoryFile = referenceInventoryFile;
        this.referenceInventory = null;
        this.scanBaseDir = scanBaseDir;
    }

    /**
     * Creates a configuration that uses an already loaded reference inventory.
     *
     * @param id                      The id passed on to the super class.
     * @param referenceInventory      The in-memory reference inventory.
     * @param scanResultInventoryFile The inventory file produced by the scan; passed on to the super class.
     * @param scanBaseDir             The base directory the relative artifact paths are resolved against.
     */
    public DirectoryScanExtractorConfiguration(String id, Inventory referenceInventory, File scanResultInventoryFile, File scanBaseDir) {
        super(id, scanResultInventoryFile);

        // in-memory variant; the reference inventory file remains unset
        this.referenceInventoryFile = null;
        this.referenceInventory = referenceInventory;
        this.scanBaseDir = scanBaseDir;
    }

    @Override
    public void contribute(File targetDir, Inventory aggregatedInventory) throws IOException {

        // load reference inventory
        final Inventory referenceInventory = loadReferenceInventory();

        // load result inventory
        final Inventory resultInventory = loadResultInventory();

        // initialize component pattern map
        final Map> qualifierToComponentPatternMap = new HashMap<>();

        // contribute component pattern from reference inventory
        contributeReferenceComponentPatterns(referenceInventory, qualifierToComponentPatternMap);

        // contribute project component patterns (no overwrite; reference has the control)
        contributeReferenceComponentPatterns(resultInventory, qualifierToComponentPatternMap);

        // use an intermediate folder to aggregate all files matching the component pattern
        final File tmpFolder = initializeTmpFolder(targetDir);

        final Set removableArtifacts = new HashSet<>();

        // iterate extracted artifacts and match with component patterns
        int i = 1;
        for (Artifact artifact : resultInventory.getArtifacts()) {
            LOG.info("Processing artifact {}/{}: {}", i++, resultInventory.getArtifacts().size(), artifact.deriveQualifier());

            aggregatedInventory.getArtifacts().add(artifact);

            // check artifact is covered by a component pattern
            String componentName = artifact.getComponent();
            String componentVersion = artifact.getVersion();
            String componentPart = artifact.getId();

            // identify matching component patterns (this may overlap with real artifacts)
            final ComponentPatternMatches componentPatternMatches = findComponentPatternMatches(
                    qualifierToComponentPatternMap, componentName, componentVersion, componentPart);

            // iterate found component patterns for artifact
            if (componentPatternMatches.list != null) {
                contributeComponentPatternCoveredFiles(artifact, componentPatternMatches, tmpFolder);
            } else {
                
                // deep scanned artifacts are not further scanned; it would be good to get an aggregated view however
                // NOTE: in the future we should collect the delta
                if (ArtifactUtils.hasScanClassification(artifact)) {
                    manageArtifactsWithScanClassification(artifact, targetDir, tmpFolder, removableArtifacts);

                    // skip other process parts
                    continue;
                }

                final Set files = artifact.getProjects();
                for (String file : files) {
                    final File srcFile = new File(getExtractedFilesBaseDir(), file);
                    final String path = FileUtils.asRelativePath(getExtractedFilesBaseDir(), srcFile);
                    final File dstFile = new File(targetDir, path);

                    if (srcFile.exists() && !srcFile.isDirectory()) {
                        FileUtils.copyFile(srcFile, dstFile);
                        artifact.set("Archive Path", dstFile.getAbsolutePath());
                    }
                }
            }
        }

        // now zip the files to one archive to be consumed by the scanner
        compress(targetDir, resultInventory, qualifierToComponentPatternMap, tmpFolder, removableArtifacts);

        // compensate artifacts of DirectoryScan (matched ComponentPatterns; no files)
        resultInventory.getArtifacts().removeAll(removableArtifacts);

        checkCompletenessOfArchivePath(resultInventory);

        // NOTE: we keep the tmp folder to be able to debug the process; the tmp folder will be deleted by the next run
        // this could be done by simply renaming the directory
    }

    /**
     * Handles an artifact with scan classification that is not covered by a component pattern. In case the
     * artifact file itself does not exist, but the unpacked folder (<code>[&lt;file name&gt;]</code> next to the
     * expected file location) does, the folder is zipped to the target directory and archive path and content
     * checksum are set on the artifact.
     *
     * @param artifact           The artifact to process; only the first of its relative paths is evaluated.
     * @param targetDir          The directory receiving the zip archive.
     * @param tmpFolder          The folder receiving the content checksum file.
     * @param removableArtifacts Receives the artifact in case no zip archive was produced.
     *
     * @throws IOException In case archiving or checksum computation fails.
     */
    private void manageArtifactsWithScanClassification(Artifact artifact, File targetDir, File tmpFolder, Set<Artifact> removableArtifacts) throws IOException {
        final Set<String> relativePaths = getRelativePaths(artifact);
        final String path = relativePaths.isEmpty() ? null : relativePaths.iterator().next();
        if (path == null) {
            return;
        }

        final File file = new File(getExtractedFilesBaseDir(), path);
        if (file.exists()) {
            // this case should be covered anyways by normal processing
            // NOTE(review): the caller skips its plain file copy for scan-classified artifacts; confirm which
            //  processing step is expected to cover an existing file
            return;
        }

        // FIXME: extract helper function in core
        final File unpackedFile = new File(file.getParentFile(), "[" + file.getName() + "]");
        if (!unpackedFile.exists()) {
            artifact.append("Errors", "Cannot extract files from extraction project.", ",");
            return;
        }

        // archive the unpacked folder
        final File targetZipFile = new File(targetDir, artifact.getId() + "_scanned.zip");
        FileUtils.zipAnt(unpackedFile, targetZipFile);

        // no archive was produced; mark as covered/removable
        if (!targetZipFile.exists()) {
            removableArtifacts.add(artifact);
            return;
        }

        // point Archive Path to zip
        artifact.set(Constants.KEY_ARCHIVE_PATH, targetZipFile.getAbsolutePath());

        // compute content checksum of original folder and set on artifact
        final File contentChecksumFile = new File(tmpFolder, targetZipFile.getName() + ".content.md5");
        FileUtils.createDirectoryContentChecksumFile(unpackedFile, contentChecksumFile);
        final String contentCheckSum = FileUtils.computeChecksum(contentChecksumFile);
        artifact.set(KEY_CONTENT_CHECKSUM, contentCheckSum);
    }

    /**
     * Aggregates all files covered by the matched component patterns into a dedicated folder below the
     * temporary folder. The folder is named after the component pattern key.
     *
     * @param artifact                The artifact; its relative paths pre-select the locations to evaluate.
     * @param componentPatternMatches The component patterns matched for the artifact; the list must not be null.
     * @param tmpFolder               The temporary folder in which the files are aggregated.
     *
     * @throws IOException In case directories cannot be created or files cannot be copied.
     */
    private void contributeComponentPatternCoveredFiles(Artifact artifact, ComponentPatternMatches componentPatternMatches, File tmpFolder) throws IOException {
        final File targetFolder = new File(tmpFolder, componentPatternMatches.key);
        FileUtils.forceMkdir(targetFolder);

        // use the projects attribute to pre-select the folder to be scanned; can be multiple
        // (independent of the individual component pattern; therefore determined once)
        final Set<String> componentBaseDirs = getRelativePaths(artifact);

        // aggregate all covered files into one directory
        for (ComponentPatternData cpd : componentPatternMatches.list) {
            final String includes = cpd.get(ComponentPatternData.Attribute.INCLUDE_PATTERN);
            final String excludes = cpd.get(ComponentPatternData.Attribute.EXCLUDE_PATTERN);

            for (final String baseDir : componentBaseDirs) {

                // derive absolute componentBaseDir
                final File componentBaseDir = new File(getExtractedFilesBaseDir(), baseDir);

                // names of the shape "[<archive name>]" may refer to a scanned artifact; if the folder does not
                // exist we need to unpack the archive again; the shape is checked completely to not derive an
                // archive name from a name that only starts with a bracket
                final String fileName = componentBaseDir.getName();
                final boolean unpackedFolderName = fileName.length() > 2 && fileName.startsWith("[") && fileName.endsWith("]");
                if (unpackedFolderName && !componentBaseDir.exists()) {
                    final String archiveName = fileName.substring(1, fileName.length() - 1);
                    final File archiveFile = new File(componentBaseDir.getParentFile(), archiveName);
                    ArchiveUtils.unpackIfPossible(archiveFile, componentBaseDir, new ArrayList<>());
                }

                // check the directory (now) exists and can be used to further evaluate the component patterns
                if (!componentBaseDir.exists()) {
                    continue;
                }

                // aggregate matching files to dedicated folders
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Scanning {} including {} excluding {}", componentBaseDir, includes, excludes);
                }

                // differentiate directories and single files
                if (componentBaseDir.isDirectory()) {
                    aggregateComponentFiles(cpd, getExtractedFilesBaseDir(), componentBaseDir, includes, excludes, targetFolder);
                } else {
                    // the component pattern matches a single file; this is what we copy
                    final File targetFile = new File(targetFolder, componentBaseDir.getName());
                    FileUtils.copyFile(componentBaseDir, targetFile);
                }
            }
        }
    }

    /**
     * Copies the files covered by the include/exclude patterns of a component pattern into the target folder.
     * Relative patterns are evaluated against the component base directory; absolute patterns (leading '/')
     * are evaluated against the base directory.
     *
     * @param cpd              The component pattern the include/exclude patterns originate from.
     * @param baseDir          The base directory absolute patterns are evaluated against.
     * @param componentBaseDir The directory relative patterns are evaluated against.
     * @param includes         The include patterns of the component pattern.
     * @param excludes         The exclude patterns of the component pattern.
     * @param targetFolder     The folder receiving the covered files.
     *
     * @throws IOException In case files cannot be copied.
     */
    private void aggregateComponentFiles(ComponentPatternData cpd, File baseDir, File componentBaseDir, String includes, String excludes, File targetFolder) throws IOException {

        // split includes/excludes in relative and absolute paths
        final ComponentPatternProducer.NormalizedPatternSet includePatternSet = ComponentPatternProducer.normalizePattern(includes);
        final ComponentPatternProducer.NormalizedPatternSet excludePatternSet = ComponentPatternProducer.normalizePattern(excludes);

        final Set<String> relativizedIncludePatterns = relativizePatterns(includePatternSet.absolutePatterns);
        final Set<String> relativizedExcludePatterns = relativizePatterns(excludePatternSet.absolutePatterns);

        int count = 0;

        if (!includePatternSet.relativePatterns.isEmpty()) {
            final String[] relativeCoveredFiles = FileUtils.scanDirectoryForFiles(componentBaseDir,
                    toArray(includePatternSet.relativePatterns), toArray(excludePatternSet.relativePatterns));
            aggregateFiles(componentBaseDir, relativeCoveredFiles, targetFolder);
            count += relativeCoveredFiles.length;
        }

        if (!relativizedIncludePatterns.isEmpty()) {
            final String[] absoluteCoveredFiles = FileUtils.scanDirectoryForFiles(baseDir,
                    toArray(relativizedIncludePatterns), toArray(relativizedExcludePatterns));
            aggregateFiles(baseDir, absoluteCoveredFiles, targetFolder);
            count += absoluteCoveredFiles.length;
        }

        if (count == 0) {
            // a component pattern not covering any file is reported, but deliberately does not abort the process
            // FIXME: consider a parameter to control whether this is treated as failure
            LOG.warn("Identified component pattern does not match any file: {} (evaluated in {})",
                    cpd.deriveQualifier(), componentBaseDir);
        }
    }

    /**
     * Copies the given files from the base directory to the target folder preserving their relative paths.
     *
     * @param baseDir      The directory the covered files are relative to.
     * @param coveredFiles The relative paths of the files to copy.
     * @param targetFolder The folder receiving the copies.
     *
     * @throws IOException In case a file cannot be copied.
     */
    private void aggregateFiles(File baseDir, String[] coveredFiles, File targetFolder) throws IOException {
        for (final String relativePath : coveredFiles) {
            final File destination = new File(targetFolder, relativePath);

            // files already present in the target folder are kept as they are
            if (destination.exists()) {
                continue;
            }

            FileUtils.copyFile(new File(baseDir, relativePath), destination);
        }
    }

    /**
     * Converts a pattern set to an array.
     *
     * @param patternSet The patterns to convert; must not be null.
     *
     * @return The patterns as array or <code>null</code> in case the set is empty.
     */
    private String[] toArray(Set<String> patternSet) {
        if (patternSet.isEmpty()) {
            return null;
        }
        return patternSet.toArray(new String[0]);
    }

    /**
     * Converts absolute patterns (leading '/') into patterns relative to the base directory by removing the
     * leading slash.
     *
     * @param patternSet The absolute patterns; may be null.
     *
     * @return A new set with the relativized patterns; empty in case no patterns were provided.
     *
     * @throws IllegalStateException In case a pattern does not start with '/'.
     */
    private Set<String> relativizePatterns(Set<String> patternSet) {
        final Set<String> relativizedPatterns = new HashSet<>();
        if (patternSet == null) {
            return relativizedPatterns;
        }

        for (String absolutePattern : patternSet) {
            if (!absolutePattern.startsWith("/")) {
                throw new IllegalStateException("Absolute normalized pattern does not start with '/': " + absolutePattern);
            }
            relativizedPatterns.add(absolutePattern.substring(1));
        }
        return relativizedPatterns;
    }

    /**
     * Provides an empty <code>.tmp</code> folder next to the target directory. A folder remaining from a
     * previous run is deleted first.
     *
     * @param targetDir The target directory; the temporary folder is created in its parent directory.
     *
     * @return The (empty) temporary folder.
     *
     * @throws IOException In case the previous folder cannot be deleted or the folder cannot be created.
     */
    private static File initializeTmpFolder(File targetDir) throws IOException {
        final File tmpFolder = new File(targetDir.getParentFile(), ".tmp");
        if (tmpFolder.exists()) {
            FileUtils.deleteDirectory(tmpFolder);
        }

        // fail right away in case the folder cannot be created; otherwise the issue only surfaces on first write
        FileUtils.forceMkdir(tmpFolder);
        return tmpFolder;
    }

    /**
     * Reads the scan result inventory from the result inventory file.
     *
     * @return The inventory read from the result inventory file.
     *
     * @throws IOException In case the inventory cannot be read.
     */
    private Inventory loadResultInventory() throws IOException {
        final File resultFile = getResultInventoryFile();

        // the existence of the file is validated before attempting to read it
        FileUtils.validateExists(resultFile);

        return new InventoryReader().readInventory(resultFile);
    }

    /**
     * Provides the reference inventory. In case a reference inventory file is configured the inventory is read
     * from the file (or from all <code>*.xls</code> files in case a directory is configured); otherwise the
     * in-memory reference inventory is used.
     *
     * @return The reference inventory.
     *
     * @throws IOException In case the inventory cannot be read.
     */
    private Inventory loadReferenceInventory() throws IOException {
        // without file the configuration must have been created with an in-memory inventory
        if (referenceInventoryFile == null) {
            Validate.notNull(this.referenceInventory);
            return this.referenceInventory;
        }

        FileUtils.validateExists(referenceInventoryFile);

        // a directory may contain several inventory files
        if (referenceInventoryFile.isDirectory()) {
            return InventoryUtils.readInventory(referenceInventoryFile, "*.xls");
        }

        return new InventoryReader().readInventory(referenceInventoryFile);
    }

    private void contributeReferenceComponentPatterns(Inventory referenceInventory, Map> componentPatternMap) {
        for (ComponentPatternData cpd : referenceInventory.getComponentPatternData()) {
            final String key = deriveMapQualifier(cpd);

            List list = componentPatternMap.computeIfAbsent(key, c -> new ArrayList<>());

            componentPatternMap.put(key, list);

            // also include fallback mapping (in case component name does not match)
            componentPatternMap.put(deriveFallbackMapQualifier(
                    cpd.get(ComponentPatternData.Attribute.COMPONENT_PART),
                    cpd.get(ComponentPatternData.Attribute.COMPONENT_VERSION)), list);

            list.add(cpd);
        }
    }

    /**
     * Determines the paths of the artifact relative to the original scan base directory.
     *
     * @param artifact The artifact to determine the paths for.
     *
     * @return A new set with the projects of the artifact; in case no projects are available the set contains
     *         the artifact path attribute (if set). The set may be empty.
     */
    private static Set<String> getRelativePaths(Artifact artifact) {
        // use information from projects (relative to original scanBaseDir)
        final Set<String> componentBaseDirs = new HashSet<>(artifact.getProjects());

        // in case information is not available fall back to ATTRIBUTE_KEY_ARTIFACT_PATH (also relative to original scanBaseDir)
        if (componentBaseDirs.isEmpty()) {
            final String artifactPath = artifact.get(FileSystemScanConstants.ATTRIBUTE_KEY_ARTIFACT_PATH);
            if (!StringUtils.isEmpty(artifactPath)) {
                componentBaseDirs.add(artifactPath);
            }
        }
        return componentBaseDirs;
    }

    /**
     * @return The directory the relative artifact paths are resolved against; this is the configured scan
     *         base directory.
     */
    private File getExtractedFilesBaseDir() {
        return this.scanBaseDir;
    }

    private ComponentPatternMatches findComponentPatternMatches(Map> componentPatternMap, String componentName, String componentVersion, String componentPart) {
        ComponentPatternMatches componentPatternMatch = new ComponentPatternMatches();
        componentPatternMatch.key = deriveMapQualifier(componentName, componentPart, componentVersion);
        componentPatternMatch.list = componentPatternMap.get(componentPatternMatch.key);
        if (componentPatternMatch.list == null) {
            String modulatedKey = deriveFallbackMapQualifier(componentPart, componentVersion);
            componentPatternMatch.list = componentPatternMap.get(modulatedKey);
        }
        return componentPatternMatch;
    }

    private static class ComponentPatternMatches {
        protected String key;
        protected List list;
    }

    private void compress(File targetDir, Inventory inventory, Map> componentPatternMap,
                          File tmpFolder, Set removableArtifacts) throws IOException {
        int i = 1;
        for (Artifact artifact : inventory.getArtifacts()) {

            LOG.info("Compressing artifact {}/{}: {}", i++, inventory.getArtifacts().size(), artifact.deriveQualifier());

            // check artifact is covered by a component pattern
            String componentName = artifact.getComponent();
            String componentVersion = artifact.getVersion();
            String componentPart = artifact.getId();

            ComponentPatternMatches componentPatternMatch = findComponentPatternMatches(componentPatternMap, componentName, componentVersion, componentPart);

            if (componentPatternMatch.list != null) {
                // now package the aggregated folder
                final File targetFolder = new File(tmpFolder, componentPatternMatch.key);
                final File targetZipFile = new File(targetDir, componentPatternMatch.key + ".zip");

                FileUtils.zipAnt(targetFolder, targetZipFile);
                if (!targetZipFile.exists()) {
                    removableArtifacts.add(artifact);
                    continue;
                }
                artifact.set(Constants.KEY_ARCHIVE_PATH, targetZipFile.getAbsolutePath());

                final File contentChecksumFile = new File(tmpFolder, targetZipFile.getName() + ".content.md5");
                FileUtils.createDirectoryContentChecksumFile(targetFolder, contentChecksumFile);

                // set the content checksum
                final String contentChecksum = FileUtils.computeChecksum(contentChecksumFile);
                artifact.set(KEY_CONTENT_CHECKSUM, contentChecksum);
            }
        }
    }

    /**
     * There are multiple reasons for empty archive paths:
     * 
    *
  • * The relevant component pattern is not included in the reference inventory, This is a configuration issue. *
  • *
  • * There is no content available for the artifact. E.g. a logical package configuration without physical files. * This may require provision of download urls or additional content (however at a later stage) *
  • *
  • * Artifacts that have been unpacked for scanning the content (classification contains 'scan') *
  • *
* * @param inventory The inventory to check for KEY_ARCHIVE_PATH completeness. */ private void checkCompletenessOfArchivePath(Inventory inventory) { for (final Artifact artifact : inventory.getArtifacts()) { String archivePath = artifact.get(Constants.KEY_ARCHIVE_PATH); // deep scanned artifacts are not further scanned. It would be good to get an aggregated view however if (ArtifactUtils.hasScanClassification(artifact)) continue; if (!StringUtils.hasText(archivePath)) { // only report issue, when we have a checksum; implicitly excluded shaded subcomponents from being reported if (StringUtils.hasText(artifact.getChecksum())) { LOG.warn("Artifact {} does not have an archive path! " + "Validate that the component patterns for this process are complete.", artifact); } } } } private String deriveFallbackMapQualifier(String componentPart, String componentVersion) { final StringBuilder sb = new StringBuilder(); sb.append(componentPart); if (StringUtils.notEmpty(componentVersion)) { sb.append("-"); sb.append(componentVersion); } return sb.toString(); } private String deriveMapQualifier(String componentName, String componentPart, String componentVersion) { final StringBuilder sb = new StringBuilder(); if (StringUtils.notEmpty(componentName)) { sb.append(componentName).append("-"); } sb.append(componentPart); if (StringUtils.notEmpty(componentVersion)) { sb.append("-"); sb.append(componentVersion); } return sb.toString(); } private String deriveMapQualifier(ComponentPatternData cpd) { return deriveMapQualifier( cpd.get(ComponentPatternData.Attribute.COMPONENT_NAME), cpd.get(ComponentPatternData.Attribute.COMPONENT_PART), cpd.get(ComponentPatternData.Attribute.COMPONENT_VERSION)); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy