// Source listing: com.metaeffekt.artifact.extractors.configuration.DirectoryScanExtractorConfiguration (Maven / Gradle / Ivy)
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.artifact.extractors.configuration;
import com.metaeffekt.artifact.analysis.metascan.Constants;
import com.metaeffekt.artifact.analysis.utils.*;
import org.apache.commons.lang3.Validate;
import org.metaeffekt.core.inventory.processor.filescan.FileSystemScanConstants;
import org.metaeffekt.core.inventory.processor.model.Artifact;
import org.metaeffekt.core.inventory.processor.model.ComponentPatternData;
import org.metaeffekt.core.inventory.processor.model.Inventory;
import org.metaeffekt.core.inventory.processor.patterns.ComponentPatternProducer;
import org.metaeffekt.core.inventory.processor.reader.InventoryReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.*;
import static com.metaeffekt.artifact.analysis.metascan.Constants.KEY_CONTENT_CHECKSUM;
public class DirectoryScanExtractorConfiguration extends AbstractExtractorConfiguration {
private static final Logger LOG = LoggerFactory.getLogger(DirectoryScanExtractorConfiguration.class);
// FIXME: create InventoryResource that hides the file/object
final private File referenceInventoryFile;
final private Inventory referenceInventory;
final private File scanBaseDir;
public DirectoryScanExtractorConfiguration(String id, File referenceInventoryFile, File scanResultInventoryFile, File scanBaseDir) {
super(id, scanResultInventoryFile);
this.scanBaseDir = scanBaseDir;
this.referenceInventoryFile = referenceInventoryFile;
this.referenceInventory = null;
}
public DirectoryScanExtractorConfiguration(String id, Inventory referenceInventory, File scanResultInventoryFile, File scanBaseDir) {
super(id, scanResultInventoryFile);
this.scanBaseDir = scanBaseDir;
this.referenceInventory = referenceInventory;
this.referenceInventoryFile = null;
}
@Override
public void contribute(File targetDir, Inventory aggregatedInventory) throws IOException {
// load reference inventory
final Inventory referenceInventory = loadReferenceInventory();
// load result inventory
final Inventory resultInventory = loadResultInventory();
// initialize component pattern map
final Map> qualifierToComponentPatternMap = new HashMap<>();
// contribute component pattern from reference inventory
contributeReferenceComponentPatterns(referenceInventory, qualifierToComponentPatternMap);
// contribute project component patterns (no overwrite; reference has the control)
contributeReferenceComponentPatterns(resultInventory, qualifierToComponentPatternMap);
// use an intermediate folder to aggregate all files matching the component pattern
final File tmpFolder = initializeTmpFolder(targetDir);
final Set removableArtifacts = new HashSet<>();
// iterate extracted artifacts and match with component patterns
int i = 1;
for (Artifact artifact : resultInventory.getArtifacts()) {
LOG.info("Processing artifact {}/{}: {}", i++, resultInventory.getArtifacts().size(), artifact.deriveQualifier());
aggregatedInventory.getArtifacts().add(artifact);
// check artifact is covered by a component pattern
String componentName = artifact.getComponent();
String componentVersion = artifact.getVersion();
String componentPart = artifact.getId();
// identify matching component patterns (this may overlap with real artifacts)
final ComponentPatternMatches componentPatternMatches = findComponentPatternMatches(
qualifierToComponentPatternMap, componentName, componentVersion, componentPart);
// iterate found component patterns for artifact
if (componentPatternMatches.list != null) {
contributeComponentPatternCoveredFiles(artifact, componentPatternMatches, tmpFolder);
} else {
// deep scanned artifacts are not further scanned; it would be good to get an aggregated view however
// NOTE: in the future we should collect the delta
if (ArtifactUtils.hasScanClassification(artifact)) {
manageArtifactsWithScanClassification(artifact, targetDir, tmpFolder, removableArtifacts);
// skip other process parts
continue;
}
final Set files = artifact.getProjects();
for (String file : files) {
final File srcFile = new File(getExtractedFilesBaseDir(), file);
final String path = FileUtils.asRelativePath(getExtractedFilesBaseDir(), srcFile);
final File dstFile = new File(targetDir, path);
if (srcFile.exists() && !srcFile.isDirectory()) {
FileUtils.copyFile(srcFile, dstFile);
artifact.set("Archive Path", dstFile.getAbsolutePath());
}
}
}
}
// now zip the files to one archive to be consumed by the scanner
compress(targetDir, resultInventory, qualifierToComponentPatternMap, tmpFolder, removableArtifacts);
// compensate artifacts of DirectoryScan (matched ComponentPatterns; no files)
resultInventory.getArtifacts().removeAll(removableArtifacts);
checkCompletenessOfArchivePath(resultInventory);
// NOTE: we keep the tmp folder to be able to debug the process; the tmp folder will be deleted by the next run
// this could be done by simply renaming the directory
}
private void manageArtifactsWithScanClassification(Artifact artifact, File targetDir, File tmpFolder, Set removableArtifacts) throws IOException {
final Set relativePaths = getRelativePaths(artifact);
final String path = relativePaths.size() > 0 ? relativePaths.iterator().next() : null;
if (path != null) {
final File file = new File(getExtractedFilesBaseDir(), path);
if (file.exists()) {
// this case should be covered anyways by normal processing
} else {
// FIXME: extract helper function in core
File unpackedFile = new File(file.getParentFile(), "[" + file.getName() + "]");
if (unpackedFile.exists()) {
// archive the unpacked folder
final File targetZipFile = new File(targetDir, artifact.getId() + "_scanned.zip");
FileUtils.zipAnt(unpackedFile, targetZipFile);
// mark as covered/removable
if (!targetZipFile.exists()) {
removableArtifacts.add(artifact);
return;
}
// point Archive Path to zip
artifact.set(Constants.KEY_ARCHIVE_PATH, targetZipFile.getAbsolutePath());
// compute content checksum of original folder and set on artifact
final File contentChecksumFile = new File(tmpFolder, targetZipFile.getName() + ".content.md5");
FileUtils.createDirectoryContentChecksumFile(unpackedFile, contentChecksumFile);
final String contentCheckSum = FileUtils.computeChecksum(contentChecksumFile);
artifact.set(KEY_CONTENT_CHECKSUM, contentCheckSum);
} else {
artifact.append("Errors", "Cannot extract files from extraction project.", ",");
}
}
}
}
private void contributeComponentPatternCoveredFiles(Artifact artifact, ComponentPatternMatches componentPatternMatches, File tmpFolder) throws IOException {
final File targetFolder = new File(tmpFolder, componentPatternMatches.key);
FileUtils.forceMkdir(targetFolder);
// aggregate all covered files into one directory
for (ComponentPatternData cpd : componentPatternMatches.list) {
final String includes = cpd.get(ComponentPatternData.Attribute.INCLUDE_PATTERN);
final String excludes = cpd.get(ComponentPatternData.Attribute.EXCLUDE_PATTERN);
// use the projects attribute to pre-select the folder to be scanned; can be multiple
final Set componentBaseDirs = getRelativePaths(artifact);
for (final String baseDir : componentBaseDirs) {
// derive absolute componentBaseDir
final File componentBaseDir = new File(getExtractedFilesBaseDir(), baseDir);
final String fileName = componentBaseDir.getName();
if (fileName.startsWith("[")) {
// this may be a scanned artifact; if not existing we need to unpack the file again
if (!componentBaseDir.exists()) {
File archiveFile = new File(componentBaseDir.getParentFile(), fileName.substring(1, fileName.length() - 1));
ArchiveUtils.unpackIfPossible(archiveFile, componentBaseDir, new ArrayList<>());
}
}
// check the directory (now) exists and can be used to further evaluate the component patterns
if (componentBaseDir.exists()) {
// aggregate matching files to dedicated folders
if (LOG.isTraceEnabled()) {
LOG.trace("Scanning {} including {} excluding {}", componentBaseDir, includes, excludes);
}
// differentiate directories and single files
if (componentBaseDir.isDirectory()) {
aggregateComponentFiles(cpd, getExtractedFilesBaseDir(), componentBaseDir, includes, excludes, targetFolder);
} else {
// the component pattern matches a single file; this is what we copy
final File targetFile = new File(targetFolder, componentBaseDir.getName());
FileUtils.copyFile(componentBaseDir, targetFile);
}
}
}
}
}
private void aggregateComponentFiles(ComponentPatternData cpd, File baseDir, File componentBaseDir, String includes, String excludes, File targetFolder) throws IOException {
// split includes/excludes in relative and absolute paths
final ComponentPatternProducer.NormalizedPatternSet includePatternSet = ComponentPatternProducer.normalizePattern(includes);
final ComponentPatternProducer.NormalizedPatternSet excludePatternSet = ComponentPatternProducer.normalizePattern(excludes);
final Set relativizedIncludePatterns = relativizePatterns(includePatternSet.absolutePatterns);
final Set relativizedExcludePatterns = relativizePatterns(excludePatternSet.absolutePatterns);
int count = 0;
if (includePatternSet.relativePatterns.size() > 0) {
final String[] relativeCoveredFiles = FileUtils.scanDirectoryForFiles(componentBaseDir,
toArray(includePatternSet.relativePatterns), toArray(excludePatternSet.relativePatterns));
aggregateFiles(componentBaseDir, relativeCoveredFiles, targetFolder);
count += relativeCoveredFiles.length;
}
if (relativizedIncludePatterns.size() > 0) {
final String[] absoluteCoveredFiles = FileUtils.scanDirectoryForFiles(baseDir,
toArray(relativizedIncludePatterns), toArray(relativizedExcludePatterns));
aggregateFiles(baseDir, absoluteCoveredFiles, targetFolder);
count += absoluteCoveredFiles.length;
}
if (count == 0) {
// FIXME: activate exception or at least log a warning; perhaps control by parameter
// throw new IllegalStateException("Identified component pattern does not match any file: " + cpd.deriveQualifier());
}
}
private void aggregateFiles(File baseDir, String[] coveredFiles, File targetFolder) throws IOException {
for (String file : coveredFiles) {
final File targetFile = new File(targetFolder, file);
// do not overwrite existing files
if (!targetFile.exists()) {
final File srcFile = new File(baseDir, file);
FileUtils.copyFile(srcFile, targetFile);
}
}
}
private String[] toArray(Set patternSet) {
if (patternSet.isEmpty()) return null;
return patternSet.toArray(new String[0]);
}
private Set relativizePatterns(Set patternSet) {
final Set relativizedPatterns = new HashSet<>();
if (patternSet != null) {
for (String absoluteInclude : patternSet) {
if (absoluteInclude.startsWith("/")) {
relativizedPatterns.add(absoluteInclude.substring(1));
} else {
throw new IllegalStateException("Absolute normalized pattern does not start with '/': " + absoluteInclude);
}
}
}
return relativizedPatterns;
}
private static File initializeTmpFolder(File targetDir) throws IOException {
final File tmpFolder = new File(targetDir.getParentFile(), ".tmp");
if (tmpFolder.exists()) {
FileUtils.deleteDirectory(tmpFolder);
}
tmpFolder.mkdirs();
return tmpFolder;
}
private Inventory loadResultInventory() throws IOException {
final File scanResultInventoryFile = getResultInventoryFile();
FileUtils.validateExists(scanResultInventoryFile);
final Inventory inventory = new InventoryReader().readInventory(scanResultInventoryFile);
return inventory;
}
private Inventory loadReferenceInventory() throws IOException {
final Inventory referenceInventory;
if (referenceInventoryFile != null) {
FileUtils.validateExists(referenceInventoryFile);
if (referenceInventoryFile.isDirectory()) {
referenceInventory = InventoryUtils.readInventory(referenceInventoryFile, "*.xls");
} else {
referenceInventory = new InventoryReader().readInventory(referenceInventoryFile);
}
} else {
Validate.notNull(this.referenceInventory);
referenceInventory = this.referenceInventory;
}
return referenceInventory;
}
private void contributeReferenceComponentPatterns(Inventory referenceInventory, Map> componentPatternMap) {
for (ComponentPatternData cpd : referenceInventory.getComponentPatternData()) {
final String key = deriveMapQualifier(cpd);
List list = componentPatternMap.computeIfAbsent(key, c -> new ArrayList<>());
componentPatternMap.put(key, list);
// also include fallback mapping (in case component name does not match)
componentPatternMap.put(deriveFallbackMapQualifier(
cpd.get(ComponentPatternData.Attribute.COMPONENT_PART),
cpd.get(ComponentPatternData.Attribute.COMPONENT_VERSION)), list);
list.add(cpd);
}
}
private static Set getRelativePaths(Artifact artifact) {
// use information from projects (relative to original scanBaseDir)
final Set componentBaseDirs = new HashSet<>(artifact.getProjects());
// in case information is not available fall back to ATTRIBUTE_KEY_ARTIFACT_PATH (also relative to original scanBaseDir)
if (componentBaseDirs.isEmpty()) {
final String artifactPath = artifact.get(FileSystemScanConstants.ATTRIBUTE_KEY_ARTIFACT_PATH);
if (!StringUtils.isEmpty(artifactPath)) {
componentBaseDirs.add(artifactPath);
}
}
return componentBaseDirs;
}
private File getExtractedFilesBaseDir() {
return scanBaseDir;
}
private ComponentPatternMatches findComponentPatternMatches(Map> componentPatternMap, String componentName, String componentVersion, String componentPart) {
ComponentPatternMatches componentPatternMatch = new ComponentPatternMatches();
componentPatternMatch.key = deriveMapQualifier(componentName, componentPart, componentVersion);
componentPatternMatch.list = componentPatternMap.get(componentPatternMatch.key);
if (componentPatternMatch.list == null) {
String modulatedKey = deriveFallbackMapQualifier(componentPart, componentVersion);
componentPatternMatch.list = componentPatternMap.get(modulatedKey);
}
return componentPatternMatch;
}
private static class ComponentPatternMatches {
protected String key;
protected List list;
}
private void compress(File targetDir, Inventory inventory, Map> componentPatternMap,
File tmpFolder, Set removableArtifacts) throws IOException {
int i = 1;
for (Artifact artifact : inventory.getArtifacts()) {
LOG.info("Compressing artifact {}/{}: {}", i++, inventory.getArtifacts().size(), artifact.deriveQualifier());
// check artifact is covered by a component pattern
String componentName = artifact.getComponent();
String componentVersion = artifact.getVersion();
String componentPart = artifact.getId();
ComponentPatternMatches componentPatternMatch = findComponentPatternMatches(componentPatternMap, componentName, componentVersion, componentPart);
if (componentPatternMatch.list != null) {
// now package the aggregated folder
final File targetFolder = new File(tmpFolder, componentPatternMatch.key);
final File targetZipFile = new File(targetDir, componentPatternMatch.key + ".zip");
FileUtils.zipAnt(targetFolder, targetZipFile);
if (!targetZipFile.exists()) {
removableArtifacts.add(artifact);
continue;
}
artifact.set(Constants.KEY_ARCHIVE_PATH, targetZipFile.getAbsolutePath());
final File contentChecksumFile = new File(tmpFolder, targetZipFile.getName() + ".content.md5");
FileUtils.createDirectoryContentChecksumFile(targetFolder, contentChecksumFile);
// set the content checksum
final String contentChecksum = FileUtils.computeChecksum(contentChecksumFile);
artifact.set(KEY_CONTENT_CHECKSUM, contentChecksum);
}
}
}
/**
* There are multiple reasons for empty archive paths:
*
* -
* The relevant component pattern is not included in the reference inventory, This is a configuration issue.
*
* -
* There is no content available for the artifact. E.g. a logical package configuration without physical files.
* This may require provision of download urls or additional content (however at a later stage)
*
* -
* Artifacts that have been unpacked for scanning the content (classification contains 'scan')
*
*
*
* @param inventory The inventory to check for KEY_ARCHIVE_PATH completeness.
*/
private void checkCompletenessOfArchivePath(Inventory inventory) {
for (final Artifact artifact : inventory.getArtifacts()) {
String archivePath = artifact.get(Constants.KEY_ARCHIVE_PATH);
// deep scanned artifacts are not further scanned. It would be good to get an aggregated view however
if (ArtifactUtils.hasScanClassification(artifact)) continue;
if (!StringUtils.hasText(archivePath)) {
// only report issue, when we have a checksum; implicitly excluded shaded subcomponents from being reported
if (StringUtils.hasText(artifact.getChecksum())) {
LOG.warn("Artifact {} does not have an archive path! " +
"Validate that the component patterns for this process are complete.", artifact);
}
}
}
}
private String deriveFallbackMapQualifier(String componentPart, String componentVersion) {
final StringBuilder sb = new StringBuilder();
sb.append(componentPart);
if (StringUtils.notEmpty(componentVersion)) {
sb.append("-");
sb.append(componentVersion);
}
return sb.toString();
}
private String deriveMapQualifier(String componentName, String componentPart, String componentVersion) {
final StringBuilder sb = new StringBuilder();
if (StringUtils.notEmpty(componentName)) {
sb.append(componentName).append("-");
}
sb.append(componentPart);
if (StringUtils.notEmpty(componentVersion)) {
sb.append("-");
sb.append(componentVersion);
}
return sb.toString();
}
private String deriveMapQualifier(ComponentPatternData cpd) {
return deriveMapQualifier(
cpd.get(ComponentPatternData.Attribute.COMPONENT_NAME),
cpd.get(ComponentPatternData.Attribute.COMPONENT_PART),
cpd.get(ComponentPatternData.Attribute.COMPONENT_VERSION));
}
}
// Web page footer (not part of the source): © 2015 - 2025 Weber Informatics LLC | Privacy Policy