All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.spdx.maven.utils.SpdxFileCollector Maven / Gradle / Ivy

There is a newer version: 1.0.0-RC1
Show newest version
/*
 * Copyright 2014 Source Auditor Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License" );
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.spdx.maven.utils;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.Map.Entry;

import org.apache.maven.shared.model.fileset.FileSet;
import org.apache.maven.shared.model.fileset.util.FileSetManager;

import org.spdx.library.InvalidSPDXAnalysisException;
import org.spdx.library.model.Checksum;
import org.spdx.library.model.Relationship;
import org.spdx.library.model.SpdxDocument;
import org.spdx.library.model.SpdxFile;
import org.spdx.library.model.SpdxPackage;
import org.spdx.library.model.SpdxPackageVerificationCode;
import org.spdx.library.model.SpdxSnippet;
import org.spdx.library.model.enumerations.ChecksumAlgorithm;
import org.spdx.library.model.enumerations.FileType;
import org.spdx.library.model.enumerations.RelationshipType;
import org.spdx.library.model.license.AnyLicenseInfo;
import org.spdx.library.model.license.InvalidLicenseStringException;
import org.spdx.maven.SnippetInfo;
import org.spdx.storage.IModelStore.IdType;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Collects SPDX file information from directories.
 * 

* The method collectFilesInDirectory(FileSet[] filesets) will scan and create SPDX File information for * all files in the filesets. * * @author Gary O'Neall */ public class SpdxFileCollector { private static final Logger LOG = LoggerFactory.getLogger( SpdxFileCollector.class ); // constants for mapping extensions to types. static final String SPDX_FILE_TYPE_CONSTANTS_PROP_PATH = "resources/SpdxFileTypeConstants.prop"; static final Map EXT_TO_FILE_TYPE = new HashMap<>(); static { loadFileExtensionConstants(); } static final Map checksumAlgorithms = new HashMap<>(); static { checksumAlgorithms.put( ChecksumAlgorithm.SHA1, "SHA-1" ); checksumAlgorithms.put( ChecksumAlgorithm.SHA224, "SHA-224" ); checksumAlgorithms.put( ChecksumAlgorithm.SHA256, "SHA-256" ); checksumAlgorithms.put( ChecksumAlgorithm.SHA384, "SHA-384" ); checksumAlgorithms.put( ChecksumAlgorithm.SHA3_384, "SHA-512" ); checksumAlgorithms.put( ChecksumAlgorithm.MD2, "MD2" ); checksumAlgorithms.put( ChecksumAlgorithm.MD4, "MD4" ); checksumAlgorithms.put( ChecksumAlgorithm.MD5, "MD5" ); checksumAlgorithms.put( ChecksumAlgorithm.MD6, "MD6" ); } Set licensesFromFiles = new HashSet<>(); /** * Map of fileName, SPDXFile for all files in the SPDX document */ Map spdxFiles = new HashMap<>(); List spdxSnippets = new ArrayList<>(); FileSetManager fileSetManager = new FileSetManager(); /** * SpdxFileCollector collects SPDX file information for files */ public SpdxFileCollector() { } /** * Load file type constants from the properties file */ private static void loadFileExtensionConstants() { Properties prop = new Properties(); try ( InputStream is = SpdxFileCollector.class.getClassLoader().getResourceAsStream( SPDX_FILE_TYPE_CONSTANTS_PROP_PATH ) ) { if ( is == null ) { LOG.error( "Unable to load properties file " + SPDX_FILE_TYPE_CONSTANTS_PROP_PATH ); return; } prop.load( is ); Iterator> iter = prop.entrySet().iterator(); while ( iter.hasNext() ) { Entry entry = iter.next(); String fileTypeStr = (String)entry.getKey(); FileType fileType = FileType.valueOf( fileTypeStr ); String[] extensions = ((String)entry.getValue()).split( "," ); for ( String extension:extensions ) { try { String trimmedExtension = extension.toUpperCase().trim(); if ( EXT_TO_FILE_TYPE.containsKey( trimmedExtension ) ) { LOG.warn( "Duplicate file extension: "+trimmedExtension ); } EXT_TO_FILE_TYPE.put( trimmedExtension, fileType ); } catch ( Exception ex ) { LOG.error( "Error adding file extensions to filetype map", ex ); } } } } catch ( IOException e ) { LOG.warn( "WARNING: Error reading SpdxFileTypeConstants properties file. All file types will be mapped to Other." ); } } /** * Collect file information in the directory (including subdirectories). * * @param fileSets FileSets containing the description of the directory to be scanned * @param baseDir project base directory used to construct the relative paths for the SPDX files * @param defaultFileInformation Information on default SPDX field data for the files * @param pathSpecificInformation Map of path to file information used to override the default file information * @param relationshipType Type of relationship to the project package * @param projectPackage Package to which the files belong * @param spdxDoc SPDX document which contains the extracted license infos that may be needed for license parsing * * @throws SpdxCollectionException */ public void collectFiles( List fileSets, String baseDir, SpdxDefaultFileInformation defaultFileInformation, Map pathSpecificInformation, SpdxPackage projectPackage, RelationshipType relationshipType, SpdxDocument spdxDoc, Set algorithms ) throws SpdxCollectionException { for ( FileSet fileSet : fileSets ) { String[] includedFiles = fileSetManager.getIncludedFiles( fileSet ); for ( String includedFile : includedFiles ) { String filePath = fileSet.getDirectory() + File.separator + includedFile; File file = new File( filePath ); String relativeFilePath = file.getAbsolutePath().substring( baseDir.length() + 1 ).replace( '\\', '/' ); SpdxDefaultFileInformation fileInfo = findDefaultFileInformation( relativeFilePath, pathSpecificInformation ); if ( fileInfo == null ) { fileInfo = defaultFileInformation; } String outputFileName; if ( fileSet.getOutputDirectory() != null ) { outputFileName = fileSet.getOutputDirectory() + File.separator + includedFile; } else { outputFileName = file.getAbsolutePath().substring( baseDir.length() + 1 ); } collectFile( file, outputFileName, fileInfo, relationshipType, projectPackage, spdxDoc, algorithms ); } } } /** * Find the most appropriate file information based on the lowest level match (closed to file) * * @param filePath * @param pathSpecificInformation * @return */ private SpdxDefaultFileInformation findDefaultFileInformation( String filePath, Map pathSpecificInformation ) { LOG.debug( "Checking for file path " + filePath ); SpdxDefaultFileInformation retval = pathSpecificInformation.get( filePath ); if ( retval != null ) { LOG.debug( "Found filepath" ); return retval; } // see if any of the parent directories contain default information which should be used String parentPath = filePath; int parentPathIndex = 0; do { parentPathIndex = parentPath.lastIndexOf( "/" ); if ( parentPathIndex > 0 ) { parentPath = parentPath.substring( 0, parentPathIndex ); retval = pathSpecificInformation.get( parentPath ); } } while ( retval == null && parentPathIndex > 0 ); if ( retval != null ) { LOG.debug( "Found directory containing file path for path specific information. File path: " + parentPath ); } return retval; } /** * Collect SPDX information for a specific file * * @param file * @param outputFileName Path to the output file name relative to the root of the output archive file * @param relationshipType Type of relationship to the project package * @param projectPackage Package to which the files belong * @param spdxDoc SPDX Document which will contain the files * @param algorithms algorithms to use to generate checksums * @throws SpdxCollectionException */ private void collectFile( File file, String outputFileName, SpdxDefaultFileInformation fileInfo, RelationshipType relationshipType, SpdxPackage projectPackage, SpdxDocument spdxDoc, Set algorithms ) throws SpdxCollectionException { if ( spdxFiles.containsKey( file.getPath() ) ) { return; // already added from a previous scan } SpdxFile spdxFile = convertToSpdxFile( file, outputFileName, fileInfo, algorithms, spdxDoc ); try { Relationship relationship = spdxDoc.createRelationship( projectPackage, relationshipType, "" ); spdxFile.addRelationship( relationship ); } catch ( InvalidSPDXAnalysisException e ) { throw new SpdxCollectionException( "Error creating SPDX file relationship", e ); } if ( fileInfo.getSnippets() != null ) { for ( SnippetInfo snippet : fileInfo.getSnippets() ) { SpdxSnippet spdxSnippet; try { spdxSnippet = convertToSpdxSnippet( snippet, spdxFile, spdxDoc ); } catch ( InvalidLicenseStringException e ) { throw new SpdxCollectionException( "Error processing SPDX snippet information. Invalid license string specified in snippet.", e ); } catch ( SpdxBuilderException e ) { throw new SpdxCollectionException( "Error creating SPDX snippet information.", e ); } catch ( InvalidSPDXAnalysisException e ) { throw new SpdxCollectionException( "Error processing SPDX snippet information.", e ); } spdxSnippets.add( spdxSnippet ); } } spdxFiles.put( file.getPath(), spdxFile ); Collection licenseInfoFromFiles; try { licenseInfoFromFiles = spdxFile.getLicenseInfoFromFiles(); licensesFromFiles.addAll( licenseInfoFromFiles ); } catch ( InvalidSPDXAnalysisException e ) { throw new SpdxCollectionException( "Error getting license information from files.", e ); } } /** * Create an SpdxSnippet from the snippet information provided * @param snippet * @param spdxFile * @param spdxDoc * @return * @throws SpdxBuilderException * @throws InvalidSPDXAnalysisException */ private SpdxSnippet convertToSpdxSnippet( SnippetInfo snippet, SpdxFile spdxFile, SpdxDocument spdxDoc ) throws SpdxBuilderException, InvalidSPDXAnalysisException { //TODO: Add annotations to snippet return spdxDoc.createSpdxSnippet( spdxDoc.getModelStore().getNextId( IdType.SpdxId, spdxDoc.getDocumentUri() ), snippet.getName(), snippet.getLicenseConcluded( spdxDoc ), snippet.getLicenseInfoInSnippet( spdxDoc ), snippet.getCopyrightText(), spdxFile, snippet.getByteRangeStart(), snippet.getByteRangeEnd() ) .setComment( snippet.getComment() ) .setLicenseComments( snippet.getLicensComment() ) .setLineRange( snippet.getLineRangeStart(), snippet.getLineRangeEnd() ) .build(); } /** * @param file * @param outputFileName Path to the output file name relative to the root of the output archive file * @param defaultFileInformation Information on default SPDX field data for the files * @param algorithms algorithms to use to generate checksums * @param spdxDoc SPDX document which will contain the SPDX file * @return * @throws SpdxCollectionException */ private SpdxFile convertToSpdxFile( File file, String outputFileName, SpdxDefaultFileInformation defaultFileInformation, Set algorithms, SpdxDocument spdxDoc ) throws SpdxCollectionException { String relativePath = convertFilePathToSpdxFileName( outputFileName ); ArrayList fileTypes = new ArrayList<>(); fileTypes.add( extensionToFileType( getExtension( file ) ) ); Set checksums; try { checksums = generateChecksum( file, algorithms, spdxDoc ); } catch ( SpdxCollectionException | InvalidSPDXAnalysisException e1 ) { throw new SpdxCollectionException( "Unable to generate checksum for file "+file.getName() ); } AnyLicenseInfo concludedLicense = null; AnyLicenseInfo license = null; String licenseComment = defaultFileInformation.getLicenseComment(); if ( isSourceFile( fileTypes ) && file.length() < SpdxSourceFileParser.MAXIMUM_SOURCE_FILE_LENGTH ) { List fileSpdxLicenses = null; try { fileSpdxLicenses = SpdxSourceFileParser.parseFileForSpdxLicenses( file ); } catch ( SpdxSourceParserException ex ) { LOG.error( "Error parsing for SPDX license ID's", ex ); } if ( fileSpdxLicenses != null && fileSpdxLicenses.size() > 0 ) { // The file has declared licenses of the form SPDX-License-Identifier: licenseId if ( fileSpdxLicenses.size() == 1 ) { license = fileSpdxLicenses.get( 0 ); } else { try { license = spdxDoc.createConjunctiveLicenseSet( fileSpdxLicenses ); } catch ( InvalidSPDXAnalysisException e ) { throw new SpdxCollectionException( "Error creating SPDX file - unable to create a license set", e ); } } if ( licenseComment == null ) { licenseComment = ""; } else if ( licenseComment.length() > 0 ) { licenseComment = licenseComment.concat( "; " ); } licenseComment = licenseComment.concat( "This file contains SPDX-License-Identifiers for " ); licenseComment = licenseComment.concat( license.toString() ); } } if ( license == null ) { license = defaultFileInformation.getDeclaredLicense(); concludedLicense = defaultFileInformation.getConcludedLicense(); } else { concludedLicense = license; } String copyright = defaultFileInformation.getCopyright(); String notice = defaultFileInformation.getNotice(); String comment = defaultFileInformation.getComment(); String[] defaultContributors = defaultFileInformation.getContributors(); List contributors; if ( defaultContributors != null ) { contributors = Arrays.asList( defaultFileInformation.getContributors() ); } else { contributors = new ArrayList<>(); } SpdxFile retval = null; //TODO: Add annotation try { List seenLicenses = new ArrayList<>(); seenLicenses.add( license ); Checksum sha1 = null; for ( Checksum checksum:checksums ) { if (ChecksumAlgorithm.SHA1.equals( checksum.getAlgorithm() )) { sha1 = checksum; break; } } retval = spdxDoc.createSpdxFile( spdxDoc.getModelStore().getNextId( IdType.SpdxId, spdxDoc.getDocumentUri() ), relativePath, concludedLicense, seenLicenses, copyright, sha1 ) .setComment( comment ) .setLicenseComments( licenseComment ) .setFileTypes( fileTypes ) .setFileContributors( contributors ) .build(); retval.setNoticeText( notice ); } catch ( InvalidSPDXAnalysisException e ) { throw new SpdxCollectionException( "Error creating SPDX file", e ); } return retval; } /** * @param fileTypes * @return true if the fileTypes contain a source file type */ protected boolean isSourceFile( Collection fileTypes ) { for ( FileType ft : fileTypes ) { if ( ft == FileType.SOURCE ) { return true; } } return false; } /** * Create the SPDX file name from a system specific path name * * @param filePath system specific file path relative to the top of the archive root to the top of the archive * directory where the file is stored. * @return */ public String convertFilePathToSpdxFileName( String filePath ) { String result = filePath.replace( '\\', '/' ); if ( !result.startsWith( "./" ) ) { result = "./" + result; } return result; } public String getExtension( File file ) { String fileName = file.getName(); int lastDot = fileName.lastIndexOf( '.' ); if ( lastDot < 1 ) { return ""; } else { return fileName.substring( lastDot + 1 ); } } protected static FileType extensionToFileType( String fileExtension ) { FileType retval = EXT_TO_FILE_TYPE.get( fileExtension.trim().toUpperCase() ); if ( retval == null ) { retval = FileType.OTHER; } return retval; } /** * @return SPDX Files which have been acquired through the collectFilesInDirectory method */ public Collection getFiles() { return spdxFiles.values(); } /** * @return SPDX Snippets collected through the collectFilesInDirectory method */ public List getSnippets() { return this.spdxSnippets; } /** * @return all license information used in the SPDX files */ public Collection getLicenseInfoFromFiles() { return licensesFromFiles; } /** * Create a verification code from all SPDX files collected * * @param spdxFilePath Complete file path for the SPDX file - this will be excluded from the verification code * @param spdxDoc SPDX document which will contain the package verification code. * @return * @throws NoSuchAlgorithmException * @throws InvalidSPDXAnalysisException */ public SpdxPackageVerificationCode getVerificationCode( String spdxFilePath, SpdxDocument spdxDoc ) throws NoSuchAlgorithmException, InvalidSPDXAnalysisException { List excludedFileNamesFromVerificationCode = new ArrayList<>(); if ( spdxFilePath != null && spdxFiles.containsKey( spdxFilePath ) ) { Optional excludedFileName = spdxFiles.get( spdxFilePath ).getName(); if ( excludedFileName.isPresent() ) { excludedFileNamesFromVerificationCode.add( excludedFileName.get() ); } } SpdxPackageVerificationCode verificationCode; verificationCode = calculatePackageVerificationCode( spdxFiles.values(), excludedFileNamesFromVerificationCode, spdxDoc ); return verificationCode; } /** * Calculate the package verification code for a collection of SPDX files * * @param spdxFiles Files used to calculate the verification code * @param excludedFileNamesFromVerificationCode List of file names to exclude * @param spdxDoc SPDX document which will contain the Package Verification Code * @return * @throws NoSuchAlgorithmException * @throws InvalidSPDXAnalysisException */ private SpdxPackageVerificationCode calculatePackageVerificationCode( Collection spdxFiles, List excludedFileNamesFromVerificationCode, SpdxDocument spdxDoc ) throws NoSuchAlgorithmException, InvalidSPDXAnalysisException { List fileChecksums = new ArrayList<>(); for ( SpdxFile file : spdxFiles ) { Optional filename = file.getName(); if ( filename.isPresent() && includeInVerificationCode( file.getName().get(), excludedFileNamesFromVerificationCode ) ) { fileChecksums.add( file.getSha1() ); } } Collections.sort( fileChecksums ); MessageDigest verificationCodeDigest = MessageDigest.getInstance( "SHA-1" ); for ( String fileChecksum : fileChecksums ) { byte[] hashInput = fileChecksum.getBytes( StandardCharsets.UTF_8 ); verificationCodeDigest.update( hashInput ); } String value = convertChecksumToString( verificationCodeDigest.digest() ); return spdxDoc.createPackageVerificationCode( value, excludedFileNamesFromVerificationCode ); } private boolean includeInVerificationCode( String name, List excludedFileNamesFromVerificationCode ) { for ( String s : excludedFileNamesFromVerificationCode ) { if ( s.equals( name ) ) { return false; } } return true; } /** * Converts an array of bytes to a string compliant with the SPDX sha1 representation * * @param digestBytes * @return */ public static String convertChecksumToString( byte[] digestBytes ) { StringBuilder sb = new StringBuilder(); for ( byte digestByte : digestBytes ) { String hex = Integer.toHexString( 0xff & digestByte ); if ( hex.length() < 2 ) { sb.append( '0' ); } sb.append( hex ); } return sb.toString(); } /** * Generate the Sha1 for a given file. Must have read access to the file. This method is equivalent to calling * {@code SpdxFileCollector.generateChecksum(file, "SHA-1")}. * * @param file file to generate checksum for * @param spdxDoc SPDX document which will contain the checksum * @return SHA1 checksum of the input file * @throws SpdxCollectionException if the algorithm is unavailable or the file cannot be read * @throws InvalidSPDXAnalysisException */ public static String generateSha1( File file, SpdxDocument spdxDoc ) throws SpdxCollectionException, InvalidSPDXAnalysisException { Set sha1 = new HashSet<>(); sha1.add( ChecksumAlgorithm.SHA1 ); Checksum sha1Checksum = generateChecksum( file, sha1, spdxDoc ).iterator().next(); return sha1Checksum.getValue(); } /** * Generate checksums for a given file using each algorithm supplied. Must have read access to the file. * * @param file file whose checksum is to be generated * @param algorithms algorithms to generate the checksums * @param spdxDoc SPDX document which will contain the checksum * @return {@code Set} of checksums for file using each algorithm specified * @throws SpdxCollectionException if the input algorithm is invalid or unavailable or if the file cannot be read * @throws InvalidSPDXAnalysisException */ public static Set generateChecksum( File file, Set algorithms, SpdxDocument spdxDoc ) throws SpdxCollectionException, InvalidSPDXAnalysisException { Set checksums = new HashSet<>(); byte[] buffer; try { buffer = Files.readAllBytes( file.toPath() ); } catch ( IOException e ) { throw new SpdxCollectionException( "IO error while calculating checksums.", e ); } for ( ChecksumAlgorithm algorithm : algorithms ) { String checksumAlgorithm = checksumAlgorithms.get( algorithm ); MessageDigest digest; try { digest = MessageDigest.getInstance( checksumAlgorithm ); } catch ( NoSuchAlgorithmException e ) { throw new SpdxCollectionException( e ); } digest.update( buffer ); String checksum = convertChecksumToString( digest.digest() ); checksums.add( spdxDoc.createChecksum( algorithm, checksum ) ); } return checksums; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy