Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.spdx.maven.utils.SpdxFileCollector Maven / Gradle / Ivy
Go to download
Plugin for supporting SPDX in a Maven build. See
/*
 * Copyright 2014 Source Auditor Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License" );
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.spdx.maven.utils;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.Map.Entry;

import org.apache.maven.shared.model.fileset.FileSet;
import org.apache.maven.shared.model.fileset.util.FileSetManager;
import org.spdx.library.InvalidSPDXAnalysisException;
import org.spdx.library.model.Checksum;
import org.spdx.library.model.Relationship;
import org.spdx.library.model.SpdxDocument;
import org.spdx.library.model.SpdxFile;
import org.spdx.library.model.SpdxPackage;
import org.spdx.library.model.SpdxPackageVerificationCode;
import org.spdx.library.model.SpdxSnippet;
import org.spdx.library.model.enumerations.ChecksumAlgorithm;
import org.spdx.library.model.enumerations.FileType;
import org.spdx.library.model.enumerations.RelationshipType;
import org.spdx.library.model.license.AnyLicenseInfo;
import org.spdx.library.model.license.InvalidLicenseStringException;
import org.spdx.maven.SnippetInfo;
import org.spdx.storage.IModelStore.IdType;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
* Collects SPDX file information from directories.
* The method collectFilesInDirectory(FileSet[] filesets)
will scan and create SPDX File information for
* all files in the filesets.
* @author Gary O'Neall
public class SpdxFileCollector
private static final Logger LOG = LoggerFactory.getLogger( SpdxFileCollector.class );
// constants for mapping extensions to types.
static final String SPDX_FILE_TYPE_CONSTANTS_PROP_PATH = "resources/SpdxFileTypeConstants.prop";
static final Map EXT_TO_FILE_TYPE = new HashMap<>();
static final Map checksumAlgorithms = new HashMap<>();
checksumAlgorithms.put( ChecksumAlgorithm.SHA1, "SHA-1" );
checksumAlgorithms.put( ChecksumAlgorithm.SHA224, "SHA-224" );
checksumAlgorithms.put( ChecksumAlgorithm.SHA256, "SHA-256" );
checksumAlgorithms.put( ChecksumAlgorithm.SHA384, "SHA-384" );
checksumAlgorithms.put( ChecksumAlgorithm.SHA3_384, "SHA-512" );
checksumAlgorithms.put( ChecksumAlgorithm.MD2, "MD2" );
checksumAlgorithms.put( ChecksumAlgorithm.MD4, "MD4" );
checksumAlgorithms.put( ChecksumAlgorithm.MD5, "MD5" );
checksumAlgorithms.put( ChecksumAlgorithm.MD6, "MD6" );
Set licensesFromFiles = new HashSet<>();
* Map of fileName, SPDXFile for all files in the SPDX document
Map spdxFiles = new HashMap<>();
List spdxSnippets = new ArrayList<>();
FileSetManager fileSetManager = new FileSetManager();
* SpdxFileCollector collects SPDX file information for files
public SpdxFileCollector()
* Load file type constants from the properties file
private static void loadFileExtensionConstants()
Properties prop = new Properties();
try ( InputStream is = SpdxFileCollector.class.getClassLoader().getResourceAsStream(
if ( is == null )
LOG.error( "Unable to load properties file " + SPDX_FILE_TYPE_CONSTANTS_PROP_PATH );
prop.load( is );
Iterator> iter = prop.entrySet().iterator();
while ( iter.hasNext() )
Entry entry =;
String fileTypeStr = (String)entry.getKey();
FileType fileType = FileType.valueOf( fileTypeStr );
String[] extensions = ((String)entry.getValue()).split( "," );
for ( String extension:extensions )
String trimmedExtension = extension.toUpperCase().trim();
if ( EXT_TO_FILE_TYPE.containsKey( trimmedExtension ) )
LOG.warn( "Duplicate file extension: "+trimmedExtension );
EXT_TO_FILE_TYPE.put( trimmedExtension, fileType );
catch ( Exception ex ) {
LOG.error( "Error adding file extensions to filetype map", ex );
catch ( IOException e )
"WARNING: Error reading SpdxFileTypeConstants properties file. All file types will be mapped to Other." );
* Collect file information in the directory (including subdirectories).
* @param fileSets FileSets containing the description of the directory to be scanned
* @param baseDir project base directory used to construct the relative paths for the SPDX files
* @param defaultFileInformation Information on default SPDX field data for the files
* @param pathSpecificInformation Map of path to file information used to override the default file information
* @param relationshipType Type of relationship to the project package
* @param projectPackage Package to which the files belong
* @param spdxDoc SPDX document which contains the extracted license infos that may be needed for license parsing
* @throws SpdxCollectionException
public void collectFiles( List fileSets, String baseDir,
SpdxDefaultFileInformation defaultFileInformation,
Map pathSpecificInformation,
SpdxPackage projectPackage, RelationshipType relationshipType,
SpdxDocument spdxDoc, Set algorithms ) throws SpdxCollectionException
for ( FileSet fileSet : fileSets )
String[] includedFiles = fileSetManager.getIncludedFiles( fileSet );
for ( String includedFile : includedFiles )
String filePath = fileSet.getDirectory() + File.separator + includedFile;
File file = new File( filePath );
String relativeFilePath = file.getAbsolutePath().substring( baseDir.length() + 1 ).replace( '\\', '/' );
SpdxDefaultFileInformation fileInfo = findDefaultFileInformation( relativeFilePath,
pathSpecificInformation );
if ( fileInfo == null )
fileInfo = defaultFileInformation;
String outputFileName;
if ( fileSet.getOutputDirectory() != null )
outputFileName = fileSet.getOutputDirectory() + File.separator + includedFile;
outputFileName = file.getAbsolutePath().substring( baseDir.length() + 1 );
collectFile( file, outputFileName, fileInfo, relationshipType, projectPackage, spdxDoc, algorithms );
* Find the most appropriate file information based on the lowest level match (closed to file)
* @param filePath
* @param pathSpecificInformation
* @return
private SpdxDefaultFileInformation findDefaultFileInformation( String filePath, Map pathSpecificInformation )
LOG.debug( "Checking for file path " + filePath );
SpdxDefaultFileInformation retval = pathSpecificInformation.get( filePath );
if ( retval != null )
LOG.debug( "Found filepath" );
return retval;
// see if any of the parent directories contain default information which should be used
String parentPath = filePath;
int parentPathIndex = 0;
parentPathIndex = parentPath.lastIndexOf( "/" );
if ( parentPathIndex > 0 )
parentPath = parentPath.substring( 0, parentPathIndex );
retval = pathSpecificInformation.get( parentPath );
} while ( retval == null && parentPathIndex > 0 );
if ( retval != null )
LOG.debug( "Found directory containing file path for path specific information. File path: " + parentPath );
return retval;
* Collect SPDX information for a specific file
* @param file
* @param outputFileName Path to the output file name relative to the root of the output archive file
* @param relationshipType Type of relationship to the project package
* @param projectPackage Package to which the files belong
* @param spdxDoc SPDX Document which will contain the files
* @param algorithms algorithms to use to generate checksums
* @throws SpdxCollectionException
private void collectFile( File file, String outputFileName, SpdxDefaultFileInformation fileInfo, RelationshipType relationshipType, SpdxPackage projectPackage, SpdxDocument spdxDoc, Set algorithms ) throws SpdxCollectionException
if ( spdxFiles.containsKey( file.getPath() ) )
return; // already added from a previous scan
SpdxFile spdxFile = convertToSpdxFile( file, outputFileName, fileInfo, algorithms, spdxDoc );
Relationship relationship = spdxDoc.createRelationship( projectPackage, relationshipType, "" );
spdxFile.addRelationship( relationship );
catch ( InvalidSPDXAnalysisException e )
throw new SpdxCollectionException( "Error creating SPDX file relationship", e );
if ( fileInfo.getSnippets() != null )
for ( SnippetInfo snippet : fileInfo.getSnippets() )
SpdxSnippet spdxSnippet;
spdxSnippet = convertToSpdxSnippet( snippet, spdxFile, spdxDoc );
catch ( InvalidLicenseStringException e )
throw new SpdxCollectionException(
"Error processing SPDX snippet information. Invalid license string specified in snippet.",
e );
catch ( SpdxBuilderException e )
throw new SpdxCollectionException( "Error creating SPDX snippet information.", e );
catch ( InvalidSPDXAnalysisException e )
throw new SpdxCollectionException(
"Error processing SPDX snippet information.",
e );
spdxSnippets.add( spdxSnippet );
spdxFiles.put( file.getPath(), spdxFile );
Collection licenseInfoFromFiles;
licenseInfoFromFiles = spdxFile.getLicenseInfoFromFiles();
licensesFromFiles.addAll( licenseInfoFromFiles );
catch ( InvalidSPDXAnalysisException e )
throw new SpdxCollectionException( "Error getting license information from files.", e );
* Create an SpdxSnippet from the snippet information provided
* @param snippet
* @param spdxFile
* @param spdxDoc
* @return
* @throws SpdxBuilderException
* @throws InvalidSPDXAnalysisException
private SpdxSnippet convertToSpdxSnippet( SnippetInfo snippet, SpdxFile spdxFile, SpdxDocument spdxDoc ) throws SpdxBuilderException, InvalidSPDXAnalysisException
//TODO: Add annotations to snippet
return spdxDoc.createSpdxSnippet( spdxDoc.getModelStore().getNextId( IdType.SpdxId, spdxDoc.getDocumentUri() ),
snippet.getName(), snippet.getLicenseConcluded( spdxDoc ),
snippet.getLicenseInfoInSnippet( spdxDoc ),
snippet.getCopyrightText(), spdxFile,
snippet.getByteRangeStart(), snippet.getByteRangeEnd() )
.setComment( snippet.getComment() )
.setLicenseComments( snippet.getLicensComment() )
.setLineRange( snippet.getLineRangeStart(), snippet.getLineRangeEnd() )
* @param file
* @param outputFileName Path to the output file name relative to the root of the output archive file
* @param defaultFileInformation Information on default SPDX field data for the files
* @param algorithms algorithms to use to generate checksums
* @param spdxDoc SPDX document which will contain the SPDX file
* @return
* @throws SpdxCollectionException
private SpdxFile convertToSpdxFile( File file, String outputFileName,
SpdxDefaultFileInformation defaultFileInformation,
Set algorithms,
SpdxDocument spdxDoc ) throws SpdxCollectionException
String relativePath = convertFilePathToSpdxFileName( outputFileName );
ArrayList fileTypes = new ArrayList<>();
fileTypes.add( extensionToFileType( getExtension( file ) ) );
Set checksums;
checksums = generateChecksum( file, algorithms, spdxDoc );
catch ( SpdxCollectionException | InvalidSPDXAnalysisException e1 )
throw new SpdxCollectionException( "Unable to generate checksum for file "+file.getName() );
AnyLicenseInfo concludedLicense = null;
AnyLicenseInfo license = null;
String licenseComment = defaultFileInformation.getLicenseComment();
if ( isSourceFile( fileTypes ) && file.length() < SpdxSourceFileParser.MAXIMUM_SOURCE_FILE_LENGTH )
List fileSpdxLicenses = null;
fileSpdxLicenses = SpdxSourceFileParser.parseFileForSpdxLicenses( file );
catch ( SpdxSourceParserException ex )
LOG.error( "Error parsing for SPDX license ID's", ex );
if ( fileSpdxLicenses != null && fileSpdxLicenses.size() > 0 )
// The file has declared licenses of the form SPDX-License-Identifier: licenseId
if ( fileSpdxLicenses.size() == 1 )
license = fileSpdxLicenses.get( 0 );
license = spdxDoc.createConjunctiveLicenseSet( fileSpdxLicenses );
catch ( InvalidSPDXAnalysisException e )
throw new SpdxCollectionException( "Error creating SPDX file - unable to create a license set", e );
if ( licenseComment == null )
licenseComment = "";
else if ( licenseComment.length() > 0 )
licenseComment = licenseComment.concat( "; " );
licenseComment = licenseComment.concat( "This file contains SPDX-License-Identifiers for " );
licenseComment = licenseComment.concat( license.toString() );
if ( license == null )
license = defaultFileInformation.getDeclaredLicense();
concludedLicense = defaultFileInformation.getConcludedLicense();
concludedLicense = license;
String copyright = defaultFileInformation.getCopyright();
String notice = defaultFileInformation.getNotice();
String comment = defaultFileInformation.getComment();
String[] defaultContributors = defaultFileInformation.getContributors();
List contributors;
if ( defaultContributors != null ) {
contributors = Arrays.asList( defaultFileInformation.getContributors() );
} else {
contributors = new ArrayList<>();
SpdxFile retval = null;
//TODO: Add annotation
List seenLicenses = new ArrayList<>();
seenLicenses.add( license );
Checksum sha1 = null;
for ( Checksum checksum:checksums )
if (ChecksumAlgorithm.SHA1.equals( checksum.getAlgorithm() )) {
sha1 = checksum;
retval = spdxDoc.createSpdxFile( spdxDoc.getModelStore().getNextId( IdType.SpdxId, spdxDoc.getDocumentUri() ),
relativePath, concludedLicense, seenLicenses,
copyright, sha1 )
.setComment( comment )
.setLicenseComments( licenseComment )
.setFileTypes( fileTypes )
.setFileContributors( contributors )
retval.setNoticeText( notice );
catch ( InvalidSPDXAnalysisException e )
throw new SpdxCollectionException( "Error creating SPDX file", e );
return retval;
* @param fileTypes
* @return true if the fileTypes contain a source file type
protected boolean isSourceFile( Collection fileTypes )
for ( FileType ft : fileTypes )
if ( ft == FileType.SOURCE )
return true;
return false;
* Create the SPDX file name from a system specific path name
* @param filePath system specific file path relative to the top of the archive root to the top of the archive
* directory where the file is stored.
* @return
public String convertFilePathToSpdxFileName( String filePath )
String result = filePath.replace( '\\', '/' );
if ( !result.startsWith( "./" ) )
result = "./" + result;
return result;
public String getExtension( File file )
String fileName = file.getName();
int lastDot = fileName.lastIndexOf( '.' );
if ( lastDot < 1 )
return "";
return fileName.substring( lastDot + 1 );
protected static FileType extensionToFileType( String fileExtension )
FileType retval = EXT_TO_FILE_TYPE.get( fileExtension.trim().toUpperCase() );
if ( retval == null )
retval = FileType.OTHER;
return retval;
* @return SPDX Files which have been acquired through the collectFilesInDirectory method
public Collection getFiles()
return spdxFiles.values();
* @return SPDX Snippets collected through the collectFilesInDirectory method
public List getSnippets()
return this.spdxSnippets;
* @return all license information used in the SPDX files
public Collection getLicenseInfoFromFiles()
return licensesFromFiles;
* Create a verification code from all SPDX files collected
* @param spdxFilePath Complete file path for the SPDX file - this will be excluded from the verification code
* @param spdxDoc SPDX document which will contain the package verification code.
* @return
* @throws NoSuchAlgorithmException
* @throws InvalidSPDXAnalysisException
public SpdxPackageVerificationCode getVerificationCode( String spdxFilePath, SpdxDocument spdxDoc ) throws NoSuchAlgorithmException, InvalidSPDXAnalysisException
List excludedFileNamesFromVerificationCode = new ArrayList<>();
if ( spdxFilePath != null && spdxFiles.containsKey( spdxFilePath ) )
Optional excludedFileName = spdxFiles.get( spdxFilePath ).getName();
if ( excludedFileName.isPresent() )
excludedFileNamesFromVerificationCode.add( excludedFileName.get() );
SpdxPackageVerificationCode verificationCode;
verificationCode = calculatePackageVerificationCode( spdxFiles.values(),
excludedFileNamesFromVerificationCode, spdxDoc );
return verificationCode;
* Calculate the package verification code for a collection of SPDX files
* @param spdxFiles Files used to calculate the verification code
* @param excludedFileNamesFromVerificationCode List of file names to exclude
* @param spdxDoc SPDX document which will contain the Package Verification Code
* @return
* @throws NoSuchAlgorithmException
* @throws InvalidSPDXAnalysisException
private SpdxPackageVerificationCode calculatePackageVerificationCode( Collection spdxFiles,
List excludedFileNamesFromVerificationCode,
SpdxDocument spdxDoc ) throws NoSuchAlgorithmException, InvalidSPDXAnalysisException
List fileChecksums = new ArrayList<>();
for ( SpdxFile file : spdxFiles )
Optional filename = file.getName();
if ( filename.isPresent() && includeInVerificationCode( file.getName().get(), excludedFileNamesFromVerificationCode ) )
fileChecksums.add( file.getSha1() );
Collections.sort( fileChecksums );
MessageDigest verificationCodeDigest = MessageDigest.getInstance( "SHA-1" );
for ( String fileChecksum : fileChecksums )
byte[] hashInput = fileChecksum.getBytes( StandardCharsets.UTF_8 );
verificationCodeDigest.update( hashInput );
String value = convertChecksumToString( verificationCodeDigest.digest() );
return spdxDoc.createPackageVerificationCode( value, excludedFileNamesFromVerificationCode );
private boolean includeInVerificationCode( String name, List excludedFileNamesFromVerificationCode )
for ( String s : excludedFileNamesFromVerificationCode )
if ( s.equals( name ) )
return false;
return true;
* Converts an array of bytes to a string compliant with the SPDX sha1 representation
* @param digestBytes
* @return
public static String convertChecksumToString( byte[] digestBytes )
StringBuilder sb = new StringBuilder();
for ( byte digestByte : digestBytes )
String hex = Integer.toHexString( 0xff & digestByte );
if ( hex.length() < 2 )
sb.append( '0' );
sb.append( hex );
return sb.toString();
* Generate the Sha1 for a given file. Must have read access to the file. This method is equivalent to calling
* {@code SpdxFileCollector.generateChecksum(file, "SHA-1")}.
* @param file file to generate checksum for
* @param spdxDoc SPDX document which will contain the checksum
* @return SHA1 checksum of the input file
* @throws SpdxCollectionException if the algorithm is unavailable or the file cannot be read
* @throws InvalidSPDXAnalysisException
public static String generateSha1( File file, SpdxDocument spdxDoc ) throws SpdxCollectionException, InvalidSPDXAnalysisException
Set sha1 = new HashSet<>();
sha1.add( ChecksumAlgorithm.SHA1 );
Checksum sha1Checksum = generateChecksum( file, sha1, spdxDoc ).iterator().next();
return sha1Checksum.getValue();
* Generate checksums for a given file using each algorithm supplied. Must have read access to the file.
* @param file file whose checksum is to be generated
* @param algorithms algorithms to generate the checksums
* @param spdxDoc SPDX document which will contain the checksum
* @return {@code Set} of checksums for file using each algorithm specified
* @throws SpdxCollectionException if the input algorithm is invalid or unavailable or if the file cannot be read
* @throws InvalidSPDXAnalysisException
public static Set generateChecksum( File file, Set algorithms,
SpdxDocument spdxDoc ) throws SpdxCollectionException, InvalidSPDXAnalysisException
Set checksums = new HashSet<>();
byte[] buffer;
buffer = Files.readAllBytes( file.toPath() );
catch ( IOException e )
throw new SpdxCollectionException( "IO error while calculating checksums.", e );
for ( ChecksumAlgorithm algorithm : algorithms )
String checksumAlgorithm = checksumAlgorithms.get( algorithm );
MessageDigest digest;
digest = MessageDigest.getInstance( checksumAlgorithm );
catch ( NoSuchAlgorithmException e )
throw new SpdxCollectionException( e );
digest.update( buffer );
String checksum = convertChecksumToString( digest.digest() );
checksums.add( spdxDoc.createChecksum( algorithm, checksum ) );
return checksums;