All downloads are free. The search and download functionality uses the official Maven repository.

org.spdx.maven.SpdxFileCollector Maven / Gradle / Ivy

/*

 * Copyright 2014 Source Auditor Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License" );
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.spdx.maven;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;


import org.apache.maven.plugin.logging.Log;
import org.apache.maven.shared.model.fileset.FileSet;
import org.apache.maven.shared.model.fileset.util.FileSetManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.spdx.rdfparser.InvalidSPDXAnalysisException;
import org.spdx.rdfparser.SpdxDocumentContainer;
import org.spdx.rdfparser.SpdxPackageVerificationCode;
import org.spdx.rdfparser.license.AnyLicenseInfo;
import org.spdx.rdfparser.model.DoapProject;
import org.spdx.rdfparser.model.Relationship;
import org.spdx.rdfparser.model.Relationship.RelationshipType;
import org.spdx.rdfparser.model.SpdxFile;
import org.spdx.rdfparser.model.SpdxFile.FileType;
import org.spdx.rdfparser.model.SpdxPackage;
import org.spdx.rdfparser.model.SpdxSnippet;
import org.spdx.spdxspreadsheet.InvalidLicenseStringException;


/**
 * Collects SPDX file information from directories.
 * 
 * The method {@code collectFiles(...)} scans the supplied filesets and creates
 * SPDX File information (and any configured snippets) for every included file.
 * The collected results are available through {@link #getFiles()},
 * {@link #getSnippets()}, {@link #getLicenseInfoFromFiles()} and
 * {@link #getVerificationCode(String)}.
 * 
 * @author Gary O'Neall
 *
 */
public class SpdxFileCollector 
{
    static Logger logger = LoggerFactory.getLogger( SpdxFileCollector.class );
    // constants for mapping extensions to types.
    // The sets hold upper-cased extensions without the leading dot.
    static HashSet<String> SOURCE_EXTENSIONS = new HashSet<String>();
    static HashSet<String> BINARY_EXTENSIONS = new HashSet<String>();
    static HashSet<String> ARCHIVE_EXTENSIONS = new HashSet<String>();
    static final String SPDX_FILE_TYPE_CONSTANTS_PROP_PATH = "resources/SpdxFileTypeConstants.prop";
    static final String SPDX_PROP_FILETYPE_SOURCE = "SpdxSourceExtensions";
    static final String SPDX_PROP_FILETYPE_BINARY = "SpdxBinaryExtensions";
    static final String SPDX_PROP_FILETYPE_ARCHIVE = "SpdxArchiveExtensions";
    static 
    {
        // must run after the logger and the extension sets above have been initialized
        loadFileExtensionConstants();
    }
    
    static final String SHA1_ALGORITHM = "SHA-1";
    static final String PACKAGE_VERIFICATION_CHARSET = "UTF-8";
    
    /**
     * Licenses found in the collected files (the "license info from files" of every SPDX file added)
     */
    Set<AnyLicenseInfo> licensesFromFiles = new HashSet<AnyLicenseInfo>();
    /**
     * Map of fileName, SPDXFile for all files in the SPDX document.
     * The key is the value of File.getPath() for the collected file.
     */
    Map<String, SpdxFile> spdxFiles = new HashMap<String, SpdxFile>();
    /**
     * Snippets created for the collected files
     */
    List<SpdxSnippet> spdxSnippets = new ArrayList<SpdxSnippet>();
    
    FileSetManager fileSetManager = new FileSetManager();
    private Log log;

    /**
     * SpdxFileCollector collects SPDX file information for files
     * @param log Maven log used for messages; may be null, in which case
     * debug messages are sent to the class logger instead
     */
    public SpdxFileCollector( Log log ) 
    {
        this.log = log;
    }
 
    /**
     * Load file type constants from the properties file.
     * 
     * If the properties file can not be found or read, an error/warning is logged and
     * the extension sets are left as they are, so files map to the "other" file type.
     */
    private static void loadFileExtensionConstants()
    {
        InputStream is = SpdxFileCollector.class.getClassLoader().getResourceAsStream( SPDX_FILE_TYPE_CONSTANTS_PROP_PATH );
        if ( is == null ) 
        {
            // Properties.load(null) would throw a NullPointerException out of the
            // static initializer and make the whole class unusable - stop here instead
            logger.error( "Unable to load properties file "+SPDX_FILE_TYPE_CONSTANTS_PROP_PATH );
            return;
        }
        try 
        {
            Properties prop = new Properties();
            prop.load( is );
            loadSetUpcase( SOURCE_EXTENSIONS, prop.getProperty( SPDX_PROP_FILETYPE_SOURCE ) );
            loadSetUpcase( BINARY_EXTENSIONS, prop.getProperty( SPDX_PROP_FILETYPE_BINARY ) );
            loadSetUpcase( ARCHIVE_EXTENSIONS, prop.getProperty( SPDX_PROP_FILETYPE_ARCHIVE ) );
        }
        catch ( IOException e )
        {
            logger.warn( "WARNING: Error reading SpdxFileTypeConstants properties file.  All file types will be mapped to Other.", e );
        } 
        finally 
        {
            try 
            {
                is.close();
            } 
            catch ( IOException e ) 
            {
                logger.warn( "WARNING: Error closing SpdxFileTypeConstants properties file", e );
            }
        }
    }

    /**
     * Load a set from a comma delimited string of values trimming and upcasing all values
     * @param set Set to which the values are added
     * @param str Comma delimited values; if null (property not defined) nothing is added
     */
    private static void loadSetUpcase( Set<String> set, String str )
    {
        if ( str == null ) 
        {
            return;
        }
        String[] values = str.split( "," );
        for ( String value : values )
        {
            String normalized = value.trim().toUpperCase();
            // an empty entry would make every file without an extension match this set
            if ( normalized.length() > 0 ) 
            {
                set.add( normalized );
            }
        }
    }

    /**
     * Collect file information in the directory (including subdirectories).  
     * @param fileSets FileSets containing the description of the directory to be scanned
     * @param baseDir project base directory used to construct the relative paths for the SPDX files 
     * @param defaultFileInformation Information on default SPDX field data for the files
     * @param pathSpecificInformation Map of path to file information used to override the default file information
     * @param projectPackage Package to which the files belong
     * @param relationshipType Type of relationship to the project package 
     * @param container contains the extracted license infos that may be needed for license parsing
     * @throws SpdxCollectionException 
     */
    public void collectFiles( FileSet[] fileSets,
                              String baseDir, SpdxDefaultFileInformation defaultFileInformation,
                              Map<String, SpdxDefaultFileInformation> pathSpecificInformation, 
                              SpdxPackage projectPackage, RelationshipType relationshipType,
                              SpdxDocumentContainer container) throws SpdxCollectionException 
    {
        if ( fileSets == null ) 
        {
            return; // nothing to scan
        }
        for ( FileSet fileSet : fileSets ) 
        {
            String[] includedFiles = fileSetManager.getIncludedFiles( fileSet );
            for ( String includedFile : includedFiles )
            {
                File file = new File( fileSet.getDirectory() + File.separator + includedFile );
                // NOTE(review): this assumes the file is located below baseDir and that
                // baseDir has no trailing separator - confirm against the callers
                String pathFromBaseDir = file.getAbsolutePath().substring( baseDir.length() + 1 );
                // path specific information is looked up using '/' separated paths
                String relativeFilePath = pathFromBaseDir.replace( '\\', '/' );
                SpdxDefaultFileInformation fileInfo = findDefaultFileInformation( relativeFilePath, pathSpecificInformation );
                if ( fileInfo == null ) 
                {
                    fileInfo = defaultFileInformation;
                }
                
                String outputFileName;
                if ( fileSet.getOutputDirectory() != null ) 
                {
                    outputFileName = fileSet.getOutputDirectory() + File.separator + includedFile;
                } 
                else 
                {
                    outputFileName = pathFromBaseDir;
                }
                collectFile( file, outputFileName, fileInfo, relationshipType, projectPackage, container );
            }
        }
    }
    
    /**
     * Find the most appropriate file information based on the lowest level match (closest to file).
     * An exact match on the file path wins; otherwise the parent directories are tried
     * from the innermost to the outermost.
     * @param filePath '/' separated file path
     * @param pathSpecificInformation Map of path to file information; may be null
     * @return the matching file information or null if neither the path nor any parent directory has an entry
     */
    private SpdxDefaultFileInformation findDefaultFileInformation( String filePath,
                                                                   Map<String, SpdxDefaultFileInformation> pathSpecificInformation )
    {
        debug( "Checking for file path "+filePath );
        if ( pathSpecificInformation == null ) 
        {
            return null;
        }
        SpdxDefaultFileInformation retval = pathSpecificInformation.get( filePath );
        if ( retval != null ) 
        {
            debug( "Found filepath" );
            return retval;
        }
        // see if any of the parent directories contain default information which should be used
        String parentPath = filePath;
        int parentPathIndex = parentPath.lastIndexOf( "/" );
        while ( parentPathIndex > 0 )
        {
            parentPath = parentPath.substring( 0, parentPathIndex );
            retval = pathSpecificInformation.get( parentPath );
            if ( retval != null )
            {
                debug( "Found directory containing file path for path specific information.  File path: "+parentPath );
                return retval;
            }
            parentPathIndex = parentPath.lastIndexOf( "/" );
        }
        return null;
    }

    /**
     * Write a debug message to the Maven log if one is set, otherwise to the class logger
     * @param msg message to log
     */
    private void debug( String msg )
    {
        if ( this.getLog() != null ) 
        {
            this.getLog().debug( msg );
        } 
        else 
        {
            logger.debug( msg );
        }
    }

    /**
     * Collect SPDX information for a specific file.  Files which have already been
     * collected (same File.getPath()) are skipped.
     * @param file
     * @param outputFileName Path to the output file name relative to the root of the output archive file
     * @param fileInfo SPDX field data (including snippets) to be used for the file
     * @param relationshipType Type of relationship to the project package 
     * @param projectPackage Package to which the files belong
     * @param container contains the extracted license infos that may be needed for license parsing
     * @throws SpdxCollectionException
     */
    private void collectFile( File file, String outputFileName, 
                              SpdxDefaultFileInformation fileInfo, RelationshipType relationshipType, 
                              SpdxPackage projectPackage, SpdxDocumentContainer container ) throws SpdxCollectionException
    {
        if ( spdxFiles.containsKey( file.getPath() )) 
        {
            return; // already added from a previous scan
        }
        SpdxFile spdxFile = convertToSpdxFile( file, outputFileName, fileInfo );
        Relationship relationship = new Relationship(projectPackage, relationshipType, "");
        try
        {
            spdxFile.addRelationship( relationship );
        }
        catch ( InvalidSPDXAnalysisException e )
        {
            if ( log != null ) {
                log.error( "Spdx exception creating file relationship: "+e.getMessage(), e );   
            }
            throw new SpdxCollectionException( "Error creating SPDX file relationship: "+e.getMessage(), e );
        }
        if ( fileInfo.getSnippets() != null ) {
            for ( SnippetInfo snippet:fileInfo.getSnippets() ) {
                SpdxSnippet spdxSnippet;
                try
                {
                    spdxSnippet = convertToSpdxSnippet( snippet, spdxFile, container  );
                }
                catch ( InvalidLicenseStringException e )
                {
                    logger.error( "Invalid license string creating snippet", e );
                    throw new SpdxCollectionException( "Error processing SPDX snippet information.  Invalid license string specified in snippet.",e );
                }
                catch ( SpdxBuilderException e )
                {
                    logger.error( "Error creating SPDX snippet", e );
                    throw new SpdxCollectionException( "Error creating SPDX snippet information.",e );
                }
                spdxSnippets.add( spdxSnippet );
            }
        }
        // the file is only registered once the relationship and all snippets were created
        spdxFiles.put( file.getPath(), spdxFile );
        for ( AnyLicenseInfo licenseInfo : spdxFile.getLicenseInfoFromFiles() ) 
        {
            licensesFromFiles.add( licenseInfo );
        }
    }

    /**
     * Create an SPDX snippet from the snippet information
     * @param snippet Snippet information to convert
     * @param spdxFile File containing the snippet
     * @param container contains the extracted license infos that may be needed for license parsing
     * @return SPDX snippet without annotations or relationships
     * @throws InvalidLicenseStringException
     * @throws SpdxBuilderException
     */
    private SpdxSnippet convertToSpdxSnippet( SnippetInfo snippet, SpdxFile spdxFile, SpdxDocumentContainer container ) throws InvalidLicenseStringException, SpdxBuilderException
    {
        //TODO: Add annotations to snippet
        return new SpdxSnippet( snippet.getName(), 
                                snippet.getComment(), 
                                new org.spdx.rdfparser.model.Annotation[0], 
                                new Relationship[0], 
                                snippet.getLicenseConcluded( container ), 
                                snippet.getLicenseInfoInSnippet( container ),
                                snippet.getCopyrightText(),
                                snippet.getLicensComment(), 
                                spdxFile, 
                                snippet.getByteRange( spdxFile ),
                                snippet.getLineRange( spdxFile ) );
    }

    /**
     * Create an SPDX file for a file on disk using the supplied default field data.
     * The file type is derived from the file extension and the checksum is the SHA1 of the file content.
     * @param file
     * @param outputFileName Path to the output file name relative to the root of the output archive file
     * @param defaultFileInformation Information on default SPDX field data for the files
     * @return the created SPDX file
     * @throws SpdxCollectionException
     */
    private SpdxFile convertToSpdxFile( File file, String outputFileName,
                                        SpdxDefaultFileInformation defaultFileInformation) throws SpdxCollectionException 
    {
        String relativePath = convertFilePathToSpdxFileName( outputFileName );
        FileType[] fileTypes =  new FileType[] {extensionToFileType( getExtension( file ) )};
        String sha1 = generateSha1( file );
        AnyLicenseInfo license = defaultFileInformation.getDeclaredLicense();
        String copyright = defaultFileInformation.getCopyright();
        String notice = defaultFileInformation.getNotice();
        String comment = defaultFileInformation.getComment();
        String[] contributors = defaultFileInformation.getContributors();
        DoapProject[] artifactOf = new DoapProject[0];
        AnyLicenseInfo concludedLicense = defaultFileInformation.getConcludedLicense();
        String licenseComment = defaultFileInformation.getLicenseComment();
        //TODO: Add annotation
        //TODO: Add optional checksums
        try
        {
            SpdxFile retval = new SpdxFile( relativePath, fileTypes, 
                    sha1, concludedLicense, new AnyLicenseInfo[] {license}, 
                    licenseComment, copyright, artifactOf, comment );
            retval.setFileContributors( contributors );
            retval.setNoticeText( notice );
            return retval;
        }
        catch ( InvalidSPDXAnalysisException e )
        {
            if ( log != null ) {
                log.error( "Spdx exception creating file: "+e.getMessage(), e );
            }
            throw new SpdxCollectionException( "Error creating SPDX file: "+e.getMessage(), e );
        }
    }

    /**
     * Create the SPDX file name from a system specific path name
     * @param filePath system specific file path relative to the top of the archive root
     * to the top of the archive directory where the file is stored.
     * @return the path using '/' as separator and starting with "./"
     */
    public String convertFilePathToSpdxFileName( String filePath )
    {
        String result = filePath.replace( '\\', '/' );
        if ( !result.startsWith( "./" )) 
        {
            result = "./" + result;
        }
        return result;
    }

    /**
     * @param file
     * @return the text after the last '.' of the file name or an empty string if the
     * name contains no '.' or only starts with one (e.g. ".gitignore")
     */
    public String getExtension( File file ) {
        String fileName = file.getName();
        int lastDot = fileName.lastIndexOf( '.' );
        if ( lastDot < 1 ) 
        {
            return "";
        } else {
            return fileName.substring( lastDot+1 );
        }
    }

    /**
     * Map a file extension to an SPDX file type using the extension sets loaded from the properties file
     * @param fileExtension extension without the leading '.'; case is ignored
     * @return source, binary or archive if the extension is in the respective set, otherwise other
     */
    private static FileType extensionToFileType( String fileExtension ) {
        //TODO: Add other file types
        //TODO: Add new file types for SPDX 2.0
        if ( fileExtension == null ) {
            return FileType.fileType_other;
        }
        String upperExtension = fileExtension.toUpperCase();
        if ( SOURCE_EXTENSIONS.contains( upperExtension ) ) 
        {
            return FileType.fileType_source;
        } else if ( BINARY_EXTENSIONS.contains( upperExtension ) ) 
        {
            return FileType.fileType_binary;
        } else if ( ARCHIVE_EXTENSIONS.contains( upperExtension ) ) 
        {
            return FileType.fileType_archive;
        } else 
        {
            return FileType.fileType_other;
        }
    }

    /**
     * @return SPDX Files which have been acquired through the collectFiles method
     */
    public SpdxFile[] getFiles() 
    {
        return spdxFiles.values().toArray( new SpdxFile[spdxFiles.size()] );
    }
    
    /**
     * @return SPDX Snippets collected through the collectFiles method
     */
    public List<SpdxSnippet> getSnippets() 
    {
        return this.spdxSnippets;
    }

    /**
     * @return all license information used in the SPDX files
     */
    public AnyLicenseInfo[] getLicenseInfoFromFiles() 
    {
        return licensesFromFiles.toArray( new AnyLicenseInfo[licensesFromFiles.size()] );
    }

    /**
     * Create a verification code from all SPDX files collected
     * @param spdxFilePath Complete file path for the SPDX file - this will be excluded from the verification code
     * @return verification code for the collected files
     * @throws NoSuchAlgorithmException
     */
    public SpdxPackageVerificationCode getVerificationCode( String spdxFilePath ) throws NoSuchAlgorithmException 
    {
        ArrayList<String> excludedFileNamesFromVerificationCode = new ArrayList<String>();

        // the SPDX file is only excluded if it is one of the collected files
        if ( spdxFilePath != null && spdxFiles.containsKey( spdxFilePath ) ) 
        {
            excludedFileNamesFromVerificationCode.add( spdxFiles.get( spdxFilePath ).getName() );
        }            
        return calculatePackageVerificationCode( spdxFiles.values(), excludedFileNamesFromVerificationCode );
    }

    /**
     * Calculate the package verification code for a collection of SPDX files.
     * The code is the SHA1 over the sorted SHA1 checksums of all files which are not excluded.
     * @param spdxFiles Files used to calculate the verification code
     * @param excludedFileNamesFromVerificationCode List of file names to exclude
     * @return verification code including the names of the excluded files
     * @throws NoSuchAlgorithmException
     */
    private SpdxPackageVerificationCode calculatePackageVerificationCode(
            Collection<SpdxFile> spdxFiles,
            ArrayList<String> excludedFileNamesFromVerificationCode ) throws NoSuchAlgorithmException 
    {
        List<String> fileChecksums = new ArrayList<String>();
        for ( SpdxFile file : spdxFiles ) 
        {
            if ( includeInVerificationCode( file.getName(), excludedFileNamesFromVerificationCode ) ) 
            {
                fileChecksums.add( file.getSha1() );
            }
        }
        // sorting makes the result independent of the order in which the files were collected
        Collections.sort( fileChecksums );
        MessageDigest verificationCodeDigest = MessageDigest.getInstance( SHA1_ALGORITHM );
        Charset checksumCharset = Charset.forName( PACKAGE_VERIFICATION_CHARSET );
        for ( String checksum : fileChecksums ) 
        {
            verificationCodeDigest.update( checksum.getBytes( checksumCharset ) );
        }
        String value = convertChecksumToString( verificationCodeDigest.digest() );
        return new SpdxPackageVerificationCode( value, excludedFileNamesFromVerificationCode.toArray(
                new String[excludedFileNamesFromVerificationCode.size()] ) );
    }

    /**
     * @param name SPDX file name
     * @param excludedFileNamesFromVerificationCode List of file names to exclude
     * @return true unless the name is one of the excluded file names
     */
    private boolean includeInVerificationCode( String name, ArrayList<String> excludedFileNamesFromVerificationCode ) 
    {
        return !excludedFileNamesFromVerificationCode.contains( name );
    }
    
    /**
     * Converts an array of bytes to a string compliant with the SPDX sha1 representation
     * @param digestBytes
     * @return lower case hexadecimal string with two characters per byte
     */
    public static String convertChecksumToString( byte[] digestBytes ) 
    {
        StringBuilder sb = new StringBuilder();   
        for ( int i = 0; i < digestBytes.length; i++ ) 
        {
            // mask to treat the byte as unsigned
            String hex = Integer.toHexString( 0xff & digestBytes[i] );
            if ( hex.length() < 2 ) 
            {
                sb.append( '0' );
            }
            sb.append( hex );
        }
        return sb.toString();
    }
    
    /**
     * Generate the Sha1 for a given file.  Must have read access to the file.
     * @param file
     * @return SHA1 of the file content as a lower case hexadecimal string
     * @throws SpdxCollectionException if the digest can not be created or the file can not be read or closed
     */
    public static String generateSha1( File file ) throws SpdxCollectionException 
    {
        // a new digest is created for every call so that no digest state is
        // shared between callers of this static method
        MessageDigest sha1Digest;
        try 
        {
            sha1Digest = MessageDigest.getInstance( SHA1_ALGORITHM );
        } 
        catch ( NoSuchAlgorithmException e ) 
        {
            throw new SpdxCollectionException( "Unable to create the message digest for generating the File SHA1", e );
        }
        InputStream in;
        try 
        {
            in = new FileInputStream( file );
        } 
        catch ( IOException e ) 
        {
            throw new SpdxCollectionException( "IO getting file content while calculating the SHA1", e );
        }
        try 
        {
            byte[] buffer = new byte[2048];
            int numBytes = in.read( buffer );
            while ( numBytes >= 0 ) 
            {
                sha1Digest.update( buffer, 0, numBytes );
                numBytes = in.read( buffer );
            }
            return convertChecksumToString( sha1Digest.digest() );
        } 
        catch ( IOException e ) 
        {
            throw new SpdxCollectionException( "IO error reading file input stream while calculating the SHA1", e );
        } 
        finally 
        {
            try 
            {
                in.close(); 
            } 
            catch ( IOException e ) 
            {
                throw new SpdxCollectionException( "IO error closing file input stream while calculating the SHA1", e );
            }
        }
    }

    /**
     * @param log Maven log to be used for messages; may be null
     */
    public void setLog( Log log )
    {
        this.log = log;
    }
    
    private Log getLog()
    {
        return this.log;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy