org.whitesource.agent.hash.HashCalculator Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of wss-agent-hash-calculator Show documentation
There is a newer version: 18.4.1
Show newest version
/**
 * Copyright (C) 2017 White Source Ltd.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.whitesource.agent.hash;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.commons.lang.StringUtils;
import org.slf4j.LoggerFactory;
import org.whitesource.agent.api.model.ChecksumType;
import org.whitesource.agent.api.model.DependencyType;
import org.whitesource.agent.parser.JavaScriptParser;
import org.whitesource.agent.parser.ParseResult;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.MessageFormat;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumMap;
import java.util.Locale;
import java.util.Map;

/**
 * Utility class to calculate hash codes (SHA-1 by default) for files, byte arrays and
 * library coordinates.
 *
 * <p>The class declares no instance fields; every method works only on its arguments.</p>
 *
 * @author anna.rozin
 * @author tom.shapira
 */
public class HashCalculator {

    /* --- Static members --- */

    private static final org.slf4j.Logger logger = LoggerFactory.getLogger(HashCalculator.class);

    /** Size of the read buffer used when streaming a file through a digest. */
    private static final int BUFFER_SIZE = 32 * 1024;

    /** Content at or below this size (in bytes) is not super-hashed. */
    private static final int FILE_MIN_SIZE_THRESHOLD = 512;
    /** Stripped content up to this size only gets a full hash (no msb/lsb hashes). */
    private static final int FILE_PARTIAL_HASH_MIN_SIZE = 1024 * 2;
    /** Stripped content up to this size uses the fixed {@link #FILE_SMALL_BUCKET_SIZE} bucket. */
    private static final int FILE_SMALL_SIZE = 1024 * 3;
    /** Files of this size or more are skipped, since they are read fully into a byte array. */
    private static final int FILE_MAX_SIZE_THRESHOLD = Integer.MAX_VALUE;

    private static final double FILE_SMALL_BUCKET_SIZE = 1024 * 1.25;

    private static final char ZERO = '0';

    private static final byte CARRIAGE_RETURN = (byte) 0x0d;
    private static final byte NEW_LINE = (byte) 0x0a;
    private static final byte HORIZONTAL_TAB = (byte) 0x09;
    private static final byte SPACE = (byte) 0x20;

    private static final String UNDERSCORE = "_";

    /* --- Public methods --- */

    /**
     * Calculates 3 hashes for the given file:
     *
     * 1. Hash of the file without new lines and whitespaces
     * 2. Hash of the most significant bits of the file without new lines and whitespaces
     * 3. Hash of the least significant bits of the file without new lines and whitespaces
     *
     * @param file input
     * @return the calculated hashes, or {@code null} when the file is ignored: its size is at most
     *         512 bytes, at least {@code Integer.MAX_VALUE} bytes, its content is too small once
     *         whitespace is stripped, or it could not be loaded into memory
     * @throws IOException on file reading errors
     */
    public HashCalculationResult calculateSuperHash(File file) throws IOException {
        // Ignore files smaller than 0.5kb
        long fileSize = file.length();
        if (fileSize <= FILE_MIN_SIZE_THRESHOLD) {
            logger.debug("Ignored file {} ({}): minimum file size is 512B", file.getName(),
                    FileUtils.byteCountToDisplaySize(fileSize));
            return null;
        }
        if (fileSize >= FILE_MAX_SIZE_THRESHOLD) {
            logger.debug("Ignore file {}, ({}): maximum file size is 2GB", file.getName(),
                    FileUtils.byteCountToDisplaySize(fileSize));
            return null;
        }

        try {
            return calculateSuperHash(FileUtils.readFileToByteArray(file));
        } catch (OutOfMemoryError e) {
            // Deliberate best effort: a file that does not fit in memory is skipped, not fatal.
            logger.debug("Failed calculating SHA-1 for file {}: size too big {}",
                    file.getAbsolutePath(), FileUtils.byteCountToDisplaySize(fileSize));
            return null;
        }
    }

    /**
     * Calculates 3 hashes for the given bytes:
     *
     * 1. Hash of the file without new lines and whitespaces
     * 2. Hash of the most significant bits of the file without new lines and whitespaces
     * 3. Hash of the least significant bits of the file without new lines and whitespaces
     *
     * Content of up to 2kb (after stripping whitespace) only gets the full hash.
     *
     * @param bytes to calculate
     * @return the calculated hashes, or {@code null} when fewer than 512 bytes remain after
     *         whitespace is stripped
     * @throws IOException declared for API compatibility
     */
    public HashCalculationResult calculateSuperHash(byte[] bytes) throws IOException {
        // Remove white spaces
        byte[] bytesWithoutSpaces = stripWhiteSpaces(bytes);

        long fileSize = bytesWithoutSpaces.length;
        if (fileSize < FILE_MIN_SIZE_THRESHOLD) {
            // Ignore files smaller 1/2 kb
            logger.debug("Ignoring file with size {}: minimum file size is 512B",
                    FileUtils.byteCountToDisplaySize(fileSize));
            return null;
        }
        if (fileSize <= FILE_PARTIAL_HASH_MIN_SIZE) {
            // Don't calculate msb and lsb hashes for files smaller than 2kb
            String fullFileHash = calculateByteArrayHash(bytesWithoutSpaces, HashAlgorithm.SHA1);
            return new HashCalculationResult(fullFileHash);
        }
        if (fileSize <= FILE_SMALL_SIZE) {
            // Handle 2kb->3kb files
            return hashBuckets(bytesWithoutSpaces, FILE_SMALL_BUCKET_SIZE);
        }
        return hashBuckets(bytesWithoutSpaces, largeFileBucketSize(fileSize));
    }

    /**
     * Calculates the given file's SHA-1 hash code.
     *
     * @param resourceFile File to calculate
     * @return Calculated SHA-1 for the given file.
     * @throws IOException           on file reading errors.
     * @throws IllegalStateException when no algorithm for SHA-1 can be found.
     */
    public String calculateSHA1(File resourceFile) throws IOException {
        return calculateHash(resourceFile, HashAlgorithm.SHA1);
    }

    /**
     * Calculates the hash of the given file with the requested algorithm. The file is read
     * through a {@link BOMInputStream} in chunks of {@code BUFFER_SIZE} bytes.
     *
     * @param resourceFile File to calculate
     * @param algorithm    hash algorithm to use
     * @return lower-case hexadecimal digest of the bytes read from the file
     * @throws IOException           on file reading errors.
     * @throws IllegalStateException when the requested algorithm cannot be found.
     */
    public String calculateHash(File resourceFile, HashAlgorithm algorithm) throws IOException {
        MessageDigest messageDigest = newMessageDigest(algorithm);

        try (InputStream inputStream = new FileInputStream(resourceFile);
             BOMInputStream fis = new BOMInputStream(inputStream)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int len;
            while ((len = fis.read(buffer, 0, BUFFER_SIZE)) >= 0) {
                messageDigest.update(buffer, 0, len);
            }
        }
        return toHex(messageDigest.digest());
    }

    /**
     * Calculates the given byte array's SHA-1 hash code.
     *
     * @param byteArray to calculate
     * @return Calculated SHA-1 for the given byteArray.
     * @throws IOException           declared for API compatibility
     * @throws IllegalStateException when no algorithm for SHA-1 can be found.
     */
    public String calculateByteArraySHA1(byte[] byteArray) throws IOException {
        return calculateByteArrayHash(byteArray, HashAlgorithm.SHA1);
    }

    /**
     * Calculates the hash of the given byte array with the requested algorithm.
     *
     * @param byteArray to calculate
     * @param algorithm hash algorithm to use
     * @return lower-case hexadecimal digest of the byte array
     * @throws IOException           declared for API compatibility
     * @throws IllegalStateException when the requested algorithm cannot be found.
     */
    public String calculateByteArrayHash(byte[] byteArray, HashAlgorithm algorithm) throws IOException {
        MessageDigest messageDigest = newMessageDigest(algorithm);
        messageDigest.update(byteArray, 0, byteArray.length);
        return toHex(messageDigest.digest());
    }

    /**
     * Removes all JavaScript comments from the file and calculates SHA-1 checksums.
     *
     * @param file to calculate
     * @return Calculated SHA-1 checksums for the given file; empty when the file is
     *         {@code Integer.MAX_VALUE} bytes or larger.
     * @throws WssHashException when reading the file or calculating the checksums fails
     */
    public Map<ChecksumType, String> calculateJavaScriptHashes(File file) throws WssHashException {
        Map<ChecksumType, String> checksums = new EnumMap<>(ChecksumType.class);
        try {
            long fileLength = file.length();
            if (fileLength >= FILE_MAX_SIZE_THRESHOLD) {
                logger.debug("Ignore file {}, ({}): maximum file size  is 2GB", file.getName(),
                        FileUtils.byteCountToDisplaySize(fileLength));
                return checksums;
            }
            checksums = calculateJavaScriptHashes(FileUtils.readFileToByteArray(file));
        } catch (WssHashException e) {
            // Already carries the "Error calculating JavaScript hash" prefix; don't wrap it twice.
            throw e;
        } catch (Exception e) {
            throw new WssHashException("Error calculating JavaScript hash: " + e.getMessage());
        }
        return checksums;
    }

    /**
     * Parses the given bytes as UTF-8 JavaScript and calculates:
     *
     * 1. {@code SHA1_NO_COMMENTS_SUPER_HASH} - full super hash of the content without comments
     * 2. {@code SHA1_NO_HEADER} - SHA-1 of the content without header comments
     *
     * An entry is only added when the corresponding content is not blank (and, for the super
     * hash, when the content is large enough to be hashed).
     *
     * @param byteArray to calculate
     * @return Calculated SHA-1 checksums for the given content, possibly empty.
     * @throws WssHashException when parsing or hashing fails
     */
    public Map<ChecksumType, String> calculateJavaScriptHashes(byte[] byteArray) throws WssHashException {
        Map<ChecksumType, String> checksums = new EnumMap<>(ChecksumType.class);
        try {
            String fileContent = new String(byteArray, StandardCharsets.UTF_8);
            ParseResult parseResult = new JavaScriptParser().parse(fileContent);
            if (parseResult != null) {
                // no comments
                String contentWithoutComments = parseResult.getContentWithoutComments();
                if (StringUtils.isNotBlank(contentWithoutComments)) {
                    // Encode explicitly as UTF-8 so the hash does not depend on the platform charset
                    HashCalculationResult noCommentsSha1 =
                            calculateSuperHash(contentWithoutComments.getBytes(StandardCharsets.UTF_8));
                    if (noCommentsSha1 != null) {
                        checksums.put(ChecksumType.SHA1_NO_COMMENTS_SUPER_HASH, noCommentsSha1.getFullHash());
                    }
                }

                // no headers
                String headerlessContent = parseResult.getContentWithoutHeaderComments();
                if (StringUtils.isNotBlank(headerlessContent)) {
                    String headerlessChecksum = calculateByteArrayHash(
                            headerlessContent.getBytes(StandardCharsets.UTF_8), HashAlgorithm.SHA1);
                    checksums.put(ChecksumType.SHA1_NO_HEADER, headerlessChecksum);
                }
            }
        } catch (Exception e) {
            throw new WssHashException("Error calculating JavaScript hash: " + e.getMessage());
        }
        return checksums;
    }

    /**
     * Calculates SHA-1 for library by groupId, artifactId, version and dependencyType.
     * groupId, artifactId and version are lower-cased with {@link Locale#ROOT} so the result does
     * not depend on the default locale of the JVM.
     *
     * @param groupId of library
     * @param artifactId of library
     * @param version of library
     * @param dependencyType of library
     * @return Calculated SHA-1 of groupId_artifactId_version_dependencyType
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByGAVCoordinatesAndType(String groupId, String artifactId, String version,
                                                       DependencyType dependencyType) throws IOException {
        return sha1OfUnderscoreJoined(groupId.toLowerCase(Locale.ROOT), artifactId.toLowerCase(Locale.ROOT),
                version.toLowerCase(Locale.ROOT), dependencyType.toString());
    }

    /**
     * Calculates SHA-1 for library by name, version, architecture, release and dependencyType
     *
     * @param name of library
     * @param version of library
     * @param architecture of library
     * @param release of library
     * @param dependencyType of library
     * @return Calculated SHA-1 of name_version_architecture_release_dependencyType
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByNameVersionArchitectureReleaseAndType(String name, String version, String architecture,
                                                                       String release, DependencyType dependencyType) throws IOException {
        return sha1OfUnderscoreJoined(name, version, architecture, release, dependencyType.toString());
    }

    /**
     * Calculates SHA-1 for library by name, version, architecture, branch and dependencyType
     *
     * @param name of library
     * @param version of library
     * @param architecture of library
     * @param branch of operating system
     * @param dependencyType of library
     * @return Calculated SHA-1 of name_version_architecture_branch_dependencyType
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByNameVersionArchitectureBranchAndType(String name, String version, String architecture,
                                                                      String branch, DependencyType dependencyType) throws IOException {
        return sha1OfUnderscoreJoined(name, version, architecture, branch, dependencyType.toString());
    }

    /**
     * Calculates SHA-1 for library by name, version, architecture and dependencyType
     *
     * @param name of library
     * @param version of library
     * @param architecture of library
     * @param dependencyType of library
     * @return Calculated SHA-1 of name_version_architecture_dependencyType
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByNameVersionArchitectureAndType(String name, String version, String architecture,
                                                                DependencyType dependencyType) throws IOException {
        return sha1OfUnderscoreJoined(name, version, architecture, dependencyType.toString());
    }

    /**
     * Calculates SHA-1 for library by recipeName_recipeVersion_recipeRevision_layer_yoctoTag_language
     * (used for yocto).
     *
     * @param name of library
     * @param version of library
     * @param revision of library
     * @param layer of library
     * @param tag of library
     * @param type library
     * @return Calculated SHA-1 of name_version_revision_layer_tag_type
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByNameVersionRevisionLayerTagLanguage(String name, String version, String revision,
                                                                     String layer, String tag ,
                                                                     DependencyType type) throws IOException {
        return sha1OfUnderscoreJoined(name, version, revision, layer, tag, type.toString());
    }

    /**
     * Calculates SHA-1 for library by name, version and dependencyType
     *
     * @param name of library
     * @param version of library
     * @param dependencyType of library
     * @return Calculated SHA-1 of name_version_dependencyType
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByNameVersionAndType(String name, String version, DependencyType dependencyType) throws IOException {
        return sha1OfUnderscoreJoined(name, version, dependencyType.toString());
    }

    /* --- Private methods --- */

    /**
     * Creates a digest for the given algorithm.
     *
     * @throws IllegalStateException when the algorithm is not available in this JVM
     */
    private static MessageDigest newMessageDigest(HashAlgorithm algorithm) {
        try {
            return MessageDigest.getInstance(algorithm.getAlgorithm());
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
    }

    /**
     * Joins the parts with underscores and returns the SHA-1 of the UTF-8 encoded result.
     * A {@code null} part is rendered as the text "null", exactly like string concatenation.
     */
    private String sha1OfUnderscoreJoined(String... parts) throws IOException {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < parts.length; i++) {
            if (i > 0) {
                joined.append(UNDERSCORE);
            }
            joined.append(parts[i]);
        }
        return calculateByteArraySHA1(joined.toString().getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Computes the bucket size for content larger than 3kb: the content size is placed between
     * two consecutive multiples of the largest power of ten not exceeding it, and the bucket is
     * a quarter of the sum of those two bounds.
     */
    private static double largeFileBucketSize(long fileSize) {
        int baseLowNumber = 1;
        int digits = (int) Math.log10(fileSize);
        for (int i = 0; i < digits; i++) {
            baseLowNumber = baseLowNumber * 10;
        }
        // The float division is kept on purpose: changing its precision could change the bucket
        // boundaries and therefore the resulting hashes.
        double highNumber = Math.ceil((fileSize + 1) / (float) baseLowNumber) * baseLowNumber;
        double lowNumber = highNumber - baseLowNumber;
        return (highNumber + lowNumber) / 4;
    }

    /**
     * Hashes the whole content plus its first and last {@code bucketSize} bytes.
     *
     * @param fileWithoutSpaces content with whitespace already stripped
     * @param bucketSize        size of the leading / trailing bucket, rounded down to an int
     * @return result holding the full, most-significant and least-significant hashes
     */
    private HashCalculationResult hashBuckets(byte[] fileWithoutSpaces, double bucketSize) throws IOException {
        // int(bucket_size) will round down the bucket_size: IE: 1.2 -> 1.0
        int bucketIntSize = (int) bucketSize;

        // Get bytes and calculate sha1
        int length = fileWithoutSpaces.length;
        byte[] mostSigBytes = Arrays.copyOfRange(fileWithoutSpaces, 0, bucketIntSize);
        byte[] leastSigBytes = Arrays.copyOfRange(fileWithoutSpaces, length - bucketIntSize, length);

        String fullFileHash = calculateByteArraySHA1(fileWithoutSpaces);
        String mostSigBitsHash = calculateByteArraySHA1(mostSigBytes);
        String leastSigBitsHash = calculateByteArraySHA1(leastSigBytes);
        // NOTE(review): relies on HashCalculationResult(fullHash, mostSigBitsHash, leastSigBitsHash);
        // confirm the three-argument constructor against HashCalculationResult.
        return new HashCalculationResult(fullFileHash, mostSigBitsHash, leastSigBitsHash);
    }

    /** Renders the bytes as a lower-case hexadecimal string, two characters per byte. */
    private String toHex(byte[] bytes) {
        StringBuilder sb = new StringBuilder(bytes.length * 2);
        for (byte aByte : bytes) {
            int b = aByte & 0xFF;
            if (b < 0x10) {
                sb.append(ZERO);
            }
            sb.append(Integer.toHexString(b));
        }
        return sb.toString();
    }

    /** Returns true for carriage return, new line, horizontal tab and space. */
    private static boolean isWhiteSpace(byte b) {
        return b == CARRIAGE_RETURN || b == NEW_LINE || b == HORIZONTAL_TAB || b == SPACE;
    }

    /**
     * Removes all carriage return, new line, horizontal tab and space bytes from the data.
     *
     * @param data - byte array
     * @return a new byte array holding the remaining bytes in their original order
     */
    private byte[] stripWhiteSpaces(byte[] data) {
        // The stripped content can never be longer than the input, so presize to avoid regrowth.
        ByteArrayOutputStream bos = new ByteArrayOutputStream(data.length);
        for (byte b : data) {
            if (!isWhiteSpace(b)) {
                bos.write(b);
            }
        }
        return bos.toByteArray();
    }
}