// org.whitesource.agent.hash.HashCalculator (Maven / Gradle / Ivy)
/**
* Copyright (C) 2017 White Source Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.whitesource.agent.hash;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.commons.lang.StringUtils;
import org.slf4j.LoggerFactory;
import org.whitesource.agent.api.model.ChecksumType;
import org.whitesource.agent.api.model.DependencyType;
import org.whitesource.agent.parser.JavaScriptParser;
import org.whitesource.agent.parser.ParseResult;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.MessageFormat;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumMap;
import java.util.Locale;
import java.util.Map;
/**
 * Calculates hash codes (SHA-1 unless another {@link HashAlgorithm} is given) for files, byte
 * arrays and library coordinates.
 *
 * <p>All public methods are instance methods; the class itself declares no instance fields.
 *
 * @author anna.rozin
 * @author tom.shapira
 */
public class HashCalculator {

    /* --- Static members --- */

    private static final org.slf4j.Logger logger = LoggerFactory.getLogger(HashCalculator.class);

    /** Size of the read buffer used when digesting a file stream. */
    private static final int BUFFER_SIZE = 32 * 1024;

    /** Content at or below this size (in bytes) is not super-hashed. */
    private static final int FILE_MIN_SIZE_THRESHOLD = 512;

    /** Whitespace-stripped content up to this size is hashed without bucket handling. */
    private static final int FILE_PARTIAL_HASH_MIN_SIZE = 1024 * 2;

    /** Whitespace-stripped content up to this size uses the fixed small bucket size. */
    private static final int FILE_SMALL_SIZE = 1024 * 3;

    /** Files of this length or more are skipped, since they cannot be held in a single byte array. */
    private static final int FILE_MAX_SIZE_THRESHOLD = Integer.MAX_VALUE;

    /** Bucket size used for content between 2kb and 3kb. */
    private static final double FILE_SMALL_BUCKET_SIZE = 1024 * 1.25;

    private static final char ZERO = '0';

    private static final byte CARRIAGE_RETURN = (byte) 0x0d;
    private static final byte NEW_LINE = (byte) 0x0a;
    private static final byte HORIZONTAL_TAB = (byte) 0x09;
    private static final byte SPACE = (byte) 0x20;

    private static final String UTF_8 = "utf-8";
    private static final String UNDERSCORE = "_";

    /* --- Public methods --- */

    /**
     * Calculates the super hash of the given file: the SHA-1 of its content after all carriage
     * return, line feed, horizontal tab and space bytes have been removed.
     *
     * <p>The whole file is read into memory before hashing.
     *
     * <p>NOTE(review): earlier documentation promised three hashes (full content, most significant
     * part, least significant part). The code in this class only ever builds the result from the
     * full-content hash.
     *
     * @param file input file
     * @return the hash result, or {@code null} when the file is skipped: its length is at most
     *         512 bytes, its length is {@code Integer.MAX_VALUE} bytes or more, fewer than 512
     *         bytes remain after whitespace removal, or the JVM ran out of memory while
     *         processing it
     * @throws IOException if the file cannot be read
     */
    public HashCalculationResult calculateSuperHash(File file) throws IOException {
        long fileSize = file.length();

        // Cheap pre-filter on the raw length, before the file is loaded into memory.
        if (fileSize <= FILE_MIN_SIZE_THRESHOLD) {
            logger.debug("Ignored file {} ({}): minimum file size is 512B", file.getName(),
                    FileUtils.byteCountToDisplaySize(fileSize));
            return null;
        }
        if (fileSize >= FILE_MAX_SIZE_THRESHOLD) {
            logger.debug("Ignore file {}, ({}): maximum file size is 2GB", file.getName(),
                    FileUtils.byteCountToDisplaySize(fileSize));
            return null;
        }

        try {
            return calculateSuperHash(FileUtils.readFileToByteArray(file));
        } catch (OutOfMemoryError e) {
            // Deliberate best effort: a file too large for the heap is skipped, not fatal.
            logger.debug("Failed calculating SHA-1 for file {}: size too big {}",
                    file.getAbsolutePath(), FileUtils.byteCountToDisplaySize(fileSize));
            return null;
        }
    }

    /**
     * Calculates the super hash of the given content: the SHA-1 of the bytes that remain after all
     * carriage return, line feed, horizontal tab and space bytes have been removed.
     *
     * <p>A bucket size is derived from the stripped length for content larger than 2kb and handed
     * to the bucket hashing step.
     *
     * <p>NOTE(review): the bucket size does not currently influence the result, because only the
     * full-content hash is stored in the returned object.
     *
     * @param bytes content to hash
     * @return the hash result, or {@code null} when fewer than 512 bytes remain after whitespace
     *         removal
     * @throws IOException declared for callers; propagated from the hashing helpers
     */
    public HashCalculationResult calculateSuperHash(byte[] bytes) throws IOException {
        byte[] bytesWithoutSpaces = stripWhiteSpaces(bytes);
        long fileSize = bytesWithoutSpaces.length;

        if (fileSize < FILE_MIN_SIZE_THRESHOLD) {
            // Ignore content smaller than 1/2 kb
            logger.debug("Ignoring file with size {}: minimum file size is 512B",
                    FileUtils.byteCountToDisplaySize(fileSize));
            return null;
        }
        if (fileSize <= FILE_PARTIAL_HASH_MIN_SIZE) {
            // Up to 2kb: full hash only, no bucket handling
            String fullFileHash = calculateByteArrayHash(bytesWithoutSpaces, HashAlgorithm.SHA1);
            return new HashCalculationResult(fullFileHash);
        }
        if (fileSize <= FILE_SMALL_SIZE) {
            // 2kb -> 3kb: fixed bucket size
            return hashBuckets(bytesWithoutSpaces, FILE_SMALL_BUCKET_SIZE);
        }

        // Larger content: baseLowNumber is the largest power of ten not exceeding the size.
        int digits = (int) Math.log10(fileSize);
        int baseLowNumber = 1;
        for (int i = 0; i < digits; i++) {
            baseLowNumber = baseLowNumber * 10;
        }
        // highNumber is (fileSize + 1) rounded up to a multiple of baseLowNumber, lowNumber the
        // multiple just below it; the bucket is a quarter of their sum.
        // The float division is kept exactly as it was so computed bucket sizes do not change.
        double highNumber = Math.ceil((fileSize + 1) / (float) baseLowNumber) * baseLowNumber;
        double lowNumber = highNumber - baseLowNumber;
        double bucketSize = (highNumber + lowNumber) / 4;
        return hashBuckets(bytesWithoutSpaces, bucketSize);
    }

    /**
     * Calculates the given file's SHA-1 hash code.
     *
     * @param resourceFile File to calculate
     * @return Calculated SHA-1 for the given file, as lower-case hexadecimal.
     * @throws IOException on file reading errors.
     * @throws IllegalStateException when no algorithm for SHA-1 can be found.
     */
    public String calculateSHA1(File resourceFile) throws IOException {
        return calculateHash(resourceFile, HashAlgorithm.SHA1);
    }

    /**
     * Calculates the given file's hash code with the requested algorithm.
     *
     * <p>The file is streamed through a Commons IO {@code BOMInputStream} created with its default
     * settings, so the bytes digested are whatever that stream returns.
     * NOTE(review): with default settings this is expected to skip a leading UTF-8 byte order
     * mark; confirm against the Commons IO version in use.
     *
     * @param resourceFile File to calculate
     * @param algorithm digest algorithm to use
     * @return Calculated hash for the given file, as lower-case hexadecimal.
     * @throws IOException on file reading errors.
     * @throws IllegalStateException when the requested algorithm is not available.
     */
    public String calculateHash(File resourceFile, HashAlgorithm algorithm) throws IOException {
        MessageDigest messageDigest = newDigest(algorithm);
        try (InputStream inputStream = new FileInputStream(resourceFile);
             BOMInputStream fis = new BOMInputStream(inputStream)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int len;
            while ((len = fis.read(buffer, 0, BUFFER_SIZE)) >= 0) {
                messageDigest.update(buffer, 0, len);
            }
        }
        return toHex(messageDigest.digest());
    }

    /**
     * Calculates the SHA-1 hash code of the given bytes.
     *
     * @param byteArray to calculate
     * @return Calculated SHA-1 for the given byteArray, as lower-case hexadecimal.
     * @throws IOException declared for callers; this implementation performs no I/O.
     * @throws IllegalStateException when no algorithm for SHA-1 can be found.
     */
    public String calculateByteArraySHA1(byte[] byteArray) throws IOException {
        return calculateByteArrayHash(byteArray, HashAlgorithm.SHA1);
    }

    /**
     * Calculates the hash code of the given bytes with the requested algorithm.
     *
     * @param byteArray to calculate
     * @param algorithm digest algorithm to use
     * @return Calculated hash for the given byteArray, as lower-case hexadecimal.
     * @throws IOException declared for callers; this implementation performs no I/O.
     * @throws IllegalStateException when the requested algorithm is not available.
     */
    public String calculateByteArrayHash(byte[] byteArray, HashAlgorithm algorithm) throws IOException {
        return toHex(newDigest(algorithm).digest(byteArray));
    }

    /**
     * Reads the file and calculates its JavaScript checksums (see
     * {@link #calculateJavaScriptHashes(byte[])}).
     *
     * @param file to calculate
     * @return Calculated checksums for the given file; empty when the file length is
     *         {@code Integer.MAX_VALUE} bytes or more.
     * @throws WssHashException when reading or hashing fails
     */
    public Map<ChecksumType, String> calculateJavaScriptHashes(File file) throws WssHashException {
        Map<ChecksumType, String> checksums = new EnumMap<>(ChecksumType.class);
        try {
            long fileLength = file.length();
            if (fileLength >= FILE_MAX_SIZE_THRESHOLD) {
                logger.debug("Ignore file {}, ({}): maximum file size is 2GB", file.getName(),
                        FileUtils.byteCountToDisplaySize(fileLength));
                return checksums;
            }
            checksums = calculateJavaScriptHashes(FileUtils.readFileToByteArray(file));
        } catch (WssHashException e) {
            // Already carries the "Error calculating JavaScript hash" prefix; do not wrap it twice.
            throw e;
        } catch (Exception e) {
            // NOTE(review): only the message is propagated, so the original stack trace is lost.
            // Pass e as the cause if WssHashException offers such a constructor.
            throw new WssHashException("Error calculating JavaScript hash: " + e.getMessage());
        }
        return checksums;
    }

    /**
     * Parses the content as JavaScript and calculates up to two checksums:
     * the super hash of the content without any comments, and the SHA-1 of the content without
     * header comments. An entry is only added when the corresponding content is not blank (and,
     * for the super hash, when the content is large enough to be super-hashed).
     *
     * <p>The content is decoded as UTF-8 and the parser output is encoded back as UTF-8 before
     * hashing, so the result does not depend on the platform default charset.
     *
     * @param byteArray to calculate
     * @return Calculated checksums for the given content; possibly empty.
     * @throws WssHashException when parsing or hashing fails
     */
    public Map<ChecksumType, String> calculateJavaScriptHashes(byte[] byteArray) throws WssHashException {
        Map<ChecksumType, String> checksums = new EnumMap<>(ChecksumType.class);
        try {
            String fileContent = IOUtils.toString(byteArray, UTF_8);
            ParseResult parseResult = new JavaScriptParser().parse(fileContent);
            if (parseResult != null) {
                // no comments
                String contentWithoutComments = parseResult.getContentWithoutComments();
                if (StringUtils.isNotBlank(contentWithoutComments)) {
                    HashCalculationResult noCommentsSha1 =
                            calculateSuperHash(contentWithoutComments.getBytes(StandardCharsets.UTF_8));
                    if (noCommentsSha1 != null) {
                        checksums.put(ChecksumType.SHA1_NO_COMMENTS_SUPER_HASH, noCommentsSha1.getFullHash());
                    }
                }

                // no headers
                String headerlessContent = parseResult.getContentWithoutHeaderComments();
                if (StringUtils.isNotBlank(headerlessContent)) {
                    String headerlessChecksum = calculateByteArrayHash(
                            headerlessContent.getBytes(StandardCharsets.UTF_8), HashAlgorithm.SHA1);
                    checksums.put(ChecksumType.SHA1_NO_HEADER, headerlessChecksum);
                }
            }
        } catch (Exception e) {
            // NOTE(review): only the message is propagated, so the original stack trace is lost.
            // Pass e as the cause if WssHashException offers such a constructor.
            throw new WssHashException("Error calculating JavaScript hash: " + e.getMessage());
        }
        return checksums;
    }

    /**
     * Calculates SHA-1 of {@code groupId_artifactId_version_dependencyType}, with the group id,
     * artifact id and version lower-cased using {@link Locale#ROOT} so the hash is the same on
     * every machine regardless of its default locale.
     *
     * @param groupId of library
     * @param artifactId of library
     * @param version of library
     * @param dependencyType of library
     * @return Calculated SHA-1 for library by group id, artifact id, version and dependencyType
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByGAVCoordinatesAndType(String groupId, String artifactId, String version,
                                                       DependencyType dependencyType) throws IOException {
        return sha1OfJoined(groupId.toLowerCase(Locale.ROOT), artifactId.toLowerCase(Locale.ROOT),
                version.toLowerCase(Locale.ROOT), dependencyType.toString());
    }

    /**
     * Calculates SHA-1 of {@code name_version_architecture_release_dependencyType}.
     *
     * @param name of library
     * @param version of library
     * @param architecture of library
     * @param release of library
     * @param dependencyType of library
     * @return Calculated SHA-1 for library by name, version, architecture, release and dependencyType
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByNameVersionArchitectureReleaseAndType(String name, String version, String architecture,
                                                                       String release, DependencyType dependencyType) throws IOException {
        return sha1OfJoined(name, version, architecture, release, dependencyType.toString());
    }

    /**
     * Calculates SHA-1 of {@code name_version_architecture_branch_dependencyType}.
     *
     * @param name of library
     * @param version of library
     * @param architecture of library
     * @param branch of operating system
     * @param dependencyType of library
     * @return Calculated SHA-1 for library by name, version, architecture, branch and dependencyType
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByNameVersionArchitectureBranchAndType(String name, String version, String architecture,
                                                                      String branch, DependencyType dependencyType) throws IOException {
        return sha1OfJoined(name, version, architecture, branch, dependencyType.toString());
    }

    /**
     * Calculates SHA-1 of {@code name_version_architecture_dependencyType}.
     *
     * @param name of library
     * @param version of library
     * @param architecture of library
     * @param dependencyType of library
     * @return Calculated SHA-1 for library by name, version, architecture and dependencyType
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByNameVersionArchitectureAndType(String name, String version, String architecture,
                                                                DependencyType dependencyType) throws IOException {
        return sha1OfJoined(name, version, architecture, dependencyType.toString());
    }

    /**
     * Calculates SHA-1 of {@code name_version_revision_layer_tag_type}
     * (for Yocto: recipeName_recipeVersion_recipeRevision_layer_yoctoTag_language).
     *
     * @param name of library
     * @param version of library
     * @param revision of library
     * @param layer of library
     * @param tag of library
     * @param type library
     * @return Calculated SHA-1 of name_version_revision_layer_tag_type
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByNameVersionRevisionLayerTagLanguage(String name, String version, String revision,
                                                                     String layer, String tag,
                                                                     DependencyType type) throws IOException {
        return sha1OfJoined(name, version, revision, layer, tag, type.toString());
    }

    /**
     * Calculates SHA-1 of {@code name_version_dependencyType}.
     *
     * @param name of library
     * @param version of library
     * @param dependencyType of library
     * @return Calculated SHA-1 for library by name, version and dependencyType
     * @throws IOException when failed to calculate sha-1
     */
    public String calculateSha1ByNameVersionAndType(String name, String version, DependencyType dependencyType) throws IOException {
        return sha1OfJoined(name, version, dependencyType.toString());
    }

    /* --- Private methods --- */

    /**
     * Joins the parts with underscores and returns the SHA-1 of the UTF-8 encoded result.
     * A {@code null} part is rendered as the text "null", exactly as string concatenation would.
     */
    private String sha1OfJoined(String... parts) throws IOException {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < parts.length; i++) {
            if (i > 0) {
                joined.append(UNDERSCORE);
            }
            joined.append(parts[i]);
        }
        return calculateByteArraySHA1(joined.toString().getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Builds the hash result for content that is large enough for bucket handling.
     *
     * <p>NOTE(review): this method used to copy the first and last {@code (int) bucketSize} bytes
     * of the content into separate arrays but never hashed or returned them. Those unused copies
     * were dropped to avoid the extra memory; the result is, as before, built from the
     * full-content hash only and {@code bucketSize} is currently not consulted. If most/least
     * significant hashes are wanted, hash those two slices here and pass them to
     * {@code HashCalculationResult}; confirm it offers a suitable constructor.
     *
     * @param fileWithoutSpaces whitespace-stripped content
     * @param bucketSize size of the leading and trailing slices (currently unused)
     * @return result holding the SHA-1 of the whole stripped content
     */
    private HashCalculationResult hashBuckets(byte[] fileWithoutSpaces, double bucketSize) throws IOException {
        String fullFileHash = calculateByteArraySHA1(fileWithoutSpaces);
        return new HashCalculationResult(fullFileHash);
    }

    /** Returns a digest for the given algorithm, or fails with {@link IllegalStateException}. */
    private static MessageDigest newDigest(HashAlgorithm algorithm) {
        try {
            return MessageDigest.getInstance(algorithm.getAlgorithm());
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
    }

    /** Renders the bytes as lower-case hexadecimal, two characters per byte. */
    private static String toHex(byte[] bytes) {
        StringBuilder sb = new StringBuilder(bytes.length * 2);
        for (byte aByte : bytes) {
            int b = aByte & 0xFF;
            if (b < 0x10) {
                // Integer.toHexString drops the leading zero
                sb.append(ZERO);
            }
            sb.append(Integer.toHexString(b));
        }
        return sb.toString();
    }

    /** Tells whether the byte is a carriage return, line feed, horizontal tab or space. */
    private static boolean isWhitespace(byte b) {
        return b == CARRIAGE_RETURN || b == NEW_LINE || b == HORIZONTAL_TAB || b == SPACE;
    }

    /**
     * Removes every carriage return, line feed, horizontal tab and space byte from the data.
     *
     * @param data - byte array
     * @return a new array holding the remaining bytes in their original order
     */
    private static byte[] stripWhiteSpaces(byte[] data) {
        ByteArrayOutputStream bos = new ByteArrayOutputStream(data.length);
        for (byte b : data) {
            if (!isWhitespace(b)) {
                bos.write(b);
            }
        }
        return bos.toByteArray();
    }
}