dev.brachtendorf.jimagehash.hash.Hash Maven / Gradle / Ivy
Show all versions of JImageHash Show documentation
package dev.brachtendorf.jimagehash.hash;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.math.BigInteger;
import dev.brachtendorf.Require;
import dev.brachtendorf.StringUtil;
import dev.brachtendorf.graphics.FastPixel;
import dev.brachtendorf.jimagehash.hashAlgorithms.HashingAlgorithm;
import javafx.scene.paint.Color;
/**
* Hashes are bit encoded encoded values (0101011101) created from images using
* a hashing algorithm. Hashes enable a quick approximate similarity comparison
* between images while only storing a fraction of the original data.
*
*
* They are created from images down scaling information and enabling quick
* comparison between instances produced by the same algorithm. Every bit in the
* hash usually represents a section of the image containing certain information
* (hue, brightness, color, frequencies or gradients)
*
* @author Kilian
* @since 1.0.0
* @since 3.0.0 Serializable
*/
public class Hash implements Serializable {
private static final long serialVersionUID = 3045682506632674223L;
/**
* Unique identifier of the algorithm and settings used to create the hash
*/
protected int algorithmId;
/**
* Hash value representation
*
* Hashes are constructed by left shifting BigIntegers with either Zero or One
* depending on the condition found in the image. Preceding 0's will be
* truncated therefore it is the algorithms responsibility to add a 1 padding
* bit at the beginning new BigInteger("011011) new BigInteger("000101) 1xxxxx
*
*/
protected BigInteger hashValue;
/**
* How many bits does this hash represent. Necessary due to suffix 0 bits
* beginning dropped.
*/
protected int hashLength;
/**
* Creates a Hash object with the specified hashValue and algorithmId. To allow
* save comparison of different hashes they have to be generated by the same
* algorithm.
*
* @param hashValue The hash value describing the image
* @param hashLength the actual bit resolution of the hash. The bigInteger
* truncates leading zero bits resulting in a loss of length
* information.
* @param algorithmId Unique identifier of the algorithm used to create this
* hash
*/
public Hash(BigInteger hashValue, int hashLength, int algorithmId) {
this.hashValue = hashValue;
this.algorithmId = algorithmId;
this.hashLength = hashLength;
}
/**
* Calculate the hamming distance of 2 hash values. The distance of two hashes
* is the difference of the individual bits found in the hash.
*
* The hamming distance falls within [0-bitResolution]. Lower values indicate
* closer similarity while identical images must return a score of 0. On the
* flip side score of 0 does not mean images have to be identical!
*
*
* A longer hash (higher bitResolution) will increase the average hamming
* distance returned. While this method allows for the most accurate fine tuning
* of the distance {@link #normalizedHammingDistance(Hash)} is hash length
* independent.
*
*
* Please be aware that only hashes produced by the same algorithm with the same
* settings will return meaningful result and should be compared. This method
* will check if the hashes are compatible if no additional check is required
* see {@link #hammingDistanceFast(Hash)}
*
* @param h The hash to calculate the distance to
* @return similarity value ranging between [0 - hash length]
*/
public int hammingDistance(Hash h) {
if (this.algorithmId != h.algorithmId) {
throw new IllegalArgumentException("Can't compare two hash values created by different algorithms");
}
return hammingDistanceFast(h);
}
/**
* Calculate the hamming distance of 2 hash values. The distance of two hashes
* is the difference of the individual bits found in the hash.
*
* The hamming distance falls within [0-bitResolution]. Lower values indicate
* closer similarity while identical images must return a score of 0. On the
* flip side score of 0 does not mean images have to be identical!
*
*
* A longer hash (higher bitResolution) will increase the average hamming
* distance returned. While this method allows for the most accurate fine tuning
* of the distance {@link #normalizedHammingDistance(Hash)} is hash length
* independent.
*
*
* Please be aware that only hashes produced by the same algorithm with the same
* settings will return meaningful result and should be compared. This method
* will NOT check if the hashes are compatible.
*
* @param h The hash to calculate the distance to
* @return similarity value ranging between [0 - hash length]
* @see #hammingDistance(Hash)
*/
public int hammingDistanceFast(Hash h) {
return this.hashValue.xor(h.getHashValue()).bitCount();
}
/**
* Calculate the hamming distance of 2 hash values. The distance of two hashes
* is the difference of the individual bits found in the hash.
*
* The hamming distance falls within [0-bitResolution]. Lower values indicate
* closer similarity while identical images must return a score of 0. On the
* flip side score of 0 does not mean images have to be identical!
*
*
* A longer hash (higher bitResolution) will increase the average hamming
* distance returned. While this method allows for the most accurate fine tuning
* of the distance {@link #normalizedHammingDistance(Hash)} is hash length
* independent.
*
*
* Please be aware that only hashes produced by the same algorithm with the same
* settings will return meaningful result and should be compared. This method
* will NOT check if the hashes are compatible.
*
* @param bInt A big integer representing a hash
* @return similarity value ranging between [0 - hash length]
* @see #hammingDistance(Hash)
*/
public int hammingDistanceFast(BigInteger bInt) {
return this.hashValue.xor(bInt).bitCount();
}
/**
* Calculate the hamming distance of 2 hash values. The distance of two hashes
* is the difference of the individual bits found in the hash.
*
* The normalized hamming distance falls within [0-1]. Lower values indicate
* closer similarity while identical images must return a score of 0. On the
* flip side score of 0 does not mean images have to be identical!
*
*
* See {@link #hammingDistance(Hash)} for a non normalized version
*
* Please be aware that only hashes produced by the same algorithm with the same
* settings will return meaningful result and should be compared. This method
* will check if the hashes are compatible if no additional check is required
* see {@link #normalizedHammingDistanceFast(Hash)}
*
* @param h The hash to calculate the distance to
* @return similarity value ranging between [0 - 1]
*/
public double normalizedHammingDistance(Hash h) {
if (this.algorithmId != h.algorithmId) {
throw new IllegalArgumentException("Can't compare two hash values created by different algorithms");
}
// We expect both integers to contain the same bit key lengths!
// -1 due to the preceding padding bit
return normalizedHammingDistanceFast(h);
}
/**
* Calculate the hamming distance of 2 hash values. The distance of two hashes
* is the difference of the individual bits found in the hash.
*
* The normalized hamming distance falls within [0-1]. Lower values indicate
* closer similarity while identical images must return a score of 0. On the
* flip side score of 0 does not mean images have to be identical!
*
*
* See {@link #hammingDistance(Hash)} for a non normalized version
*
* Please be aware that only hashes produced by the same algorithm with the same
* settings will return meaningful result and should be compared. This method
* will NOT check if the hashes are compatible.
*
* @param h The hash to calculate the distance to
* @return similarity value ranging between [0 - 1]
* @see #hammingDistance(Hash)
*/
public double normalizedHammingDistanceFast(Hash h) {
// We expect both integers to contain the same bit key lengths!
return hammingDistanceFast(h) / (double) hashLength;
}
/**
* Check if the bit at the given position is set.
*
* @param position of the bit. An index of 0 points to the lowest (rightmost
* bit)
* @return true if the bit is set (1) or false if it's not set (0)
* @throws IllegalArgumentException if the supplied index is outside the hash
* bound
* @since 2.0.0
*/
public boolean getBit(int position) {
Require.inRange(position, 0, this.getBitResolution() - 1, "Bit out of bounds");
return getBitUnsafe(position);
}
/**
* Check if the bit at the given position of the hash is set. This method does
* not check the bounds of the supplied argument.
*
* @param position of the bit. An index of 0 points to the lowest (rightmost
* bit)
* @return true if the bit is set (1). False if it's not set (0) ot the index is
* bigger than the hash length.
* @throws ArithmeticException if position is negative
* @since 2.0.0
*/
public boolean getBitUnsafe(int position) {
return hashValue.testBit(position);
}
/**
* Return the algorithm identifier specifying by which algorithm and setting
* this hash was created. The id shall remain constant.
*
* @return The algorithm id
*/
public int getAlgorithmId() {
return algorithmId;
}
/**
* @return the base BigInteger holding the hash value
*/
public BigInteger getHashValue() {
return hashValue;
}
/**
* Creates a visual representation of the hash mapping the hash values to the
* section of the rescaled image used to generate the hash assuming default bit
* encoding.
*
*
* Some hash algorithms may chose to construct their hashes in a non default
* manner (e.g. {@link dev.brachtendorf.jimagehash.hashAlgorithms.DifferenceHash}). In
* this case {@link #toImage(int, HashingAlgorithm)} may help to resolve the
* issue;
*
* @param blockSize scaling factor of each pixel in the has. each bit of the
* hash will be represented to blockSize*blockSize pixels
*
* @return A black and white image representing the individual bits of the hash
*/
public BufferedImage toImage(int blockSize) {
Color[] colorArr = new Color[] { Color.WHITE, Color.BLACK };
int[] colorIndex = new int[hashLength];
for (int i = 0; i < hashLength; i++) {
colorIndex[i] = hashValue.testBit(i) ? 1 : 0;
}
return toImage(colorIndex, colorArr, blockSize);
}
/**
* Creates a visual representation of the hash mapping the hash values to the
* section of the rescaled image used to generate the hash.
*
*
* Some hash algorithms may chose to construct their hashes in a non default
* manner (e.g. {@link dev.brachtendorf.jimagehash.hashAlgorithms.DifferenceHash}).
*
* @param blockSize scaling factor of each pixel in the has. each bit of the
* hash will be represented to blockSize*blockSize pixels
* @param hasher HashAlgorithm which created this hash.
* @return A black and white image representing the individual bits of the hash
* @since 3.0.0
*/
public BufferedImage toImage(int blockSize, HashingAlgorithm hasher) {
return hasher.createAlgorithmSpecificHash(this).toImage(blockSize);
}
/**
* Creates a visual representation of the hash mapping the hash values to the
* section of the rescaled image used to generate the hash.
*
* @param bitColorIndex array mapping each bit of the hash to a color of the
* color array
* @param colors array to colorize the pixels
* @param blockSize scaling factor of each pixel in the has. each bit of the
* hash will be represented to blockSize*blockSize pixels
* @return A colorized image representing the individual bits of the hash
*/
public BufferedImage toImage(int[] bitColorIndex, Color[] colors, int blockSize) {
int width = (int) Math.sqrt(hashLength);
int height = width;
BufferedImage bi = new BufferedImage(blockSize * width, blockSize * height, BufferedImage.TYPE_3BYTE_BGR);
FastPixel fp = FastPixel.create(bi);
int i = 0;
for (int w = 0; w < width * blockSize; w = w + blockSize) {
for (int h = 0; h < height * blockSize; h = h + blockSize) {
Color c = colors[bitColorIndex[i++]];
int red = (int) (c.getRed() * 255);
int green = (int) (c.getGreen() * 255);
int blue = (int) (c.getBlue() * 255);
for (int m = 0; m < blockSize; m++) {
for (int n = 0; n < blockSize; n++) {
int x = w + m;
int y = h + n;
// bi.setRGB(y, x, bit ? black : white);
// fp.setAverageGrayscale(x, y, gray);
fp.setRed(x, y, red);
fp.setGreen(x, y, green);
fp.setBlue(x, y, blue);
}
}
}
}
return bi;
}
/**
* @return the hash resolution in bits
*/
public int getBitResolution() {
return hashLength;
}
/**
* Saves this hash to a file for persistent storage. The hash can later be
* recovered by calling {@link #fromFile(File)};
*
* @param saveLocation the file to save the hash to
* @throws IOException If an error occurs during file access
* @since 3.0.0
*/
public void toFile(File saveLocation) throws IOException {
try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(saveLocation))) {
oos.writeObject(this);
}
}
/**
* Reads a hash from a serialization file and returns it. Only hashes can be read from file
* that got saved by the same class instance using {@link #toFile(File)};
*
* @param source The file this hash can be read from.
* @return a hash object
* @throws IOException If an error occurs during file read
* @throws ClassNotFoundException if the class used to serialize this hash can
* not be found
* @since 3.0.0
*/
public static Hash fromFile(File source) throws IOException, ClassNotFoundException {
try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(source))) {
Object o = ois.readObject();
//Ugly. This is not oop.
if(o.getClass() == FuzzyHash.class) {
return FuzzyHash.fromFile(source);
}
return (Hash) o;
}
}
/**
* Return the byte representation of the big integer with the leading zero byte
* stripped if present. The BigInteger class prepends a sign byte if necessary
* to indicate the signum of the number. Since our hashes are always positive we
* can get rid of it and reduce the space requirement in our db by 1 byte.
*
*
* To reconstruct the big integer value we can simply prepend a [0x00] byte even
* if it wasn't present in the first place. The constructor
* {@link java.math.BigInteger#BigInteger(byte[])} will take care of it.
*
* @return the byte representation of the big integer without an artificial sign
* byte.
*/
public byte[] toByteArray() {
byte[] bArray = hashValue.toByteArray();
if (bArray[0] != 0) {
return bArray;
} else {
byte[] bArrayWithoutSign = new byte[bArray.length - 1];
System.arraycopy(bArray, 1, bArrayWithoutSign, 0, bArray.length - 1);
return bArrayWithoutSign;
}
}
public String toString() {
return "Hash: " + StringUtil.fillStringBeginning("0", hashLength, hashValue.toString(2)) + " [algoId: "
+ algorithmId + "]";
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + algorithmId;
result = prime * result + ((hashValue == null) ? 0 : hashValue.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
Hash other = (Hash) obj;
if (algorithmId != other.getAlgorithmId())
return false;
if (hashValue == null) {
if (other.hashValue != null)
return false;
} else if (!hashValue.equals(other.getHashValue()))
return false;
return true;
}
}