All downloads are free. The search and download features use the official Maven repository.

com.github.s3curitybug.similarityuniformfuzzyhash.VisualRepresentation Maven / Gradle / Ivy

Go to download

Similarity Uniform Fuzzy Hash is a tool that accurately and efficiently computes the similarity between two files (or sets of bytes) as a score from 0 to 1. To do so, it first computes a Context Triggered Piecewise Hash (CTPH), also known as a fuzzy hash, for each file, and then compares the hashes.

There is a newer version: 1.8.4
Show newest version
package com.github.s3curitybug.similarityuniformfuzzyhash;

import static com.github.s3curitybug.similarityuniformfuzzyhash.ToStringUtils.ANSI_CODE_COLOR_END;
import static com.github.s3curitybug.similarityuniformfuzzyhash.ToStringUtils.DECIMALS_FORMAT_SYMBOLS;
import static com.github.s3curitybug.similarityuniformfuzzyhash.ToStringUtils.UNICODE_CTRL;
import static com.github.s3curitybug.similarityuniformfuzzyhash.ToStringUtils.spaces;

import org.apache.commons.io.IOUtils;
import org.fusesource.jansi.AnsiConsole;

import com.github.s3curitybug.similarityuniformfuzzyhash.ToStringUtils.AnsiCodeColors;

import java.io.PrintStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

/**
 * This class provides utility static methods to represent and compare Uniform Fuzzy Hashes in
 * visual way.
 * 
 * @author [email protected]
 *
 */
public final class VisualRepresentation {

    /**
     * Charset in which bases are encoded.
     */
    public static final Charset BASES_ENCODING = StandardCharsets.UTF_8;

    /**
     * Printable ASCII base.
     */
    public static final char[] PRINTABLE_ASCII_BASE = readBaseFromResources("printableAscii.base");

    /**
     * Default base.
     */
    public static final char[] DEFAULT_BASE = PRINTABLE_ASCII_BASE;

    /**
     * Default factor divisor.
     */
    public static final int DEFAULT_FACTOR_DIVISOR = 3;

    /**
     * Default line wrap.
     */
    public static final int DEFAULT_LINE_WRAP = 60;

    /**
     * Bases path inside resources.
     */
    private static final String RESOURCES_BASES_PATH = "/VisualPrint/";

    /**
     * Format in which accumulated wrap length per line will be formatted during a string wrap.
     */
    private static final String ACCUMULATED_WRAP_FORMAT = "%5s  %s";

    /**
     * Format in which percent accumulated wrap length per line will be formatted during a string
     * wrap.
     */
    private static final DecimalFormat ACCUMULATED_WRAP_DECIMAL_FORMAT =
            new DecimalFormat("0.0", DECIMALS_FORMAT_SYMBOLS);

    /**
     * ANSI format which will be used to visually represent the blocks which are only present in the
     * first hash in a comparison.
     */
    private static final String BLOCK_IN_FIRST_HASH_ANSI_CODE_FORMAT =
            AnsiCodeColors.GREEN_FONT.getCode();

    /**
     * ANSI format which will be used to visually represent the blocks which are only present in the
     * second hash in a comparison.
     */
    private static final String BLOCK_IN_SECOND_HASH_ANSI_CODE_FORMAT =
            AnsiCodeColors.BLUE_FONT.getCode();

    /**
     * ANSI format which will be used to visually represent the blocks which are present in both
     * hashes in a comparison.
     */
    private static final String BLOCK_IN_BOTH_HASHES_ANSI_CODE_FORMAT =
            AnsiCodeColors.RED_FONT.getCode();

    /**
     * Private constructor.
     */
    private VisualRepresentation() {

    }

    /**
     * Represents a Uniform Fuzzy Hash in a visual way, using the default base and factor divisor.
     * 
     * @param hash The Uniform Fuzzy Hash.
     * @return A string representing the introduced Uniform Fuzzy Hash in a visual way.
     */
    public static String represent(
            UniformFuzzyHash hash) {

        return represent(hash, DEFAULT_BASE, DEFAULT_FACTOR_DIVISOR);

    }

    /**
     * Represents a Uniform Fuzzy Hash in a visual way.
     * 
     * @param hash The Uniform Fuzzy Hash.
     * @param base The characters base which will be used to represent the blocks.
     * @param factorDivisor Amount of characters per factor size for each block.
     * @return A string representing the introduced Uniform Fuzzy Hash in a visual way.
     */
    public static String represent(
            UniformFuzzyHash hash,
            char[] base,
            int factorDivisor) {

        if (hash == null) {
            throw new NullPointerException("Hash is null.");
        }

        if (base == null) {
            throw new NullPointerException("Base is null.");
        }

        if (base.length == 0) {
            throw new IllegalArgumentException("Base is empty.");
        }

        if (factorDivisor < 1) {
            throw new IllegalArgumentException("Factor divisor is lower than 1.");
        }

        int factor = hash.getFactor();
        List blocks = hash.accessBlocks();

        StringBuilder strB = new StringBuilder(blocks.size() * factorDivisor * 2);

        for (UniformFuzzyHashBlock block : blocks) {

            char character = base[block.getBlockHash() % base.length];
            int blockSize = block.getBlockSize();

            long characterRepetitions = 0;
            do {
                strB.append(character);
            } while (blockSize > (++characterRepetitions * factor + factor / 2) / factorDivisor);

        }

        return strB.toString();

    }

    /**
     * Represents a Uniform Fuzzy Hash in a visual way, coloring the blocks which are present in
     * another Uniform Fuzzy Hash with a different color to the ones which are not, and using the
     * default base and factor divisor.
     * 
     * @param hash1 The Uniform Fuzzy Hash.
     * @param hash2 The Uniform Fuzzy Hash to which the first one will be compared.
     * @return A string representing the introduced Uniform Fuzzy Hash in a visual way, with ANSI
     *         code color characters.
     */
    public static String representCompared(
            UniformFuzzyHash hash1,
            UniformFuzzyHash hash2) {

        return representCompared(hash1, hash2, DEFAULT_BASE, DEFAULT_FACTOR_DIVISOR);

    }

    /**
     * Represents a Uniform Fuzzy Hash in a visual way, coloring the blocks which are present in
     * another Uniform Fuzzy Hash with a different color to the ones which are not.
     * 
     * @param hash1 The Uniform Fuzzy Hash.
     * @param hash2 The Uniform Fuzzy Hash to which the first one will be compared.
     * @param base The characters base which will be used to represent the blocks.
     * @param factorDivisor Amount of characters per factor size for each block.
     * @return A string representing the introduced Uniform Fuzzy Hash in a visual way, with ANSI
     *         code color characters.
     */
    public static String representCompared(
            UniformFuzzyHash hash1,
            UniformFuzzyHash hash2,
            char[] base,
            int factorDivisor) {

        if (hash1 == null) {
            throw new NullPointerException("Hash 1 is null.");
        }

        if (hash2 == null) {
            throw new NullPointerException("Hash 2 is null.");
        }

        if (base == null) {
            throw new NullPointerException("Base is null.");
        }

        if (base.length == 0) {
            throw new IllegalArgumentException("Base is empty.");
        }

        if (factorDivisor < 1) {
            throw new IllegalArgumentException("Factor divisor is lower than 1.");
        }

        int factor1 = hash1.getFactor();
        int factor2 = hash2.getFactor();

        if (factor1 != factor2) {
            throw new IllegalArgumentException("The Uniform Fuzzy Hashes factors are different.");
        }

        List blocks1 = hash1.accessBlocks();
        Set blocks2 = hash2.accessBlocksSet();

        StringBuilder strB = new StringBuilder(blocks1.size() * factorDivisor * 2);

        String ansiCodeFormat = null;
        for (UniformFuzzyHashBlock block : blocks1) {

            char character = base[block.getBlockHash() % base.length];
            int blockSize = block.getBlockSize();

            if (blocks2.contains(block)) {
                if (!BLOCK_IN_BOTH_HASHES_ANSI_CODE_FORMAT.equals(ansiCodeFormat)) {
                    ansiCodeFormat = BLOCK_IN_BOTH_HASHES_ANSI_CODE_FORMAT;
                    strB.append(AnsiCodeColors.RESET.getCode());
                    strB.append(ansiCodeFormat);
                }
            } else {
                if (!BLOCK_IN_FIRST_HASH_ANSI_CODE_FORMAT.equals(ansiCodeFormat)) {
                    ansiCodeFormat = BLOCK_IN_FIRST_HASH_ANSI_CODE_FORMAT;
                    strB.append(AnsiCodeColors.RESET.getCode());
                    strB.append(ansiCodeFormat);
                }
            }

            long characterRepetitions = 0;
            do {
                strB.append(character);
            } while (blockSize > (++characterRepetitions * factor1 + factor1 / 2) / factorDivisor);

        }

        strB.append(AnsiCodeColors.RESET.getCode());

        return strB.toString();

    }

    /**
     * Prints a Uniform Fuzzy Hash in a visual way, using the default base, factor divisor and line
     * wrap.
     * 
     * @param hash The Uniform Fuzzy Hash.
     */
    public static void print(
            UniformFuzzyHash hash) {

        print(hash, DEFAULT_BASE, DEFAULT_FACTOR_DIVISOR, DEFAULT_LINE_WRAP, true);

    }

    /**
     * Prints a Uniform Fuzzy Hash in a visual way.
     * 
     * @param hash The Uniform Fuzzy Hash.
     * @param base The characters base which will be used to represent the blocks.
     * @param factorDivisor Amount of characters per factor size for each block.
     * @param lineWrap Amount of characters per line. If this argument is lower than 1, no line wrap
     *        is performed and the full representation is printed in one line.
     * @param concatenatePercent In case line wrap is performed, true to concatenate to each line
     *        its relative percent to the total length.
     */
    public static void print(
            UniformFuzzyHash hash,
            char[] base,
            int factorDivisor,
            int lineWrap,
            boolean concatenatePercent) {

        // Representation.
        String representation = represent(hash, base, factorDivisor);
        List wrappedRepresentation =
                wrapString(representation, lineWrap, concatenatePercent);

        // Print.
        final PrintStream printStream = System.out;

        if (printStream == AnsiConsole.out) {
            AnsiConsole.systemInstall();
        }

        printStream.println();

        for (String line : wrappedRepresentation) {
            printStream.println(line);
        }

        printStream.println();

        if (printStream == AnsiConsole.out) {
            AnsiConsole.systemUninstall();
        }

    }

    /**
     * Prints a Uniform Fuzzy Hash in a visual way, coloring the blocks which are present in another
     * Uniform Fuzzy Hash with a different color to the ones which are not, and using the default
     * base, factor divisor, and line wrap.
     * 
     * @param hash1 The Uniform Fuzzy Hash.
     * @param hash2 The Uniform Fuzzy Hash to which the first one will be compared.
     */
    public static void printCompared(
            UniformFuzzyHash hash1,
            UniformFuzzyHash hash2) {

        printCompared(hash1, hash2, DEFAULT_BASE, DEFAULT_FACTOR_DIVISOR, DEFAULT_LINE_WRAP, true);

    }

    /**
     * Prints a Uniform Fuzzy Hash in a visual way, coloring the blocks which are present in another
     * Uniform Fuzzy Hash with a different color to the ones which are not.
     * 
     * @param hash1 The Uniform Fuzzy Hash.
     * @param hash2 The Uniform Fuzzy Hash to which the first one will be compared.
     * @param base The characters base which will be used to represent the blocks.
     * @param factorDivisor Amount of characters per factor size for each block.
     * @param lineWrap Amount of characters per line. If this argument is lower than 1, no line wrap
     *        is performed and the full representation is printed in one line.
     * @param concatenatePercent In case line wrap is performed, true to concatenate to each line
     *        its relative percent to the total length.
     */
    public static void printCompared(
            UniformFuzzyHash hash1,
            UniformFuzzyHash hash2,
            char[] base,
            int factorDivisor,
            int lineWrap,
            boolean concatenatePercent) {

        // Representations.
        String representation1 = representCompared(hash1, hash2, base, factorDivisor);
        String representation2 = representCompared(hash2, hash1, base, factorDivisor).replace(
                BLOCK_IN_FIRST_HASH_ANSI_CODE_FORMAT, BLOCK_IN_SECOND_HASH_ANSI_CODE_FORMAT);

        List wrappedRepresentation1 =
                wrapStringRespectingAnsiCodeFormat(representation1, lineWrap, concatenatePercent);
        List wrappedRepresentation2 =
                wrapStringRespectingAnsiCodeFormat(representation2, lineWrap, concatenatePercent);

        String wrapLengthSpaces = concatenatePercent
                ? spaces(lineWrap + formatAccumulatedWrapLength("", 0).length())
                : spaces(lineWrap);
        while (wrappedRepresentation1.size() < wrappedRepresentation2.size()) {
            wrappedRepresentation1.add(wrapLengthSpaces);
        }
        while (wrappedRepresentation2.size() < wrappedRepresentation1.size()) {
            wrappedRepresentation2.add(wrapLengthSpaces);
        }

        // Print.
        final PrintStream printStream = AnsiConsole.out;

        if (printStream == AnsiConsole.out) {
            AnsiConsole.systemInstall();
        }

        printStream.println();

        Iterator iterator1 = wrappedRepresentation1.iterator();
        Iterator iterator2 = wrappedRepresentation2.iterator();
        final int separation = 5;
        while (iterator1.hasNext()) {
            printStream.println(iterator1.next() + spaces(separation) + iterator2.next());
        }

        printStream.println();

        if (printStream == AnsiConsole.out) {
            AnsiConsole.systemUninstall();
        }

    }

    /**
     * Reads a base from RESOURCES_BASES_PATH.
     * Must be encoded as BASES_ENCODING.
     * 
     * @param baseName The base name.
     * @return The base as a char[].
     */
    private static char[] readBaseFromResources(
            String baseName) {

        if (baseName == null) {
            throw new NullPointerException("Base name is null.");
        }

        URL resource = VisualRepresentation.class.getResource(RESOURCES_BASES_PATH + baseName);
        if (resource == null) {
            throw new IllegalArgumentException("Base name does not match any resource.");
        }

        try {
            return IOUtils.toString(resource, BASES_ENCODING).toCharArray();
        } catch (Exception exception) {
            throw new RuntimeException(
                    String.format("Error reading base %s from resources", baseName),
                    exception);
        }

    }

    /**
     * Splits a string in fixed length substrings.
     * The last substring is filled with spaces until reaching the fixed length.
     * 
     * @param string The string to split.
     * @param wrapLength The substrings length. If this argument is lower than 1, no split is
     *        performed.
     * @param concatenatePercent True to concatenate to each line its relative percent to the total
     *        string length.
     * @return A List of strings containing the substrings, or a List of strings containing the
     *         introduced string if wrapLength is lower than 1.
     */
    private static List wrapString(
            String string,
            int wrapLength,
            boolean concatenatePercent) {

        if (string == null) {
            throw new NullPointerException("String is null.");
        }

        List wrappedString = new LinkedList<>();
        double relativeWrapLength = (double) wrapLength / string.length();
        double accumulatedWrapLength = 0;

        if (wrapLength < 1) {

            wrappedString.add(string);

        } else {

            while (string.length() > wrapLength) {

                if (concatenatePercent) {
                    wrappedString.add(formatAccumulatedWrapLength(
                            string.substring(0, wrapLength),
                            accumulatedWrapLength));
                } else {
                    wrappedString.add(string.substring(0, wrapLength));
                }

                string = string.substring(wrapLength);
                accumulatedWrapLength += relativeWrapLength;

            }

            if (concatenatePercent) {
                wrappedString.add(formatAccumulatedWrapLength(
                        string + spaces(wrapLength - string.length()),
                        accumulatedWrapLength));
            } else {
                wrappedString.add(string + spaces(wrapLength - string.length()));
            }

        }

        return wrappedString;

    }

    /**
     * Splits a string in fixed length substrings.
     * ANSI code format is respected.
     * The last substring is filled with spaces until reaching the fixed length.
     * 
     * @param string The string to split.
     * @param wrapLength The substrings length. If this argument is lower than 1, no split is
     *        performed.
     * @param concatenatePercent True to concatenate to each line its relative percent to the total
     *        string length.
     * @return A List of strings containing the substrings, or a List of strings containing the
     *         introduced string if wrapLength is lower than 1.
     */
    private static List wrapStringRespectingAnsiCodeFormat(
            String string,
            int wrapLength,
            boolean concatenatePercent) {

        if (string == null) {
            throw new NullPointerException("String is null.");
        }

        List wrappedString = new LinkedList<>();
        double relativeWrapLength = (double) wrapLength / AnsiCodeColors.remove(string).length();
        double accumulatedWrapLength = 0;

        if (wrapLength < 1) {

            wrappedString.add(string);

        } else {

            StringBuilder substring = new StringBuilder(wrapLength * 2);
            StringBuilder ansiCodeFormat = new StringBuilder();
            int substringChars = 0;

            for (int i = 0; i < string.length(); i++) {

                char ch = string.charAt(i);

                if (ch == UNICODE_CTRL) {

                    ansiCodeFormat = new StringBuilder();
                    while (string.charAt(i) != ANSI_CODE_COLOR_END) {
                        ansiCodeFormat.append(string.charAt(i++));
                    }
                    ansiCodeFormat.append(string.charAt(i));

                    substring.append(ansiCodeFormat);

                } else {

                    substring.append(ch);
                    substringChars++;

                    if (substringChars == wrapLength || i == string.length() - 1) {

                        if (ansiCodeFormat.length() > 0 && !ansiCodeFormat.toString().equals(
                                AnsiCodeColors.RESET.getCode())) {

                            substring.append(AnsiCodeColors.RESET.getCode());

                            if (concatenatePercent) {
                                wrappedString.add(formatAccumulatedWrapLength(
                                        substring.toString(),
                                        accumulatedWrapLength));
                            } else {
                                wrappedString.add(substring.toString());
                            }

                            substring = new StringBuilder(wrapLength * 2);
                            substring.append(ansiCodeFormat);

                        } else {

                            if (concatenatePercent) {
                                wrappedString.add(formatAccumulatedWrapLength(
                                        substring.toString(),
                                        accumulatedWrapLength));
                            } else {
                                wrappedString.add(substring.toString());
                            }

                            substring = new StringBuilder(wrapLength * 2);

                        }

                        accumulatedWrapLength += relativeWrapLength;
                        substringChars = 0;

                    }

                }

            }

            if (substring.length() > 0) {

                if (ansiCodeFormat.length() > 0 && !ansiCodeFormat.toString().equals(
                        AnsiCodeColors.RESET.getCode())) {
                    substring.append(AnsiCodeColors.RESET.getCode());
                }

                substring.append(spaces(wrapLength - substringChars));

                if (concatenatePercent) {
                    wrappedString.add(formatAccumulatedWrapLength(
                            substring.toString(),
                            accumulatedWrapLength));
                } else {
                    wrappedString.add(substring.toString());
                }

            }

        }

        return wrappedString;

    }

    /**
     * Formats a wrapped line concatenating its percent accumulated wrap length to it.
     * 
     * @param string The wrapped line.
     * @param accumulatedWrapLength Accumulated wrap length in previous lines.
     * @return The wrapped line formatted with its percent accumulated wrap length.
     */
    private static String formatAccumulatedWrapLength(
            String string,
            double accumulatedWrapLength) {

        final int percent = 100;

        return String.format(
                ACCUMULATED_WRAP_FORMAT,
                ACCUMULATED_WRAP_DECIMAL_FORMAT.format(accumulatedWrapLength * percent),
                string);

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy