All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.s3curitybug.similarityuniformfuzzyhash.ToStringUtils Maven / Gradle / Ivy

Go to download

Similarity Uniform Fuzzy Hash is a tool that allows to accurately and efficiently compute the similarity between two files (or sets of bytes) as a 0 to 1 score. For that purpose, it first computes for each file a Context Triggered Piecewise Hash (CTPH), also known as fuzzy hash, and then compares the hashes.

There is a newer version: 1.8.4
Show newest version
package com.github.s3curitybug.similarityuniformfuzzyhash;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * This class provides utility methods and constants to build string representations of Uniform
 * Fuzzy Hashes.
 * 
 * @author [email protected]
 *
 */
public final class ToStringUtils {

    /**
     * Charset for reading and writing files.
     */
    public static final Charset FILES_ENCODING = StandardCharsets.UTF_8;

    /**
     * Mark at the beginning of a Uniform Fuzzy Hash file line which indicates that the line should
     * be ignored.
     */
    public static final String IGNORE_MARK = "#";

    /**
     * Separator between identifier and hash for an identified Uniform Fuzzy Hash string
     * representation.
     */
    public static final String IDENTIFIER_SEPARATOR = " > ";

    /**
     * Separator between factor and blocks for a Uniform Fuzzy Hash string representation.
     */
    public static final String FACTOR_SEPARATOR = ":";

    /**
     * Separator between blocks for a Uniform Fuzzy Hash string representation.
     */
    public static final String BLOCKS_SEPARATOR = "-";

    /**
     * Separator between block parts for a Uniform Fuzzy Hash Block string representation.
     */
    public static final String BLOCK_INNER_SEPARATOR = "/";

    /**
     * Tabulation.
     */
    public static final String TAB = "   ";

    /**
     * New line.
     */
    public static final String NEW_LINE = System.getProperty("line.separator");

    /**
     * String which will be used when an identifier is null.
     */
    public static final String NULL_IDENTIFIER = "null";

    /**
     * String which will be used when a value is null.
     */
    public static final String NULL_VALUE = "-";

    /**
     * Separator for comma separated values.
     */
    public static final String CSV_SEPARATOR = ", ";

    /**
     * Trimmed separator for comma separated values.
     */
    public static final String CSV_TRIMMED_SEPARATOR = CSV_SEPARATOR.trim();

    /**
     * Quotation mark for comma separated values.
     */
    public static final String CSV_QUOTATION_MARK = "\"";

    /**
     * CSV split pattern.
     */
    public static final Pattern CSV_SPLIT_PATTERN =
            Pattern.compile(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");

    /**
     * Maximum number of characters of an integer string representation.
     */
    protected static final int INT_MAX_CHARS =
            Integer.toString(Integer.MAX_VALUE).length();

    /**
     * Maximum number of characters of a factor string representation with separator.
     */
    protected static final int FACTOR_WITH_SEP_MAX_CHARS =
            INT_MAX_CHARS + FACTOR_SEPARATOR.length();

    /**
     * Base in which Uniform Fuzzy Hash Blocks integers are represented.
     */
    protected static final int BLOCK_BASE = 36;

    /**
     * Maximum number of characters of an integer block base representation.
     */
    protected static final int BLOCK_INT_MAX_CHARS =
            Integer.toString(Integer.MAX_VALUE, BLOCK_BASE).length();

    /**
     * Maximum number of characters of a block string representation.
     */
    protected static final int BLOCK_MAX_CHARS =
            2 * BLOCK_INT_MAX_CHARS + BLOCK_INNER_SEPARATOR.length();

    /**
     * Maximum number of characters of a block string representation with separator.
     */
    protected static final int BLOCK_WITH_SEP_MAX_CHARS =
            BLOCK_MAX_CHARS + BLOCKS_SEPARATOR.length();

    /**
     * Symbols for decimal numbers format.
     */
    public static final DecimalFormatSymbols DECIMALS_FORMAT_SYMBOLS =
            DecimalFormatSymbols.getInstance(Locale.ROOT);

    /**
     * Format in which decimal numbers are printed.
     */
    public static final DecimalFormat DECIMALS_FORMAT =
            new DecimalFormat("0.0##", DECIMALS_FORMAT_SYMBOLS);

    /**
     * Maximum number of characters of a decimal number string representation.
     */
    protected static final int DECIMAL_MAX_CHARS = 1
            + INT_MAX_CHARS
            + DECIMALS_FORMAT.getMaximumFractionDigits();

    /**
     * Maximum number of characters of a 0-1 decimal number string representation.
     */
    protected static final int ZERO_TO_ONE_DECIMAL_MAX_CHARS = 1
            + DECIMALS_FORMAT.getMinimumIntegerDigits()
            + DECIMALS_FORMAT.getMaximumFractionDigits();

    /**
     * Unicode control character.
     */
    protected static final char UNICODE_CTRL = '\u001b';

    /**
     * ANSI code start character.
     */
    protected static final char ANSI_CODE_START = '[';

    /**
     * ANSI code color end character.
     */
    protected static final char ANSI_CODE_COLOR_END = 'm';

    /**
     * ANSI code color pattern.
     */
    protected static final Pattern ANSI_CODE_COLOR_PATTERN =
            Pattern.compile(UNICODE_CTRL + ".+?" + ANSI_CODE_COLOR_END);

    /**
     * Enum of ANSI code colors.
     */
    protected enum AnsiCodeColors {

        /**
         * Red font color.
         */
        RED_FONT(31),

        /**
         * Green font color.
         */
        GREEN_FONT(32),

        /**
         * Blue font color.
         */
        BLUE_FONT(34),

        /**
         * Reset color.
         */
        RESET(0);

        /**
         * Color number.
         */
        private int number;

        /**
         * Color code.
         */
        private String code;

        /**
         * Constructor.
         * 
         * @param number Color number.
         */
        AnsiCodeColors(
                int number) {

            this.number = number;
            this.code = Character.toString(UNICODE_CTRL)
                    + Character.toString(ANSI_CODE_START)
                    + Integer.toString(number)
                    + Character.toString(ANSI_CODE_COLOR_END);

        }

        /**
         * @return The color number.
         */
        protected int getNumber() {
            return number;
        }

        /**
         * @return The color code.
         */
        protected String getCode() {
            return code;
        }

        /**
         * Removes all ANSI code colors from a string.
         * 
         * @param string A string.
         * @return The string without any ANSI code colors.
         */
        protected static String remove(
                String string) {

            return ANSI_CODE_COLOR_PATTERN.matcher(string).replaceAll("");

        }

    }

    /**
     * ANSI code color which will be used to mark decimals which are above a threshold.
     */
    private static final AnsiCodeColors ANSI_CODE_COLOR_DECIMAL_ABOVE = AnsiCodeColors.RED_FONT;

    /**
     * ANSI code color which will be used to mark decimals which are below a threshold.
     */
    private static final AnsiCodeColors ANSI_CODE_COLOR_DECIMAL_BELOW = AnsiCodeColors.BLUE_FONT;

    /**
     * Private constructor.
     */
    private ToStringUtils() {

    }

    /**
     * @param strings Collection of strings.
     * @return Maximum length between the strings in the collection.
     */
    public static int maxLength(
            Collection strings) {

        int maxLength = 0;

        for (String string : strings) {
            if (string != null && string.length() > maxLength) {
                maxLength = string.length();
            }
        }

        return maxLength;

    }

    /**
     * Converts an identifier to string and prepares it to be printed.
     * 
     * @param  Identifier type.
     * @param identifier The identifier.
     * @param truncateLength Maximum length of the returned string.
     *        If this parameter is lower than 1, no truncation is performed.
     * @return The identifier prepared to be printed.
     */
    public static  String prepareIdentifier(
            T identifier,
            int truncateLength) {

        String preparedIdentifier = null;

        if (identifier == null) {
            preparedIdentifier = NULL_IDENTIFIER;
        } else {
            preparedIdentifier = identifier.toString();
        }

        preparedIdentifier = preparedIdentifier.trim();

        if (truncateLength > 0 && preparedIdentifier.length() > truncateLength) {
            preparedIdentifier = preparedIdentifier.substring(0, truncateLength);
        }

        return preparedIdentifier;

    }

    /**
     * Prepares a collection of identifiers to be printed.
     * 
     * @param  Identifiers type.
     * @param identifiers The collection of identifiers.
     * @param truncateLength Maximum length of the returned string.
     *        If this parameter is lower than 1, no truncation is performed.
     * @return The list of identifiers prepared to be printed.
     */
    public static  List prepareIdentifiers(
            Collection identifiers,
            int truncateLength) {

        List preparedIdentifiers = new ArrayList<>(identifiers.size());

        for (T identifier : identifiers) {
            String preparedIdentifier = prepareIdentifier(identifier, truncateLength);
            preparedIdentifiers.add(preparedIdentifier);
        }

        return preparedIdentifiers;

    }

    /**
     * Formats a decimal number.
     * 
     * @param decimal A decimal number.
     * @return The formatted decimal number.
     */
    public static String formatDecimal(
            Double decimal) {

        if (decimal == null) {
            return NULL_VALUE;
        }

        return DECIMALS_FORMAT.format(decimal);

    }

    /**
     * Formats a decimal number, marking it with a color if it is above or equal to a threshold, and
     * with another color if it is below another threshold.
     * 
     * @param decimal A decimal number.
     * @param markAbove Mark the decimal with a color if it is above or equal to this threshold.
     *        Introduce a negative number to not mark the decimal.
     * @param markBelow Mark the decimal with a color if it is below this threshold.
     *        Introduce a negative number to not mark the decimal.
     * @return The formatted and possibly marked decimal number.
     */
    public static String formatDecimal(
            Double decimal,
            double markAbove,
            double markBelow) {

        if (decimal == null) {
            return NULL_VALUE;
        }

        String decimalStr = formatDecimal(decimal);

        if (markAbove >= 0 && decimal >= markAbove) {
            decimalStr = ANSI_CODE_COLOR_DECIMAL_ABOVE.getCode()
                    + decimalStr
                    + AnsiCodeColors.RESET.getCode();
        } else if (markBelow >= 0 && decimal < markBelow) {
            decimalStr = ANSI_CODE_COLOR_DECIMAL_BELOW.getCode()
                    + decimalStr
                    + AnsiCodeColors.RESET.getCode();
        }

        return decimalStr;

    }

    /**
     * @param string A string to be repeated.
     * @param n Amount of repetitions.
     * @return A string formed by the repetition of the introduced string n times.
     */
    public static String repeatString(
            String string,
            int n) {

        if (n < 1) {
            return "";
        }

        StringBuilder strB = new StringBuilder(string.length() * n);

        for (int i = 0; i < n; i++) {
            strB.append(string);
        }

        return strB.toString();

    }

    /**
     * @param n Amount of spaces.
     * @return A string composed of n spaces.
     */
    public static String spaces(
            int n) {

        return repeatString(" ", n);

    }

    /**
     * @param n Amount of hyphens.
     * @return A string composed of n hyphens.
     */
    public static String hyphens(
            int n) {

        return repeatString("-", n);

    }

    /**
     * Escapes a string to include it into a comma separated values list.
     * 
     * @param str The string to escape.
     * @return The escaped string.
     */
    public static String escapeCsv(
            String str) {

        if (str.contains(CSV_TRIMMED_SEPARATOR) || str.contains(CSV_QUOTATION_MARK)) {

            return CSV_QUOTATION_MARK
                    + str.replace(CSV_QUOTATION_MARK, CSV_QUOTATION_MARK + CSV_QUOTATION_MARK)
                    + CSV_QUOTATION_MARK;

        } else {

            return str;

        }

    }

    /**
     * Unescapes a string that was included into a comma separated values list.
     * 
     * @param str The string to unescape.
     * @return The unescaped string.
     */
    public static String unescapeCsv(
            String str) {

        if (str.startsWith(CSV_QUOTATION_MARK)) {
            str = str.substring(CSV_QUOTATION_MARK.length());
        }
        if (str.endsWith(CSV_QUOTATION_MARK)) {
            str = str.substring(0, str.length() - CSV_QUOTATION_MARK.length());
        }

        str = str.replace(CSV_QUOTATION_MARK + CSV_QUOTATION_MARK, CSV_QUOTATION_MARK);

        return str;

    }

    /**
     * Splits a CSV into a list of strings.
     * 
     * @param csv The CSV to split.
     * @return The splitted CSV.
     */
    public static List splitCsv(
            String csv) {

        List strings = new LinkedList<>();

        for (String str : CSV_SPLIT_PATTERN.split(csv)) {
            strings.add(unescapeCsv(str.trim()));
        }

        return strings;

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy