All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.morimekta.util.Strings Maven / Gradle / Ivy

Go to download

Utilities that help with reading, writing and keeping various data formats, including JSON, binary data and formatted text.

There is a newer version: 3.7.1
Show newest version
/*
 * Copyright (c) 2016, Stein Eldar Johnsen
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.util;

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Collection;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;

import static java.lang.Character.isHighSurrogate;
import static java.lang.Character.isLowSurrogate;

/**
 * String utilities.
 */
public class Strings {
    private static final String        NULL                 = "null";
    private static final DecimalFormat DOUBLE_FORMATTER     =
            new DecimalFormat("#.##############", DecimalFormatSymbols.getInstance(Locale.US));
    private static final DecimalFormat SCIENTIFIC_FORMATTER =
            new DecimalFormat("0.############E0", DecimalFormatSymbols.getInstance(Locale.US));
    private static final Pattern       CAMEL_CASE_DELIMITER = Pattern.compile("[^a-zA-Z0-9]");

    /**
     * Properly java-escape the string for printing to console.
     * @param string The string to escape.
     * @return The escaped string.
     */
    public static String escape(CharSequence string) {
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < string.length(); ++i) {
            char c = string.charAt(i);
            switch (c) {
                case '\b':
                    builder.append('\\').append('b');
                    break;
                case '\t':
                    builder.append('\\').append('t');
                    break;
                case '\n':
                    builder.append('\\').append('n');
                    break;
                case '\f':
                    builder.append('\\').append('f');
                    break;
                case '\r':
                    builder.append('\\').append('r');
                    break;
                case '"':
                case '\'':
                    builder.append('\\').append(c);
                    break;
                case '\\':
                    builder.append('\\').append('\\');
                    break;
                default:
                    if (c < 32 || c == 127) {
                        builder.append(String.format("\\%03o", (int) c));
                    } else if (!isConsolePrintable(c) ||
                               Character.isHighSurrogate(c) ||
                               Character.isLowSurrogate(c)) {
                        builder.append(String.format("\\u%04x", (int) c));
                    } else {
                        builder.append(c);
                    }
                    break;
            }
        }
        return builder.toString();
    }

    /**
     * Escape a single character. It is escaped into a string, as it may become
     * more than one char when escaped.
     *
     * @param c The char to escape.
     * @return The escaped char string.
     */
    public static String escape(char c) {
        switch (c) {
            case '\b':
                return "\\b";
            case '\t':
                return "\\t";
            case '\n':
                return "\\n";
            case '\f':
                return "\\f";
            case '\r':
                return "\\r";
            case '"':
                return "\\\"";
            case '\'':
                return "\\'";
            case '\\':
                return "\\\\";
            default:
                if (c < 32 || c == 127) {
                    return String.format("\\%03o", (int) c);
                } else if (!isConsolePrintable(c) ||
                           isHighSurrogate(c) ||
                           isLowSurrogate(c)) {
                    return String.format("\\u%04x", (int) c);
                }
                return String.valueOf(c);
        }
    }

    /**
     * Utility to figure out if a character is printable to the console as
     * a character. Returns false if one of:
     * 
    *
  • The character is a control character. *
  • The character is not defined. *
  • The character does not have a known representation. *
* * @param cp The character unicode code point. * @return If it is printable. */ public static boolean isConsolePrintable(int cp) { return (cp >= 0x20 && cp < 0x7F) || // main printable ascii Character.isDefined(cp) && !((cp < 0x0020 && cp != '\n') || (0x007F <= cp && cp < 0x00A0) || Character.isIdentifierIgnorable(cp) || (0x07e8 <= cp && cp <= 0x07f3) || (0x07f6 <= cp && cp <= 0x0900) || cp == 0x0ac6 || (0x0bfc <= cp && cp <= 0x0d01) || cp == 0x0f8c || cp == 0x10cd || cp == 0x10fd || cp == 0x10fe || cp == 0x10ff || (0x1a20 <= cp && cp <= 0x1cff) || cp == 0x1680 || (0x1701 <= cp && cp <= 0x1711) || (0x1740 <= cp && cp <= 0x1770) || cp == 0x1772 || cp == 0x1773 || (0x1800 <= cp && cp <= 0x18af) || (0x1900 <= cp && cp <= 0x194f) || (0x1980 <= cp && cp <= 0x19df) || cp == 0x1dcd || cp == 0x1dce || cp == 0x1dd0 || cp == 0x2028 || cp == 0x2c22 || cp == 0x2c2b || cp == 0x2c2c || cp == 0x2c2d || cp == 0x2c52 || cp == 0x2c5b || cp == 0x2c5c || cp == 0x2c5d || (0x2cb2 <= cp && cp <= 0x2cbf) || (0x2cc2 <= cp && cp <= 0x2cc7) || (0x2ccc <= cp && cp <= 0x2ce3) || (0x2ceb <= cp && cp <= 0x2cee) || (0x2cf0 <= cp && cp <= 0x2cfc) || cp == 0x2d70 || (0xa000 <= cp && cp <= 0xa4cf) || (0xa674 <= cp && cp <= 0xa67b) || (0xa698 <= cp && cp <= 0xa6ff) || cp == 0xa754 || cp == 0xa755 || cp == 0xa758 || cp == 0xa759 || (0xa75c <= cp && cp <= 0xa763) || (0xa76a <= cp && cp <= 0xa76d) || (0xa771 <= cp && cp <= 0xa778) || (0xa800 <= cp && cp <= 0xa8df) || (0xa930 <= cp && cp <= 0xa95f) || (0xa97d <= cp && cp <= 0xaa5e) || (0xaa7c <= cp && cp <= 0xaaff) || (0xab30 <= cp && cp <= 0xabff) || (0xd7fc <= cp && cp <= 0xdfff) || (0xe47f <= cp && cp <= 0xe48a) || (0xe4c5 <= cp && cp <= 0xe4ff) || cp == 0xe506 || (0xe50b <= cp && cp <= 0xe50e) || cp == 0xe52d || (0xe534 <= cp && cp <= 0xe547) || cp == 0xe55d || (0xe560 <= cp && cp <= 0xe56f) || cp == 0xe576 || cp == 0xe577 || (0xe57d <= cp && cp <= 0xe583) || (0xe588 <= cp && cp <= 0xe58c) || cp == 0xe591 || cp == 0xe592 || (0xe598 <= cp && cp <= 
0xe67f) || (0xe6a4 <= cp && cp <= 0xee68 && cp != 0xec0b && cp != 0xec96 && cp != 0xec97 && cp != 0xec99 && cp != 0xec9d) || (0xee94 <= cp && cp <= 0xeeff) || (0xef1a <= cp && cp <= 0xefec) || (0xfd40 <= cp && cp <= 0xfdff) ); } /** * Unescape selected chars for compatability with JavaScript's encodeURI. * In speed critical applications this could be dropped since the * receiving application will certainly decode these fine. * Note that this function is case-sensitive. Thus "%3f" would not be * unescaped. But this is ok because it is only called with the output of * URLEncoder.encode which returns uppercase hex. *

* Example: "%3F" -> "?", "%24" -> "$", etc. * * @param str The string to escape. * @return The escaped string. */ public static String unescapeForEncodeUriCompatability(String str) { return str.replace("%21", "!").replace("%7E", "~") .replace("%27", "'").replace("%28", "(").replace("%29", ")") .replace("%3B", ";").replace("%2F", "/").replace("%3F", "?") .replace("%3A", ":").replace("%40", "@").replace("%26", "&") .replace("%3D", "=").replace("%2B", "+").replace("%24", "$") .replace("%2C", ",").replace("%23", "#"); } /** * Join set of arbitrary values with delimiter. * * @param delimiter The delimiter. * @param values The values to join. * @return The joined string. */ public static String join(String delimiter, Object... values) { // Since primitive arrays does not pass as a values array, but as it's // single first element. if (values.length == 1) { Class type = values[0].getClass(); if (char[].class.equals(type)) { return joinP(delimiter, (char[]) values[0]); } else if (int[].class.equals(type)) { return joinP(delimiter, (int[]) values[0]); } else if (long[].class.equals(type)) { return joinP(delimiter, (long[]) values[0]); } else if (double[].class.equals(type)) { return joinP(delimiter, (double[]) values[0]); } else if (boolean[].class.equals(type)) { return joinP(delimiter, (boolean[]) values[0]); } } StringBuilder builder = new StringBuilder(); boolean first = true; for (Object value : values) { if (first) { first = false; } else { builder.append(delimiter); } builder.append(Objects.toString(value)); } return builder.toString(); } /** * Join array with delimiter. * * @param delimiter The delimiter. * @param chars The char array to join. * @return The joined string. */ public static String joinP(String delimiter, char... 
chars) { StringBuilder builder = new StringBuilder(chars.length + (delimiter.length() * chars.length)); boolean first = true; for (char c : chars) { if (first) { first = false; } else { builder.append(delimiter); } builder.append(c); } return builder.toString(); } /** * Join array with delimiter. * * @param delimiter The delimiter. * @param values The int array to join. * @return The joined string. */ public static String joinP(String delimiter, int... values) { StringBuilder builder = new StringBuilder(values.length + (delimiter.length() * values.length)); boolean first = true; for (int i : values) { if (first) { first = false; } else { builder.append(delimiter); } builder.append(Integer.toString(i)); } return builder.toString(); } /** * Join array with delimiter. * * @param delimiter The delimiter. * @param values The int array to join. * @return The joined string. */ public static String joinP(String delimiter, long... values) { StringBuilder builder = new StringBuilder(values.length + (delimiter.length() * values.length)); boolean first = true; for (long i : values) { if (first) { first = false; } else { builder.append(delimiter); } builder.append(Long.toString(i)); } return builder.toString(); } /** * Join array with delimiter. * * @param delimiter The delimiter. * @param values The double array to join. * @return The joined string. */ public static String joinP(String delimiter, double... values) { StringBuilder builder = new StringBuilder(values.length + (delimiter.length() * values.length)); boolean first = true; for (double d : values) { if (first) { first = false; } else { builder.append(delimiter); } builder.append(asString(d)); } return builder.toString(); } /** * Join array with delimiter. * * @param delimiter The delimiter. * @param values The double array to join. * @return The joined string. */ public static String joinP(String delimiter, boolean... 
values) { StringBuilder builder = new StringBuilder(values.length + (delimiter.length() * values.length)); boolean first = true; for (boolean d : values) { if (first) { first = false; } else { builder.append(delimiter); } builder.append(d); } return builder.toString(); } /** * Join collection with delimiter. * * @param Collection item type. * @param delimiter The delimiter. * @param strings The string collection to join. * @return The joined string. */ public static String join(String delimiter, Collection strings) { StringBuilder builder = new StringBuilder(); boolean first = true; for (T o : strings) { if (first) { first = false; } else { builder.append(delimiter); } builder.append(Objects.toString(o)); } return builder.toString(); } /** * Check if the string is representing an integer (or long) value. * * @param str The string to check if is an integer. * @return True if key is an integer. */ public static boolean isInteger(CharSequence str) { return isInteger(str, 0, str.length()); } /** * Check if the string is representing an integer (or long) value. * * @param str The string to check if is an integer. * @param off The offset of the string to start checking. * @param len The length / number fo chars to check. * @return True if key is an integer. */ public static boolean isInteger(CharSequence str, int off, int len) { if (len == 0) return false; int i = off; if (str.charAt(i) == '-') { if (len == 1) return false; ++i; } for (; i < len; ++i) { char c = str.charAt(i); if (c < '0' || c > '9') return false; } return true; } /** * Multiply a string N times. * * @param s The string to multiply. * @param num N * @return The result. */ public static String times(String s, int num) { StringBuilder builder = new StringBuilder(); for (int i = 0; i < num; ++i) { builder.append(s); } return builder.toString(); } /** * Format a prefixed name as camelCase. The prefix is kept verbatim, while * tha name is split on '_' chars, and joined with each part capitalized. 
* * @param prefix The prefix. * @param name The name to camel-case. * @return theCamelCasedName */ public static String camelCase(String prefix, String name) { return prefix + camelCase(name); } /** * Format a name as CamelCase. The name is split on non-alphabet non-numeric * chars, and joined with each part capitalized. This is also called * PascalCase. There is in this instance no assumptions on the name itself, * other than it contains some alphabet characters. Any uppercase letters * in the name will be kept as uppercase, so that a CamelCase name will * stay CamelCase through this call. * * @param name The name to camel-case. * @return TheCamelCasedName */ public static String camelCase(String name) { StringBuilder builder = new StringBuilder(); String[] parts = CAMEL_CASE_DELIMITER.split(name); for (String part : parts) { if (part.isEmpty()) { continue; } builder.append(capitalize(part)); } return builder.toString(); } /** * Format a prefixed name as c_case. The prefix is kept verbatim, while the * name has a '_' character inserted before each upper-case letter, not * including the first character. Then the whole thing is lower-cased. * * @param prefix The prefix. * @param name The name to c-case. * @param suffix The suffix. * @return the_c_cased_name */ public static String c_case(String prefix, String name, String suffix) { // Assume we insert at most 4 '_' chars for a majority of names. StringBuilder builder = new StringBuilder(prefix.length() + name.length() + 5); builder.append(prefix); boolean lastUpper = true; for (char c : name.toCharArray()) { if (Character.isUpperCase(c)) { if (!lastUpper) { builder.append('_'); } lastUpper = true; } else if (c == '_' || c == '.' || c == '-') { builder.append('_'); lastUpper = true; continue; } else if (!Character.isDigit(c)) { lastUpper = false; } builder.append(Character.toLowerCase(c)); } builder.append(suffix); return builder.toString(); } /** * Format a prefixed name as c_case. 
The prefix is kept verbatim, while the * name has a '_' character inserted before each upper-case letter, not * including the first character. Then the whole thing is lower-cased. *

* Note that this will mangle upper-case abbreviations. *

* @param prefix The prefix. * @param name The name to c-case. * @return the_c_cased_name */ public static String c_case(String prefix, String name) { return c_case(prefix, name, ""); } /** * Format a prefixed name as c_case. The prefix is kept verbatim, while the * name has a '_' character inserted before each upper-case letter, not * including the first character. Then the whole thing is lower-cased. *

* Note that this may mangle upper-case abbreviations. *

* @param name The name to c-case. * @return the_c_cased_name */ public static String c_case(String name) { return c_case("", name, ""); } public static String capitalize(String string) { return string.substring(0, 1) .toUpperCase(Locale.US) + string.substring(1); } /** * Make a minimal printable string from a double value. This method does * not necessary generate a string that when parsed generates the identical * number as given in. But ut should consistently generate the same string * (locale independent) for the same number with reasonable accuracy. * * @param d The double value. * @return The string value. */ public static String asString(double d) { long l = (long) d; if (d > ((10 << 9) - 1) || (1 / d) > (10 << 6)) { // Scientific notation should be used. return SCIENTIFIC_FORMATTER.format(d); } else if (d == (double) l) { // actually an integer or long value. return Long.toString(l); } else { return DOUBLE_FORMATTER.format(d); } } /** * Make a printable string from a collection using the tools here. * * @param collection The collection to stringify. * @return The collection string value. */ public static String asString(Collection collection) { if (collection == null) { return NULL; } StringBuilder builder = new StringBuilder(); builder.append('['); boolean first = true; for (Object item : collection) { if (first) { first = false; } else { builder.append(','); } builder.append(asString(item)); } builder.append(']'); return builder.toString(); } /** * Make a minimal printable string value from a typed map. * * @param map The map to stringify. * @return The resulting string. 
*/ public static String asString(Map map) { if (map == null) { return NULL; } StringBuilder builder = new StringBuilder(); builder.append('{'); boolean first = true; for (Map.Entry entry : map.entrySet()) { if (first) { first = false; } else { builder.append(','); } builder.append(asString(entry.getKey())) .append(':') .append(asString(entry.getValue())); } builder.append('}'); return builder.toString(); } /** * Make an object into a string using the typed tools here. * * @param o The object to stringify. * @return The resulting string. */ public static String asString(Object o) { if (o == null) { return NULL; } else if (o instanceof Stringable) { return ((Stringable) o).asString(); } else if (o instanceof Numeric) { return String.format("%d", ((Numeric) o).asInteger()); } else if (o instanceof CharSequence) { return String.format("\"%s\"", escape((CharSequence) o)); } else if (o instanceof Double) { return asString(((Double) o).doubleValue()); } else if (o instanceof Collection) { return asString((Collection) o); } else if (o instanceof Map) { return asString((Map) o); } else { return o.toString(); } } /* * The following functions are copied from the java version of * http://code.google.com/p/google-diff-match-patch/ * * Copyright 2006 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * Determine the common prefix of two strings * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the start of each string. 
*/ public static int commonPrefix(String text1, String text2) { // Performance analysis: http://neil.fraser.name/news/2007/10/09/ int n = Math.min(text1.length(), text2.length()); for (int i = 0; i < n; i++) { if (text1.charAt(i) != text2.charAt(i)) { return i; } } return n; } /** * Determine the common suffix of two strings * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the end of each string. */ public static int commonSuffix(String text1, String text2) { // Performance analysis: http://neil.fraser.name/news/2007/10/09/ int text1_length = text1.length(); int text2_length = text2.length(); int n = Math.min(text1_length, text2_length); for (int i = 1; i <= n; i++) { if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) { return i - 1; } } return n; } /** * Determine if the suffix of one string is the prefix of another. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the end of the first * string and the start of the second string. */ public static int commonOverlap(String text1, String text2) { // Cache the text lengths to prevent multiple calls. int text1_length = text1.length(); int text2_length = text2.length(); // Eliminate the null case. if (text1_length == 0 || text2_length == 0) { return 0; } // Truncate the longer string. if (text1_length > text2_length) { text1 = text1.substring(text1_length - text2_length); } else if (text1_length < text2_length) { text2 = text2.substring(0, text1_length); } int text_length = Math.min(text1_length, text2_length); // Quick check for the worst case. if (text1.equals(text2)) { return text_length; } // Start by looking for a single character match // and increase length until no match is found. 
// Performance analysis: http://neil.fraser.name/news/2010/11/04/ int best = 0; int length = 1; while (true) { String pattern = text1.substring(text_length - length); int found = text2.indexOf(pattern); if (found == -1) { return best; } length += found; if (found == 0 || text1.substring(text_length - length).equals( text2.substring(0, length))) { best = length; length++; } } } // --- constants and helpers. // defeat instantiation. private Strings() {} }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy