jodd.util.CharUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of xml-stream-css Show documentation
Stream Xml using StAX and Css matcher
The newest version!
package jodd.util;



import java.io.UnsupportedEncodingException;

/**
 * Various character and character sequence utilities, including
 * {@code char[]} - {@code byte[]} conversions.
 * <p>
 * All methods are static and keep no state. None of them checks its array or
 * sequence arguments for {@code null}; passing {@code null} results in a
 * {@link NullPointerException}.
 */
public class CharUtil {

    // ---------------------------------------------------------------- simple

    /**
     * Converts (signed) byte to (unsigned) char.
     *
     * @param b byte to convert
     * @return char in the range {@code 0x00-0xFF} holding the unsigned value of the byte
     */
    public static char toChar(byte b) {
        return (char) (b & 0xFF);
    }

    /**
     * Converts char array into byte array by stripping the high byte of each character.
     *
     * @param carr source characters
     * @return new byte array of the same length, holding the low byte of each char
     */
    public static byte[] toSimpleByteArray(char[] carr) {
        byte[] barr = new byte[carr.length];
        for (int i = 0; i < carr.length; i++) {
            barr[i] = (byte) carr[i];
        }
        return barr;
    }

    /**
     * Converts char sequence into byte array by stripping the high byte of each character.
     *
     * @param charSequence source characters
     * @return new byte array of the same length, holding the low byte of each char
     * @see #toSimpleByteArray(char[])
     */
    public static byte[] toSimpleByteArray(CharSequence charSequence) {
        byte[] barr = new byte[charSequence.length()];
        for (int i = 0; i < barr.length; i++) {
            barr[i] = (byte) charSequence.charAt(i);
        }
        return barr;
    }

    /**
     * Converts byte array to char array by simply extending bytes to chars.
     * Bytes are treated as unsigned, so every resulting char is in the range
     * {@code 0x00-0xFF}.
     *
     * @param barr source bytes
     * @return new char array of the same length
     */
    public static char[] toSimpleCharArray(byte[] barr) {
        char[] carr = new char[barr.length];
        for (int i = 0; i < barr.length; i++) {
            carr[i] = toChar(barr[i]);
        }
        return carr;
    }

    // ---------------------------------------------------------------- ascii

    /**
     * Returns the single-byte value of a char. In case of overflow, i.e. when
     * the char is greater than {@code 0xFF}, {@code 0x3F} (the question mark)
     * is returned.
     * <p>
     * Despite the name, every char up to {@code 0xFF} is passed through
     * unchanged, not just the 7-bit ASCII range.
     *
     * @param c char to convert
     * @return {@code c} itself when it fits in one byte, otherwise {@code 0x3F}
     */
    public static int toAscii(char c) {
        return c <= 0xFF ? c : 0x3F;
    }

    /**
     * Converts char array into {@link #toAscii(char) ASCII} array.
     *
     * @param carr source characters
     * @return new byte array of the same length, each element produced by {@link #toAscii(char)}
     */
    public static byte[] toAsciiByteArray(char[] carr) {
        byte[] barr = new byte[carr.length];
        for (int i = 0; i < carr.length; i++) {
            barr[i] = (byte) toAscii(carr[i]);
        }
        return barr;
    }

    /**
     * Converts char sequence into {@link #toAscii(char) ASCII} byte array.
     *
     * @param charSequence source characters
     * @return new byte array of the same length, each element produced by {@link #toAscii(char)}
     */
    public static byte[] toAsciiByteArray(CharSequence charSequence) {
        byte[] barr = new byte[charSequence.length()];
        for (int i = 0; i < barr.length; i++) {
            barr[i] = (byte) toAscii(charSequence.charAt(i));
        }
        return barr;
    }

    // ---------------------------------------------------------------- raw arrays

    /**
     * Converts char array into byte array by replacing each character with two bytes.
     * The high byte of each char is written first, followed by the low byte.
     *
     * @param carr source characters
     * @return new byte array that is twice as long as the input
     */
    public static byte[] toRawByteArray(char[] carr) {
        byte[] barr = new byte[carr.length << 1];
        int bpos = 0;
        for (char c : carr) {
            barr[bpos++] = (byte) (c >>> 8);
            barr[bpos++] = (byte) c;
        }
        return barr;
    }

    /**
     * Converts byte array into char array by combining each pair of bytes into
     * one character: the first byte of a pair becomes the high byte, the second
     * one the low byte. This is the inverse of {@link #toRawByteArray(char[])}.
     * <p>
     * When the input has an odd number of bytes, the last byte becomes the high
     * byte of the final char and its low byte is zero.
     *
     * @param barr source bytes
     * @return new char array of length {@code ceil(barr.length / 2)}
     */
    public static char[] toRawCharArray(byte[] barr) {
        // half of the length, rounded up for a trailing unpaired byte
        char[] carr = new char[(barr.length >> 1) + (barr.length & 1)];
        for (int i = 0, j = 0; i < barr.length; j++) {
            int c = (barr[i++] & 0xFF) << 8;
            if (i < barr.length) {
                c |= barr[i++] & 0xFF;
            }
            carr[j] = (char) c;
        }
        return carr;
    }

    // ---------------------------------------------------------------- encoding

    /**
     * Converts byte array of specific encoding to char array.
     *
     * @param barr    encoded bytes
     * @param charset name of the charset used to decode the bytes
     * @return decoded characters
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    public static char[] toCharArray(byte[] barr, String charset) throws UnsupportedEncodingException {
        return new String(barr, charset).toCharArray();
    }

    // ---------------------------------------------------------------- find

    /**
     * Checks whether the character equals any of the given characters.
     *
     * @param c     character to look for
     * @param match characters to compare against
     * @return true if the character equals any character from the given array,
     *         otherwise false
     */
    public static boolean equalsOne(char c, char[] match) {
        for (char candidate : match) {
            if (candidate == c) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds index of the first character in given array that matches any from the
     * given set of characters.
     *
     * @param source characters to scan
     * @param index  index to start the scan from
     * @param match  set of characters to look for
     * @return index of matched character or -1
     */
    public static int findFirstEqual(char[] source, int index, char[] match) {
        for (int i = index; i < source.length; i++) {
            if (equalsOne(source[i], match)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds index of the first character in given array that equals the
     * given character.
     *
     * @param source characters to scan
     * @param index  index to start the scan from
     * @param match  character to look for
     * @return index of matched character or -1
     */
    public static int findFirstEqual(char[] source, int index, char match) {
        for (int i = index; i < source.length; i++) {
            if (source[i] == match) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds index of the first character in given array that differs from all
     * characters in the given set.
     *
     * @param source characters to scan
     * @param index  index to start the scan from
     * @param match  set of characters to skip over
     * @return index of the first differing character or -1
     */
    public static int findFirstDiff(char[] source, int index, char[] match) {
        for (int i = index; i < source.length; i++) {
            if (!equalsOne(source[i], match)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds index of the first character in given array that differs from the
     * given character.
     *
     * @param source characters to scan
     * @param index  index to start the scan from
     * @param match  character to skip over
     * @return index of the first differing character or -1
     */
    public static int findFirstDiff(char[] source, int index, char match) {
        for (int i = index; i < source.length; i++) {
            if (source[i] != match) {
                return i;
            }
        }
        return -1;
    }

    // ---------------------------------------------------------------- is

    /**
     * Returns true if character is a white space ({@code c <= ' '}).
     * White space definition is taken from String class (see: trim()).
     */
    public static boolean isWhitespace(char c) {
        return c <= ' ';
    }

    /**
     * Returns true if specified character is lowercase ASCII letter.
     * If user uses only ASCIIs, it is much much faster.
     */
    public static boolean isLowercaseAlpha(char c) {
        return (c >= 'a') && (c <= 'z');
    }

    /**
     * Returns true if specified character is uppercase ASCII letter.
     * If user uses only ASCIIs, it is much much faster.
     */
    public static boolean isUppercaseAlpha(char c) {
        return (c >= 'A') && (c <= 'Z');
    }

    /**
     * Returns true if specified character is an ASCII letter or an ASCII digit.
     */
    public static boolean isAlphaOrDigit(char c) {
        return isDigit(c) || isAlpha(c);
    }

    /**
     * Returns true if specified character is an ASCII letter, an ASCII digit
     * or the underscore.
     */
    public static boolean isWordChar(char c) {
        return isDigit(c) || isAlpha(c) || (c == '_');
    }

    /**
     * Returns true if specified character is an ASCII letter, an ASCII digit
     * or one of the characters {@code _ . [ ]}.
     */
    public static boolean isPropertyNameChar(char c) {
        return isDigit(c) || isAlpha(c) || (c == '_') || (c == '.') || (c == '[') || (c == ']');
    }

    // ---------------------------------------------------------------- RFC

    /**
     * Indicates whether the given character is in the {@code ALPHA} set.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#appendix-A">RFC 3986, appendix A</a>
     */
    public static boolean isAlpha(char c) {
        return isLowercaseAlpha(c) || isUppercaseAlpha(c);
    }

    /**
     * Indicates whether the given character is in the {@code DIGIT} set.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#appendix-A">RFC 3986, appendix A</a>
     */
    public static boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Indicates whether the given character is the hexadecimal digit
     * ({@code 0-9}, {@code a-f} or {@code A-F}).
     */
    public static boolean isHexDigit(char c) {
        return isDigit(c) || ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F'));
    }

    /**
     * Indicates whether the given character is in the gen-delims set.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#appendix-A">RFC 3986, appendix A</a>
     */
    public static boolean isGenericDelimiter(int c) {
        switch (c) {
            case ':':
            case '/':
            case '?':
            case '#':
            case '[':
            case ']':
            case '@':
                return true;
            default:
                return false;
        }
    }

    /**
     * Indicates whether the given character is in the sub-delims set.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#appendix-A">RFC 3986, appendix A</a>
     */
    protected static boolean isSubDelimiter(int c) {
        switch (c) {
            case '!':
            case '$':
            case '&':
            case '\'':
            case '(':
            case ')':
            case '*':
            case '+':
            case ',':
            case ';':
            case '=':
                return true;
            default:
                return false;
        }
    }

    /**
     * Indicates whether the given character is in the reserved set,
     * i.e. it is either a gen-delims or a sub-delims character.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#appendix-A">RFC 3986, appendix A</a>
     */
    protected static boolean isReserved(char c) {
        return isGenericDelimiter(c) || isSubDelimiter(c);
    }

    /**
     * Indicates whether the given character is in the unreserved set.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#appendix-A">RFC 3986, appendix A</a>
     */
    protected static boolean isUnreserved(char c) {
        return isAlpha(c) || isDigit(c) || c == '-' || c == '.' || c == '_' || c == '~';
    }

    /**
     * Indicates whether the given character is in the pchar set.
     * Percent-encoded triplets are not handled here, as only a single
     * character is examined.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#appendix-A">RFC 3986, appendix A</a>
     */
    protected static boolean isPchar(char c) {
        return isUnreserved(c) || isSubDelimiter(c) || c == ':' || c == '@';
    }


    // ---------------------------------------------------------------- conversions

    /**
     * Uppers lowercase ASCII char. Any other char is returned unchanged.
     */
    public static char toUpperAscii(char c) {
        return isLowercaseAlpha(c) ? (char) (c - 0x20) : c;
    }


    /**
     * Lowers uppercase ASCII char. Any other char is returned unchanged.
     */
    public static char toLowerAscii(char c) {
        return isUppercaseAlpha(c) ? (char) (c + 0x20) : c;
    }

    /**
     * Converts hex char to int value.
     *
     * @param c hexadecimal digit, either case
     * @return value of the digit, in the range {@code 0-15}
     * @throws IllegalArgumentException if the char is not a hexadecimal digit
     */
    public static int hex2int(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        throw new IllegalArgumentException("Not a hex: " + c);
    }

    /**
     * Converts integer digit to hex char, using uppercase letters.
     *
     * @param i index into {@link #HEX_CHARS}
     * @return char stored in {@link #HEX_CHARS} at the given index
     * @throws ArrayIndexOutOfBoundsException if the value is not a valid index
     *         of {@link #HEX_CHARS}
     */
    public static char int2hex(int i) {
        return HEX_CHARS[i];
    }

    /**
     * Lookup table used by {@link #int2hex(int)}: the hexadecimal digits in
     * ascending order, with uppercase letters.
     */
    public static final char[] HEX_CHARS = new char[] {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

}