// com.siashan.toolkit.crypt.util.StringUtils Maven / Gradle / Ivy
//
// Go to download
package com.siashan.toolkit.crypt.util;

import com.siashan.toolkit.crypt.binary.CharEncoding;

import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

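/**
 * Converts Strings to and from bytes using the encodings required by the Java specification. These encodings are
 * specified in {@link StandardCharsets}.
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @see CharEncoding
 * @see StandardCharsets
 * @author siashan
 * @since 1.0.7
 */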
public class StringUtils {
    /**
     * Value returned by the index-search methods when no match is found.
     */
    public static final int INDEX_NOT_FOUND = -1;

    /**
     * String constant: the empty string {@code ""}.
     */
    public static final String EMPTY = "";

    /**
     * Compares two character sequences, returning {@code true} if they represent equal sequences of characters.
     *
     * <p>{@code null}s are handled without exceptions. Two {@code null} references are considered equal. The
     * comparison is case-sensitive.</p>
     *
     * <pre>
     * StringUtils.equals(null, null)   = true
     * StringUtils.equals(null, "abc")  = false
     * StringUtils.equals("abc", null)  = false
     * StringUtils.equals("abc", "abc") = true
     * StringUtils.equals("abc", "ABC") = false
     * </pre>
     *
     * @see Object#equals(Object)
     * @param cs1
     *            the first character sequence, may be {@code null}
     * @param cs2
     *            the second character sequence, may be {@code null}
     * @return {@code true} if the sequences are equal (case-sensitive), or both are {@code null}
     */
    public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
        if (cs1 == cs2) {
            return true;
        }
        if (cs1 == null || cs2 == null) {
            return false;
        }
        if (cs1 instanceof String && cs2 instanceof String) {
            return cs1.equals(cs2);
        }
        // Mixed CharSequence implementations: compare lengths, then characters one by one.
        final int length = cs1.length();
        if (length != cs2.length()) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            if (cs1.charAt(i) != cs2.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether the character sequence is not blank, i.e. the negation of {@link #isBlank(CharSequence)}.
     *
     * @param str the character sequence to check, may be {@code null}
     * @return {@code true} if the sequence contains at least one non-blank character
     */
    public static boolean isNotBlank(CharSequence str) {
        return !isBlank(str);
    }

    /**
     * Checks whether the character sequence is blank. A sequence is blank when it is:
     *
     * <ul>
     *     <li>{@code null}</li>
     *     <li>the empty string {@code ""}</li>
     *     <li>made up only of characters accepted by {@link #isBlankChar(char)} (spaces, full-width spaces,
     *     tabs, line breaks and similar invisible characters)</li>
     * </ul>
     *
     * @param str the character sequence to check, may be {@code null}
     * @return {@code true} if the sequence is blank
     */
    public static boolean isBlank(CharSequence str) {
        if (str == null) {
            return true;
        }
        final int length = str.length();
        for (int i = 0; i < length; i++) {
            // A single non-blank character is enough to make the whole sequence non-blank.
            if (!isBlankChar(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether the character is a blank character.
     *
     * <p>Blank characters include the space, tab, full-width space and non-breaking space.</p>
     *
     * @param c the character to check
     * @return {@code true} if the character is blank
     * @see Character#isWhitespace(int)
     * @see Character#isSpaceChar(int)
     */
    public static boolean isBlankChar(char c) {
        return isBlankChar((int) c);
    }

    /**
     * Checks whether the code point is a blank character.
     *
     * <p>Blank characters include the space, tab, full-width space and non-breaking space. In addition,
     * {@code U+FEFF} and {@code U+202A} are treated as blank.</p>
     *
     * @param c the code point to check
     * @return {@code true} if the code point is blank
     * @see Character#isWhitespace(int)
     * @see Character#isSpaceChar(int)
     */
    public static boolean isBlankChar(int c) {
        return Character.isWhitespace(c)
                || Character.isSpaceChar(c)
                || c == '\ufeff'
                || c == '\u202a';
    }

    /**
     * Calls {@link String#getBytes(Charset)} and wraps the result in a {@link ByteBuffer}.
     *
     * @param string
     *            the string to encode (if {@code null}, {@code null} is returned)
     * @param charset
     *            the {@link Charset} used to encode the {@code String}
     * @return a buffer wrapping the encoded bytes, or {@code null} if the input string was {@code null}
     */
    private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
        if (string == null) {
            return null;
        }
        return ByteBuffer.wrap(string.getBytes(charset));
    }

    /**
     * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
     * array wrapped by the buffer.
     *
     * @param string
     *            the string to encode, may be {@code null}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    public static ByteBuffer getByteBufferUtf8(final String string) {
        return getByteBuffer(string, StandardCharsets.UTF_8);
    }

    /**
     * Calls {@link String#getBytes(Charset)}.
     *
     * @param string
     *            the string to encode (if {@code null}, {@code null} is returned)
     * @param charset
     *            the {@link Charset} used to encode the {@code String}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    public static byte[] getBytes(final String string, final Charset charset) {
        if (string == null) {
            return null;
        }
        return string.getBytes(charset);
    }

    /**
     * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a
     * new byte array.
     *
     * @param string
     *            the string to encode, may be {@code null}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    public static byte[] getBytesIso8859_1(final String string) {
        return getBytes(string, StandardCharsets.ISO_8859_1);
    }

    /**
     * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new
     * byte array.
     *
     * <p>This method catches {@link UnsupportedEncodingException} and rethrows it as
     * {@link IllegalStateException} (with the original exception as its cause), which should never happen for a
     * required charset name. Use this method when the encoding is required to be in the JRE.</p>
     *
     * @param string
     *            the string to encode, may be {@code null}
     * @param charsetName
     *            the name of the required {@link Charset}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     * @throws IllegalStateException
     *             if the named charset is not supported
     */
    public static byte[] getBytesUnchecked(final String string, final String charsetName) {
        if (string == null) {
            return null;
        }
        try {
            return string.getBytes(charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the string to encode, may be {@code null}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    public static byte[] getBytesUsAscii(final String string) {
        return getBytes(string, StandardCharsets.US_ASCII);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the string to encode, may be {@code null}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    public static byte[] getBytesUtf16(final String string) {
        return getBytes(string, StandardCharsets.UTF_16);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the string to encode, may be {@code null}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    public static byte[] getBytesUtf16Be(final String string) {
        return getBytes(string, StandardCharsets.UTF_16BE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the string to encode, may be {@code null}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    public static byte[] getBytesUtf16Le(final String string) {
        return getBytes(string, StandardCharsets.UTF_16LE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the string to encode, may be {@code null}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    public static byte[] getBytesUtf8(final String string) {
        return getBytes(string, StandardCharsets.UTF_8);
    }

    /**
     * Builds the unchecked exception used to report an unsupported charset name.
     *
     * @param charsetName
     *            the charset name that was rejected
     * @param e
     *            the original checked exception, kept as the cause so the stack trace is not lost
     * @return an {@link IllegalStateException} whose message is {@code charsetName + ": " + e}
     */
    private static IllegalStateException newIllegalStateException(final String charsetName,
                                                                  final UnsupportedEncodingException e) {
        return new IllegalStateException(charsetName + ": " + e, e);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     *
     * @param bytes
     *            the bytes to be decoded into characters, may be {@code null}
     * @param charset
     *            the {@link Charset} used to decode the bytes; not {@code null}
     * @return a new {@code String} decoded from the specified array of bytes using the given charset, or
     *         {@code null} if the input byte array was {@code null}
     */
    public static String newString(final byte[] bytes, final Charset charset) {
        return bytes == null ? null : new String(bytes, charset);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the named charset.
     *
     * <p>This method catches {@link UnsupportedEncodingException} and rethrows it as
     * {@link IllegalStateException} (with the original exception as its cause), which should never happen for a
     * required charset name. Use this method when the encoding is required to be in the JRE.</p>
     *
     * @param bytes
     *            the bytes to be decoded into characters, may be {@code null}
     * @param charsetName
     *            the name of the required {@link Charset}
     * @return a new {@code String} decoded from the specified array of bytes using the named charset, or
     *         {@code null} if the input byte array was {@code null}
     * @throws IllegalStateException
     *             if the named charset is not supported
     */
    public static String newString(final byte[] bytes, final String charsetName) {
        if (bytes == null) {
            return null;
        }
        try {
            return new String(bytes, charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset.
     *
     * @param bytes
     *            the bytes to be decoded into characters, may be {@code null}
     * @return a new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or
     *         {@code null} if the input byte array was {@code null}
     */
    public static String newStringIso8859_1(final byte[] bytes) {
        return newString(bytes, StandardCharsets.ISO_8859_1);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset.
     *
     * @param bytes
     *            the bytes to be decoded into characters, may be {@code null}
     * @return a new {@code String} decoded from the specified array of bytes using the US-ASCII charset, or
     *         {@code null} if the input byte array was {@code null}
     */
    public static String newStringUsAscii(final byte[] bytes) {
        return newString(bytes, StandardCharsets.US_ASCII);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset.
     *
     * @param bytes
     *            the bytes to be decoded into characters, may be {@code null}
     * @return a new {@code String} decoded from the specified array of bytes using the UTF-16 charset, or
     *         {@code null} if the input byte array was {@code null}
     */
    public static String newStringUtf16(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset.
     *
     * @param bytes
     *            the bytes to be decoded into characters, may be {@code null}
     * @return a new {@code String} decoded from the specified array of bytes using the UTF-16BE charset, or
     *         {@code null} if the input byte array was {@code null}
     */
    public static String newStringUtf16Be(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16BE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset.
     *
     * @param bytes
     *            the bytes to be decoded into characters, may be {@code null}
     * @return a new {@code String} decoded from the specified array of bytes using the UTF-16LE charset, or
     *         {@code null} if the input byte array was {@code null}
     */
    public static String newStringUtf16Le(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16LE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset.
     *
     * @param bytes
     *            the bytes to be decoded into characters, may be {@code null}
     * @return a new {@code String} decoded from the specified array of bytes using the UTF-8 charset, or
     *         {@code null} if the input byte array was {@code null}
     */
    public static String newStringUtf8(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_8);
    }

    /**
     * Checks whether the string starts with the given prefix, ignoring case.
     *
     * @param str    the string to check, may be {@code null}
     * @param prefix the expected prefix, may be {@code null}
     * @return {@code true} if {@code str} starts with {@code prefix} (case-insensitively), or both are
     *         {@code null}
     */
    public static boolean startWithIgnoreCase(CharSequence str, CharSequence prefix) {
        return startWith(str, prefix, true);
    }

    /**
     * Finds the last occurrence of {@code searchStr} within {@code str}, ignoring case.
     *
     * @param str       the string to search in, may be {@code null}
     * @param searchStr the string to search for, may be {@code null}
     * @return the index of the last match, or {@link #INDEX_NOT_FOUND} if there is no match or either argument
     *         is {@code null}
     */
    public static int lastIndexOfIgnoreCase(final CharSequence str, final CharSequence searchStr) {
        if (str == null) {
            // Guard here: str.length() below would otherwise throw before the null-safe overload is reached.
            return INDEX_NOT_FOUND;
        }
        return lastIndexOfIgnoreCase(str, searchStr, str.length());
    }

    /**
     * Finds the last occurrence of {@code searchStr} within {@code str} at or before {@code fromIndex},
     * ignoring case. The search runs backwards starting at {@code fromIndex}.
     *
     * @param str       the string to search in, may be {@code null}
     * @param searchStr the string to search for, may be {@code null}
     * @param fromIndex the index to start the backward search from
     * @return the index of the last match, or {@link #INDEX_NOT_FOUND} if there is no match or either argument
     *         is {@code null}
     */
    public static int lastIndexOfIgnoreCase(final CharSequence str, final CharSequence searchStr, int fromIndex) {
        return lastIndexOf(str, searchStr, fromIndex, true);
    }

    /**
     * Finds the last occurrence of {@code searchStr} within {@code str} at or before {@code fromIndex}. The
     * search runs backwards starting at {@code fromIndex}.
     *
     * <p>{@code fromIndex} is clamped into the range {@code [0, str.length()]}. An empty {@code searchStr}
     * matches at the clamped {@code fromIndex}.</p>
     *
     * @param str        the string to search in, may be {@code null}
     * @param searchStr  the string to search for, may be {@code null}
     * @param fromIndex  the index to start the backward search from
     * @param ignoreCase whether to ignore case when comparing
     * @return the index of the last match, or {@link #INDEX_NOT_FOUND} if there is no match or either argument
     *         is {@code null}
     */
    public static int lastIndexOf(final CharSequence str, final CharSequence searchStr, int fromIndex, boolean ignoreCase) {
        if (str == null || searchStr == null) {
            return INDEX_NOT_FOUND;
        }
        // Convert once up front instead of on every loop iteration.
        final String text = str.toString();
        final String target = searchStr.toString();

        final int start = Math.min(Math.max(fromIndex, 0), text.length());
        if (target.isEmpty()) {
            return start;
        }

        if (!ignoreCase) {
            // Case-sensitive search is delegated to the JDK.
            return text.lastIndexOf(target, start);
        }

        final int targetLength = target.length();
        // A match cannot begin after text.length() - targetLength, so begin the backward scan there.
        for (int i = Math.min(start, text.length() - targetLength); i >= 0; i--) {
            if (text.regionMatches(true, i, target, 0, targetLength)) {
                return i;
            }
        }
        return INDEX_NOT_FOUND;
    }

    /**
     * Compares a region of {@code str1} with a region of {@code str2} of the same length.
     *
     * <p>Returns {@code false} if either string is {@code null}.</p>
     *
     * @param str1       the first string
     * @param start1     start offset of the region in the first string
     * @param str2       the second string
     * @param start2     start offset of the region in the second string
     * @param length     number of characters to compare
     * @param ignoreCase whether to ignore case when comparing
     * @return {@code true} if the two regions are equal
     * @see String#regionMatches(boolean, int, String, int, int)
     */
    public static boolean isSubEquals(CharSequence str1, int start1, CharSequence str2, int start2, int length, boolean ignoreCase) {
        if (null == str1 || null == str2) {
            return false;
        }

        return str1.toString().regionMatches(ignoreCase, start1, str2.toString(), start2, length);
    }

    /**
     * Checks whether the string starts with the given prefix.
     *
     * <p>If both the string and the prefix are {@code null}, {@code true} is returned; if only one of them is
     * {@code null}, {@code false} is returned.</p>
     *
     * @param str        the string to check, may be {@code null}
     * @param prefix     the expected prefix, may be {@code null}
     * @param ignoreCase whether to ignore case when comparing
     * @return {@code true} if {@code str} starts with {@code prefix}
     */
    public static boolean startWith(CharSequence str, CharSequence prefix, boolean ignoreCase) {
        return startWith(str, prefix, ignoreCase, false);
    }

    /**
     * Checks whether the string starts with the given prefix, optionally rejecting the case where the string
     * and the prefix are equal.
     *
     * <p>If both the string and the prefix are {@code null}, they count as equal: {@code true} is returned
     * unless {@code ignoreEquals} is set. If only one of them is {@code null}, {@code false} is returned.</p>
     *
     * @param str          the string to check, may be {@code null}
     * @param prefix       the expected prefix, may be {@code null}
     * @param ignoreCase   whether to ignore case when comparing
     * @param ignoreEquals whether to return {@code false} when the string equals the prefix
     * @return {@code true} if {@code str} starts with {@code prefix}
     */
    public static boolean startWith(CharSequence str, CharSequence prefix, boolean ignoreCase, boolean ignoreEquals) {
        if (null == str || null == prefix) {
            if (ignoreEquals) {
                // Two nulls are "equal", and equality is excluded in this mode.
                return false;
            }
            return null == str && null == prefix;
        }

        final String text = str.toString();
        final String head = prefix.toString();
        // regionMatches folds case per character and independently of the default locale,
        // the same way String#equalsIgnoreCase does.
        if (!text.regionMatches(ignoreCase, 0, head, 0, head.length())) {
            return false;
        }

        // The prefix matched, so the two strings are equal exactly when their lengths are equal.
        return !ignoreEquals || text.length() != head.length();
    }

    /**
     * Compares two strings for equality.
     *
     * @param str1       the first string to compare, may be {@code null}
     * @param str2       the second string to compare, may be {@code null}
     * @param ignoreCase whether to ignore case when comparing
     * @return {@code true} if the two strings are equal, or both are {@code null}
     */
    public static boolean equals(CharSequence str1, CharSequence str2, boolean ignoreCase) {
        if (null == str1) {
            // Equal only when both are null.
            return str2 == null;
        }
        if (null == str2) {
            // str1 is non-null here, so they differ.
            return false;
        }

        if (ignoreCase) {
            return str1.toString().equalsIgnoreCase(str2.toString());
        }
        return str1.toString().contentEquals(str2);
    }

    /**
     * Returns the part of the string starting at the given index.
     *
     * @param string    the string to cut, may be {@code null}
     * @param fromIndex the index to start cutting from (inclusive); handled as in
     *                  {@link #sub(CharSequence, int, int)}
     * @return the trailing part of the string, or {@code null} if the input is {@code null} or empty
     */
    public static String subSuf(CharSequence string, int fromIndex) {
        if (isEmpty(string)) {
            return null;
        }
        return sub(string, fromIndex, string.length());
    }

    /**
     * Checks whether the string is empty. A string is empty when it is:
     *
     * <ul>
     *     <li>{@code null}</li>
     *     <li>the empty string {@code ""}</li>
     * </ul>
     *
     * <p>Examples:</p>
     *
     * <ul>
     *     <li>{@code StringUtils.isEmpty(null)     // true}</li>
     *     <li>{@code StringUtils.isEmpty("")       // true}</li>
     *     <li>{@code StringUtils.isEmpty(" \t\n")  // false}</li>
     *     <li>{@code StringUtils.isEmpty("abc")    // false}</li>
     * </ul>
     *
     * <p>Note: unlike {@link #isBlank(CharSequence)}, this method does not treat whitespace-only strings as
     * empty.</p>
     *
     * @param str the string to check, may be {@code null}
     * @return {@code true} if the string is {@code null} or has length zero
     * @see #isBlank(CharSequence)
     */
    public static boolean isEmpty(CharSequence str) {
        return str == null || str.length() == 0;
    }

    /**
     * A more lenient variant of the JDK {@code substring}.
     *
     * <ul>
     *     <li>Indexes start at 0; the last character is at -1.</li>
     *     <li>If {@code from} and {@code to} resolve to the same position, {@code ""} is returned.</li>
     *     <li>A negative {@code from} or {@code to} is counted backwards from the end of the string. If its
     *     absolute value exceeds the string length, {@code from} becomes 0 and {@code to} becomes the
     *     length.</li>
     *     <li>An index greater than the length is clamped to the length.</li>
     *     <li>If, after adjustment, {@code from} is greater than {@code to}, the two are swapped.</li>
     * </ul>
     *
     * <pre>
     * sub("abcdefgh", 2, 3)  = "c"
     * sub("abcdefgh", 2, -3) = "cde"
     * </pre>
     *
     * @param str              the string to cut, may be {@code null}
     * @param fromIndexInclude start index (inclusive)
     * @param toIndexExclude   end index (exclusive)
     * @return the substring; {@code null} if {@code str} is {@code null}, {@code ""} if it is empty
     */
    public static String sub(CharSequence str, int fromIndexInclude, int toIndexExclude) {
        if (isEmpty(str)) {
            return str(str);
        }
        int len = str.length();

        if (fromIndexInclude < 0) {
            fromIndexInclude = len + fromIndexInclude;
            if (fromIndexInclude < 0) {
                fromIndexInclude = 0;
            }
        } else if (fromIndexInclude > len) {
            fromIndexInclude = len;
        }

        if (toIndexExclude < 0) {
            toIndexExclude = len + toIndexExclude;
            if (toIndexExclude < 0) {
                toIndexExclude = len;
            }
        } else if (toIndexExclude > len) {
            toIndexExclude = len;
        }

        if (toIndexExclude < fromIndexInclude) {
            // Normalise the range so that from <= to.
            int tmp = fromIndexInclude;
            fromIndexInclude = toIndexExclude;
            toIndexExclude = tmp;
        }

        if (fromIndexInclude == toIndexExclude) {
            return EMPTY;
        }

        return str.toString().substring(fromIndexInclude, toIndexExclude);
    }

    /**
     * Converts a {@link CharSequence} to a {@code String}, null-safe.
     *
     * @param cs the {@link CharSequence}, may be {@code null}
     * @return the string form of {@code cs}, or {@code null} if {@code cs} is {@code null}
     */
    public static String str(CharSequence cs) {
        return null == cs ? null : cs.toString();
    }

}