// sviolet.thistle.util.conversion.StringUtils (Maven / Gradle / Ivy)
//
// Go to download
/*
 * Copyright (C) 2015-2017 S.Violet
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Project GitHub: https://github.com/shepherdviolet/thistle
 * Email: [email protected]
 */

package sviolet.thistle.util.conversion;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * String utilities.
 *
 * @author S.Violet
 */
public class StringUtils {

    /**
     * Matches a decimal numeric character reference such as {@code &#8226;}.
     * Group 1 captures the digits; at least one digit is required.
     */
    private static final String DECODE_DEC_UNICODE_REGEXP = "&#(\\d+);";

    /** Compiled form of {@link #DECODE_DEC_UNICODE_REGEXP}, built once instead of on every call. */
    private static final Pattern DECODE_DEC_UNICODE_PATTERN = Pattern.compile(DECODE_DEC_UNICODE_REGEXP);

    /**
     * Signature of an Excel precision-loss value: the 3rd, 4th and 5th decimal
     * digits are {@code 000} or {@code 999}.
     */
    private static final Pattern EXCEL_PRECISION_PROBLEM_PATTERN =
            Pattern.compile("^(-?\\d+\\.\\d{2})(000|999)(\\d)*$");

    /** Distance between an ASCII lower-case letter and its upper-case counterpart (32). */
    private static final int ASCII_CASE_OFFSET = 'a' - 'A';

    /** Ideographic (full-width) space, code 12288. */
    private static final char FULL_WIDTH_SPACE = '\u3000';

    /** First full-width form that has an ASCII counterpart (code 65281, full-width '!'). */
    private static final char FULL_WIDTH_FIRST = '\uFF01';

    /** Last full-width form that has an ASCII counterpart (code 65374, full-width '~'). */
    private static final char FULL_WIDTH_LAST = '\uFF5E';

    /** Distance between a full-width form and its ASCII counterpart (65248). */
    private static final int FULL_WIDTH_OFFSET = 0xFEE0;

    /** Name of the charset used by {@link #truncateByGbkByteLength(String, int)}. */
    private static final String GBK = "GBK";

    /**
     * Converts the ASCII letters at the given positions to upper case.
     * Positions outside {@code [0, length)} and characters that are not
     * {@code 'a'..'z'} are left untouched.
     *
     * @param src source string, may be null
     * @param positions indexes to convert, in {@code [0, length)}; null is treated as "no positions"
     * @return the converted string, or null if {@code src} is null
     */
    public static String toUpperCase(String src, int... positions){
        if (src == null) {
            return null;
        }
        if (positions == null) {
            return src;
        }
        char[] chars = src.toCharArray();
        for (int position : positions) {
            if (position < 0 || position >= chars.length) {
                continue;
            }
            char current = chars[position];
            if (current >= 'a' && current <= 'z') {
                chars[position] = (char) (current - ASCII_CASE_OFFSET);
            }
        }
        return String.valueOf(chars);
    }

    /**
     * Converts the ASCII letters at the given positions to lower case.
     * Positions outside {@code [0, length)} and characters that are not
     * {@code 'A'..'Z'} are left untouched.
     *
     * @param src source string, may be null
     * @param positions indexes to convert, in {@code [0, length)}; null is treated as "no positions"
     * @return the converted string, or null if {@code src} is null
     */
    public static String toLowerCase(String src, int... positions){
        if (src == null) {
            return null;
        }
        if (positions == null) {
            return src;
        }
        char[] chars = src.toCharArray();
        for (int position : positions) {
            if (position < 0 || position >= chars.length) {
                continue;
            }
            char current = chars[position];
            if (current >= 'A' && current <= 'Z') {
                chars[position] = (char) (current + ASCII_CASE_OFFSET);
            }
        }
        return String.valueOf(chars);
    }

    /**
     * Replaces full-width digits, letters and punctuation (U+FF01..U+FF5E) with
     * their half-width ASCII counterparts, and the ideographic space (U+3000)
     * with an ordinary space.
     *
     * <p>NOTE: the method name and its earlier documentation suggest a conversion
     * <em>to</em> full-width, but the implementation has always converted
     * full-width <em>to</em> half-width. The behaviour is kept as is because
     * callers may depend on it.</p>
     *
     * @param src source string, may be null
     * @return the converted string, or null if {@code src} is null
     */
    public static String toSBCCase(String src) {
        if (src == null) {
            return null;
        }
        char[] chars = src.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            char current = chars[i];
            if (current == FULL_WIDTH_SPACE) {
                chars[i] = ' ';
            } else if (current >= FULL_WIDTH_FIRST && current <= FULL_WIDTH_LAST) {
                chars[i] = (char) (current - FULL_WIDTH_OFFSET);
            }
        }
        return new String(chars);
    }

    /**
     * Renders the stack trace of a throwable as a string, in the format
     * produced by {@link Throwable#printStackTrace(PrintWriter)}.
     *
     * @param throwable the throwable, may be null
     * @return the stack trace text, or null if {@code throwable} is null
     */
    public static String throwableToString(Throwable throwable) {
        if (throwable == null){
            return null;
        }
        StringWriter buffer = new StringWriter();
        try (PrintWriter printer = new PrintWriter(buffer)) {
            throwable.printStackTrace(printer);
        }
        return buffer.toString();
    }

    /**
     * Decodes decimal numeric character references ({@code &#NNNN;}) in a string.
     *
     * <p>Example: {@code "Matt&#8226;Damon"} becomes {@code "Matt•Damon"}.</p>
     *
     * <p>Code points above U+FFFF are decoded to a surrogate pair. A reference
     * whose number is not a valid Unicode code point (or does not fit in an
     * {@code int}) is left in the output unchanged, as is {@code &#;}.</p>
     *
     * @param string the text to decode, may be null
     * @return the decoded text, or null if {@code string} is null
     */
    public static String decodeDecUnicode(String string){
        if (string == null){
            return null;
        }
        Matcher matcher = DECODE_DEC_UNICODE_PATTERN.matcher(string);
        // StringBuffer (not StringBuilder) because Matcher.appendReplacement requires it before Java 9
        StringBuffer decoded = new StringBuffer(string.length());
        while (matcher.find()) {
            String replacement = decodeDecReference(matcher.group(0), matcher.group(1));
            // The decoded text may be '$' or '\', which appendReplacement would otherwise
            // interpret as a group reference / escape and reject.
            matcher.appendReplacement(decoded, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(decoded);
        return decoded.toString();
    }

    /**
     * Decodes one numeric character reference; returns the reference itself when
     * its number is not a valid Unicode code point.
     */
    private static String decodeDecReference(String reference, String digits) {
        int codePoint;
        try {
            codePoint = Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            // Too many digits to fit in an int: cannot be a code point, keep the text as is
            return reference;
        }
        if (!Character.isValidCodePoint(codePoint)) {
            return reference;
        }
        return new String(Character.toChars(codePoint));
    }

    /**
     * Checks whether {@code string} contains {@code keywords}.
     *
     * @param string the text to search in, may be null
     * @param keywords the text to search for, may be null
     * @return true if both arguments are non-null and {@code string} contains {@code keywords}
     */
    public static boolean contains(String string, String keywords){
        if (string == null || keywords == null){
            return false;
        }
        return string.contains(keywords);
    }

    /**
     * [Special purpose] Usually used on values read from Excel files.
     * Excel numbers may suffer precision loss, e.g. 1.67 is read as
     * 1.669999999...3. This method recognises such values (3rd to 5th decimal
     * digits are {@code 000} or {@code 999}) and rounds them half-up to two
     * decimal places. Any other input is returned unchanged.
     *
     * @param string the value read from Excel, e.g. {@code 1.669999999993}; may be null
     * @return the corrected value, e.g. {@code 1.67}, or the input itself when it does not match
     */
    public static String resolveExcelPrecisionProblem(String string){
        if (string == null || !EXCEL_PRECISION_PROBLEM_PATTERN.matcher(string).matches()){
            return string;
        }
        return new BigDecimal(string).setScale(2, RoundingMode.HALF_UP).toString();
    }

    /**
     * Splits a string with the given separator, trims every item and drops the
     * items that are empty or blank.
     *
     * <pre>{@code
     * splitAndTrim(" abc, def, ,ghj,,klm ", ",")
     * // result: "abc", "def", "ghj", "klm"
     * }</pre>
     *
     * @param string the string to split, may be null
     * @param splitRegex the separator, interpreted as a regular expression by
     *                   {@link String#split(String)}; must not be null
     * @return the trimmed, non-empty items; never null (empty list for a null {@code string})
     */
    public static List<String> splitAndTrim(String string, String splitRegex) {
        if (string == null) {
            return new ArrayList<>(0);
        }
        String[] items = string.split(splitRegex);
        List<String> result = new ArrayList<>(items.length);
        for (String item : items) {
            String trimmed = item.trim();
            if (!trimmed.isEmpty()) {
                result.add(trimmed);
            }
        }
        return result;
    }

    /**
     * Truncates a string at the tail so that its GBK-encoded length is at most
     * {@code toLength} bytes, without cutting a two-byte character in half.
     *
     * <p>The original documentation lists GB2312, GBK and GB18030 as supported;
     * note that the bytes are always produced and measured with the GBK charset.
     * When truncation happens the result is decoded from those GBK bytes, so
     * characters that cannot be encoded in GBK may not survive unchanged.</p>
     *
     * @param string the string, may be null
     * @param toLength maximum length in bytes
     * @return a string whose GBK byte length is not greater than {@code toLength};
     *         null if {@code string} is null, empty if {@code toLength <= 0}
     * @throws IllegalStateException if the runtime does not support the GBK charset
     */
    public static String truncateByGbkByteLength(String string, int toLength) {
        try {
            if (string == null) {
                return null;
            }
            if (toLength <= 0) {
                return "";
            }
            // Fast path: at most 2 bytes per char. Computed as long so that a very
            // long string cannot overflow int and wrongly skip the truncation.
            if (((long) string.length() << 1) <= toLength) {
                return string;
            }
            byte[] bytes = string.getBytes(GBK);
            if (bytes.length <= toLength) {
                return string;
            }

            /*
             * Check the last byte that would be kept.
             *
             * A byte of the form 0??????? is either a one-byte char or the second
             * byte of a two-byte char (a first byte always has its high bit set),
             * so cutting right after it is safe.
             */
            if ((bytes[toLength - 1] & 0x80) == 0) {
                return new String(bytes, 0, toLength, GBK);
            }

            /*
             * Otherwise walk the bytes from the beginning following the GBK rules:
             * 0??????? is a one-byte char, 1??????? starts a two-byte char.
             * The walk stops at the first char boundary that is >= toLength.
             */
            int boundary = 0;
            while (boundary < toLength) {
                boundary += (bytes[boundary] & 0x80) != 0 ? 2 : 1;
            }

            // boundary == toLength     : the cut falls on a char boundary
            // boundary == toLength + 1 : the last kept byte would be the first half of a two-byte char
            int end = boundary == toLength ? toLength : toLength - 1;
            return new String(bytes, 0, end, GBK);

        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
    }

    /**
     * Truncates a string at the tail so that its UTF-8-encoded length is at most
     * {@code toLength} bytes, without cutting a multi-byte character in half.
     *
     * @param string the string, may be null
     * @param toLength maximum length in bytes
     * @return a string whose UTF-8 byte length is not greater than {@code toLength};
     *         null if {@code string} is null, empty if {@code toLength <= 0}
     */
    public static String truncateByUtf8ByteLength(String string, int toLength) {
        if (string == null) {
            return null;
        }
        if (toLength <= 0) {
            return "";
        }
        // Fast path: assume at most 4 bytes per char. Computed as long so that a very
        // long string cannot overflow int and wrongly skip the truncation.
        if (((long) string.length() << 2) <= toLength) {
            return string;
        }
        byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
        if (bytes.length <= toLength) {
            return string;
        }
        // bytes[end] is the first byte that would be dropped. While it is a
        // continuation byte (10xxxxxx) the cut is inside a multi-byte char, so move
        // back until it is the head of a char (0xxxxxxx or 11xxxxxx).
        int end = toLength;
        while (end > 0 && (bytes[end] & 0xC0) == 0x80) {
            end--;
        }
        return new String(bytes, 0, end, StandardCharsets.UTF_8);
    }

    /**
     * Pads a string on the left with {@code paddingChar}, or removes characters
     * from the left, until its length is within {@code [minLength, maxLength]}.
     *
     * <pre>{@code
     * ("12345678", 6, 6, '0')   -> "345678"
     * ("12345678", 10, 10, '0') -> "0012345678"
     * ("12345678", 6, 10, '0')  -> "12345678"
     * ("12345678", 4, 6, '0')   -> "345678"
     * ("12345678", 10, 12, '0') -> "0012345678"
     * }</pre>
     *
     * @param string the string; null is treated as an empty string
     * @param minLength minimum length; negative values are treated as 0
     * @param maxLength maximum length; values below {@code minLength} are treated as {@code minLength}
     * @param paddingChar the character used for padding
     * @return a string that satisfies the length requirement
     */
    public static String leftPaddingToLength(String string, int minLength, int maxLength, char paddingChar) {
        String source = string == null ? "" : string;
        int min = Math.max(minLength, 0);
        int max = Math.max(maxLength, min);
        int length = source.length();
        if (length > max) {
            // Too long: keep the rightmost max characters
            return source.substring(length - max);
        }
        if (length >= min) {
            return source;
        }
        StringBuilder padded = new StringBuilder(min);
        for (int i = length; i < min; i++) {
            padded.append(paddingChar);
        }
        return padded.append(source).toString();
    }

    /**
     * Pads a string on the right with {@code paddingChar}, or removes characters
     * from the right, until its length is within {@code [minLength, maxLength]}.
     *
     * <pre>{@code
     * ("12345678", 6, 6, '0')   -> "123456"
     * ("12345678", 10, 10, '0') -> "1234567800"
     * ("12345678", 6, 10, '0')  -> "12345678"
     * ("12345678", 4, 6, '0')   -> "123456"
     * ("12345678", 10, 12, '0') -> "1234567800"
     * }</pre>
     *
     * @param string the string; null is treated as an empty string
     * @param minLength minimum length; negative values are treated as 0
     * @param maxLength maximum length; values below {@code minLength} are treated as {@code minLength}
     * @param paddingChar the character used for padding
     * @return a string that satisfies the length requirement
     */
    public static String rightPaddingToLength(String string, int minLength, int maxLength, char paddingChar) {
        String source = string == null ? "" : string;
        int min = Math.max(minLength, 0);
        int max = Math.max(maxLength, min);
        int length = source.length();
        if (length > max) {
            // Too long: keep the leftmost max characters
            return source.substring(0, max);
        }
        if (length >= min) {
            return source;
        }
        StringBuilder padded = new StringBuilder(min).append(source);
        for (int i = length; i < min; i++) {
            padded.append(paddingChar);
        }
        return padded.toString();
    }

    /**
     * Removes {@code trimChar} from the left of a string until another character
     * is met or the string has shrunk to {@code minLength}.
     *
     * <pre>{@code
     * ("12345678", 0, '0')   -> "12345678"
     * ("0012345678", 0, '0') -> "12345678"
     * ("0000", 0, '0')       -> ""
     * ("0000", 1, '0')       -> "0"
     * ("0000", 2, '0')       -> "00"
     * ("0000", 5, '0')       -> "0000"
     * }</pre>
     *
     * @param string the string; null is treated as an empty string
     * @param minLength minimum length to keep; negative values are treated as 0
     * @param trimChar the character to remove
     * @return the trimmed string
     */
    public static String leftTrimToLength(String string, int minLength, char trimChar) {
        String source = string == null ? "" : string;
        // Number of leading characters that may be removed without going below minLength
        int removable = source.length() - Math.max(minLength, 0);
        int start = 0;
        while (start < removable && source.charAt(start) == trimChar) {
            start++;
        }
        return start == 0 ? source : source.substring(start);
    }

}