All downloads are free. Search and download functionality uses the official Maven repository.

sviolet.thistle.util.conversion.StringUtils Maven / Gradle / Ivy

/*
 * Copyright (C) 2015-2017 S.Violet
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Project GitHub: https://github.com/shepherdviolet/thistle
 * Email: [email protected]
 */

package sviolet.thistle.util.conversion;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * 字符串工具
 * @author S.Violet
 */
public class StringUtils {

    /**
     * Matches a decimal numeric character reference such as {@code &#39532;}.
     * Group 1 captures the digits; at least one digit is required so that a bare
     * {@code &#;} is never handed to the number parser.
     */
    private static final Pattern DEC_UNICODE_PATTERN = Pattern.compile("&#(\\d+);");

    /**
     * Signature of a number that lost precision in Excel: the third, fourth and fifth
     * decimal digits are {@code 000} or {@code 999}.
     */
    private static final Pattern EXCEL_PRECISION_PROBLEM_PATTERN =
            Pattern.compile("^(-?\\d+\\.\\d{2})(000|999)(\\d)*$");

    /** Charset name used by {@link #truncateByGbkByteLength(String, int)}. */
    private static final String GBK = "GBK";

    /** Distance between an ASCII lower-case letter and its upper-case counterpart. */
    private static final int ASCII_CASE_OFFSET = 32;

    /**
     * Upper-cases the ASCII letters found at the given positions.
     * Positions outside {@code [0, length)} and characters other than {@code a-z} are ignored.
     *
     * @param src source string, may be null
     * @param positions indexes to upper-case, in the range [0, length); null is treated as empty
     * @return the converted string, or null if {@code src} is null
     */
    public static String toUpperCase(String src, int... positions) {
        if (src == null) {
            return null;
        }
        if (positions == null) {
            return src;
        }
        char[] chars = src.toCharArray();
        for (int position : positions) {
            if (position < 0 || position >= chars.length) {
                continue;
            }
            char current = chars[position];
            if (current >= 'a' && current <= 'z') {
                chars[position] = (char) (current - ASCII_CASE_OFFSET);
            }
        }
        return String.valueOf(chars);
    }

    /**
     * Lower-cases the ASCII letters found at the given positions.
     * Positions outside {@code [0, length)} and characters other than {@code A-Z} are ignored.
     *
     * @param src source string, may be null
     * @param positions indexes to lower-case, in the range [0, length); null is treated as empty
     * @return the converted string, or null if {@code src} is null
     */
    public static String toLowerCase(String src, int... positions) {
        if (src == null) {
            return null;
        }
        if (positions == null) {
            return src;
        }
        char[] chars = src.toCharArray();
        for (int position : positions) {
            if (position < 0 || position >= chars.length) {
                continue;
            }
            char current = chars[position];
            if (current >= 'A' && current <= 'Z') {
                chars[position] = (char) (current + ASCII_CASE_OFFSET);
            }
        }
        return String.valueOf(chars);
    }

    /**
     * Converts full-width characters to their half-width (ASCII) counterparts:
     * the ideographic space U+3000 becomes an ordinary space, and every character in
     * U+FF01..U+FF5E is shifted down by 0xFEE0 into U+0021..U+007E.
     * All other characters are left untouched.
     *
     * <p>NOTE(review): the method name and the previous documentation described the
     * opposite direction (half-width to full-width). The implemented behavior is kept
     * as-is so existing callers are not affected.
     *
     * @param src source string, may be null
     * @return the converted string, or null if {@code src} is null
     */
    public static String toSBCCase(String src) {
        if (src == null) {
            return null;
        }
        char[] chars = src.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            char current = chars[i];
            if (current == 0x3000) {
                chars[i] = ' ';
            } else if (current >= 0xFF01 && current <= 0xFF5E) {
                chars[i] = (char) (current - 0xFEE0);
            }
        }
        return new String(chars);
    }

    /**
     * Renders a throwable's stack trace (as produced by
     * {@link Throwable#printStackTrace(PrintWriter)}) into a string.
     *
     * @param throwable the throwable, may be null
     * @return the stack trace text, or null if {@code throwable} is null
     */
    public static String throwableToString(Throwable throwable) {
        if (throwable == null) {
            return null;
        }
        StringWriter buffer = new StringWriter();
        try (PrintWriter printer = new PrintWriter(buffer)) {
            throwable.printStackTrace(printer);
        }
        return buffer.toString();
    }

    /**
     * Replaces decimal numeric character references with the characters they denote.
     * For example {@code "&#39532;&#29305;"} becomes the two characters U+9A6C U+7279.
     *
     * <p>References above U+FFFF are emitted as surrogate pairs. A reference whose
     * number is not a valid Unicode code point (or does not fit in an int) is copied
     * to the output unchanged instead of raising an exception.
     *
     * @param string text that may contain {@code &#NNN;} references, may be null
     * @return the decoded text, or null if {@code string} is null
     */
    public static String decodeDecUnicode(String string) {
        if (string == null) {
            return null;
        }
        Matcher matcher = DEC_UNICODE_PATTERN.matcher(string);
        StringBuilder decoded = new StringBuilder(string.length());
        int copiedUpTo = 0;
        // Text is copied manually rather than through Matcher.appendReplacement, which
        // would interpret a decoded '$' or '\' as replacement syntax.
        while (matcher.find()) {
            decoded.append(string, copiedUpTo, matcher.start());
            int codePoint = parseCodePoint(matcher.group(1));
            if (codePoint < 0) {
                decoded.append(matcher.group());
            } else {
                decoded.appendCodePoint(codePoint);
            }
            copiedUpTo = matcher.end();
        }
        decoded.append(string, copiedUpTo, string.length());
        return decoded.toString();
    }

    /**
     * Parses a run of decimal digits as a Unicode code point.
     *
     * @return the code point, or -1 if the number overflows an int or is not a valid code point
     */
    private static int parseCodePoint(String digits) {
        try {
            int value = Integer.parseInt(digits);
            return Character.isValidCodePoint(value) ? value : -1;
        } catch (NumberFormatException ignored) {
            // Too many digits to fit in an int: certainly not a code point.
            return -1;
        }
    }

    /**
     * Null-safe variant of {@link String#contains(CharSequence)}.
     *
     * @param string the text to search in, may be null
     * @param keywords the text to look for, may be null
     * @return true if both arguments are non-null and {@code string} contains {@code keywords}
     */
    public static boolean contains(String string, String keywords) {
        if (string == null || keywords == null) {
            return false;
        }
        return string.contains(keywords);
    }

    /**
     * [Special purpose] Repairs numbers read from Excel files that lost precision,
     * e.g. 1.67 stored as 1.669999999...3. A value is treated as damaged when its third
     * to fifth decimal digits are {@code 000} or {@code 999}; such a value is rounded
     * half-up to two decimal places. Anything else is returned unchanged.
     *
     * @param string numeric text read from Excel, e.g. {@code "1.669999999993"}; may be null
     * @return the corrected value, e.g. {@code "1.67"}, or the input itself when it does
     *         not match the precision-loss signature
     */
    public static String resolveExcelPrecisionProblem(String string) {
        if (string == null || !EXCEL_PRECISION_PROBLEM_PATTERN.matcher(string).matches()) {
            return string;
        }
        return new BigDecimal(string).setScale(2, RoundingMode.HALF_UP).toString();
    }

    /**
     * Splits a string, trims every item and drops the items that are empty after trimming.
     *
     * <p>Example: {@code splitAndTrim(" abc, def, ,ghj,,klm ", ",")} yields
     * {@code ["abc", "def", "ghj", "klm"]}.
     *
     * @param string the string to split, may be null
     * @param splitRegex the delimiter, interpreted as a regular expression by
     *        {@link String#split(String)}
     * @return a new list, never null; empty when {@code string} is null
     */
    public static List<String> splitAndTrim(String string, String splitRegex) {
        if (string == null) {
            return new ArrayList<>(0);
        }
        String[] items = string.split(splitRegex);
        List<String> result = new ArrayList<>(items.length);
        for (String item : items) {
            String trimmed = item.trim();
            if (!trimmed.isEmpty()) {
                result.add(trimmed);
            }
        }
        return result;
    }

    /**
     * Truncates a string at the tail so that its GBK-encoded length does not exceed
     * {@code toLength} bytes, without cutting a two-byte character in half.
     *
     * <p>When truncation happens the result is decoded back from the GBK bytes.
     *
     * @param string the string, may be null
     * @param toLength maximum length in bytes
     * @return a string whose GBK byte length is at most {@code toLength}; null if
     *         {@code string} is null; empty if {@code toLength <= 0}
     * @throws IllegalStateException if the JVM does not provide the GBK charset
     */
    public static String truncateByGbkByteLength(String string, int toLength) {
        if (string == null) {
            return null;
        }
        if (toLength <= 0) {
            return "";
        }
        // Fast path assuming two bytes per char; computed as long so that a huge
        // string cannot overflow the shift and bypass the truncation.
        if (((long) string.length() << 1) <= toLength) {
            return string;
        }
        try {
            byte[] bytes = string.getBytes(GBK);
            if (bytes.length <= toLength) {
                return string;
            }
            // A last byte of the form 0xxxxxxx is either a one-byte char or the second
            // byte of a two-byte char, so cutting right after it is safe.
            if ((bytes[toLength - 1] & 0x80) == 0) {
                return new String(bytes, 0, toLength, GBK);
            }
            // Walk the bytes from the start: 0xxxxxxx is a one-byte char, 1xxxxxxx is the
            // first byte of a two-byte char whose second byte is skipped.
            int index = 0;
            while (index < toLength) {
                index += (bytes[index] & 0x80) != 0 ? 2 : 1;
            }
            // Landing exactly on toLength means the cut is on a char boundary; overshooting
            // means the last kept byte would be the first half of a two-byte char.
            int keep = (index == toLength) ? toLength : toLength - 1;
            return new String(bytes, 0, keep, GBK);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
    }

    /**
     * Truncates a string at the tail so that its UTF-8-encoded length does not exceed
     * {@code toLength} bytes, without cutting a multi-byte character in half.
     *
     * <p>When truncation happens the result is decoded back from the UTF-8 bytes.
     *
     * @param string the string, may be null
     * @param toLength maximum length in bytes
     * @return a string whose UTF-8 byte length is at most {@code toLength}; null if
     *         {@code string} is null; empty if {@code toLength <= 0}
     */
    public static String truncateByUtf8ByteLength(String string, int toLength) {
        if (string == null) {
            return null;
        }
        if (toLength <= 0) {
            return "";
        }
        // Fast path assuming four bytes per char; computed as long so that a huge
        // string cannot overflow the shift and bypass the truncation.
        if (((long) string.length() << 2) <= toLength) {
            return string;
        }
        byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
        if (bytes.length <= toLength) {
            return string;
        }
        // bytes[cut] is the first byte that would be dropped. While it is a continuation
        // byte (10xxxxxx) the cut is inside a multi-byte char, so move back to its head.
        int cut = toLength;
        while (cut > 0 && (bytes[cut] & 0xC0) == 0x80) {
            cut--;
        }
        return new String(bytes, 0, cut, StandardCharsets.UTF_8);
    }

    /**
     * Pads a string on the left, or removes characters from its left side, until its
     * length is within {@code [minLength, maxLength]}.
     *
     * <pre>
     * ("12345678", 6, 6, '0')   -> "345678"
     * ("12345678", 10, 10, '0') -> "0012345678"
     * ("12345678", 6, 10, '0')  -> "12345678"
     * ("12345678", 4, 6, '0')   -> "345678"
     * ("12345678", 10, 12, '0') -> "0012345678"
     * </pre>
     *
     * @param string the string; null is treated as empty
     * @param minLength minimum length; negative values are treated as 0
     * @param maxLength maximum length; values below {@code minLength} are raised to it
     * @param paddingChar the character used for padding
     * @return a string that satisfies the length constraints
     */
    public static String leftPaddingToLength(String string, int minLength, int maxLength, char paddingChar) {
        String value = (string == null) ? "" : string;
        int min = Math.max(minLength, 0);
        int max = Math.max(maxLength, min);
        int length = value.length();
        if (length > max) {
            return value.substring(length - max);
        }
        if (length >= min) {
            return value;
        }
        StringBuilder padded = new StringBuilder(min);
        for (int i = length; i < min; i++) {
            padded.append(paddingChar);
        }
        return padded.append(value).toString();
    }

    /**
     * Pads a string on the right, or removes characters from its right side, until its
     * length is within {@code [minLength, maxLength]}.
     *
     * <pre>
     * ("12345678", 6, 6, '0')   -> "123456"
     * ("12345678", 10, 10, '0') -> "1234567800"
     * ("12345678", 6, 10, '0')  -> "12345678"
     * ("12345678", 4, 6, '0')   -> "123456"
     * ("12345678", 10, 12, '0') -> "1234567800"
     * </pre>
     *
     * @param string the string; null is treated as empty
     * @param minLength minimum length; negative values are treated as 0
     * @param maxLength maximum length; values below {@code minLength} are raised to it
     * @param paddingChar the character used for padding
     * @return a string that satisfies the length constraints
     */
    public static String rightPaddingToLength(String string, int minLength, int maxLength, char paddingChar) {
        String value = (string == null) ? "" : string;
        int min = Math.max(minLength, 0);
        int max = Math.max(maxLength, min);
        int length = value.length();
        if (length > max) {
            return value.substring(0, max);
        }
        if (length >= min) {
            return value;
        }
        StringBuilder padded = new StringBuilder(min).append(value);
        for (int i = length; i < min; i++) {
            padded.append(paddingChar);
        }
        return padded.toString();
    }

    /**
     * Removes leading occurrences of {@code trimChar} until another character is met
     * or the string has shrunk to {@code minLength}.
     *
     * <pre>
     * ("12345678", 0, '0')   -> "12345678"
     * ("0012345678", 0, '0') -> "12345678"
     * ("0000", 0, '0')       -> ""
     * ("0000", 1, '0')       -> "0"
     * ("0000", 2, '0')       -> "00"
     * ("0000", 5, '0')       -> "0000"
     * </pre>
     *
     * @param string the string; null is treated as empty
     * @param minLength minimum length to keep; negative values are treated as 0
     * @param trimChar the character to remove
     * @return the trimmed string
     */
    public static String leftTrimToLength(String string, int minLength, char trimChar) {
        String value = (string == null) ? "" : string;
        int removable = value.length() - Math.max(minLength, 0);
        int start = 0;
        while (start < removable && value.charAt(start) == trimChar) {
            start++;
        }
        return (start == 0) ? value : value.substring(start);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy