All downloads are free. The search and download functionality uses the official Maven repository.

com.github.dennisit.vplus.data.utils.StringUtils Maven / Gradle / Ivy

There is a newer version: 2.0.8
Show newest version
/*--------------------------------------------------------------------------
 *  Copyright (c) 2010-2020, Elon.su All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * Neither the name of the elon developer nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 * Author: Elon.su, you can also mail [email protected]
 *--------------------------------------------------------------------------
 */
package com.github.dennisit.vplus.data.utils;

import com.alibaba.fastjson.JSONObject;
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.lang.Nullable;
import org.springframework.util.Assert;
import org.springframework.web.util.HtmlUtils;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Pattern;
import java.util.stream.Stream;

/**
 * Created by Elon.su on 17/5/13.
 */
public class StringUtils extends org.springframework.util.StringUtils {

    /** Logger used by the file-extraction helpers to report failures. */
    private static final Logger LOG = LoggerFactory.getLogger(StringUtils.class);

    /** Single space, re-exported from Apache Commons Lang. */
    public static final String SPACE = org.apache.commons.lang3.StringUtils.SPACE;
    /** Empty string, re-exported from Apache Commons Lang. */
    public static final String EMPTY = org.apache.commons.lang3.StringUtils.EMPTY;
    /** Default delimiter used by the single-argument {@code join} overloads. */
    public static final String COMMA = ",";
    /** Default separator characters used by the single-argument {@code asList} overload. */
    public static final String COMMA_FULL = ",|,";

    // Alphabets that random(int, RandomType) draws its characters from.
    private static final String INT_STR = "0123456789";
    private static final String STR_STR = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    private static final String ALL_STR = INT_STR + STR_STR;


    // File-name suffixes tested (without a leading dot) by pdfText, docText, docxText and wordText.
    public static final String FILE_PDF = "pdf";
    public static final String FILE_DOC = "doc";
    public static final String FILE_DOCX = "docx";

    /**
     * All possible chars for representing a number as a String.
     * formatUnsignedLong indexes this table with a 4-bit value, so only the
     * first 16 entries ('0'-'9', 'a'-'f') are reached from there.
     */
    final static byte[] DIGITS = {
            '0', '1', '2', '3', '4', '5',
            '6', '7', '8', '9', 'a', 'b',
            'c', 'd', 'e', 'f', 'g', 'h',
            'i', 'j', 'k', 'l', 'm', 'n',
            'o', 'p', 'q', 'r', 's', 't',
            'u', 'v', 'w', 'x', 'y', 'z',
            'A', 'B', 'C', 'D', 'E', 'F',
            'G', 'H', 'I', 'J', 'K', 'L',
            'M', 'N', 'O', 'P', 'Q', 'R',
            'S', 'T', 'U', 'V', 'W', 'X',
            'Y', 'Z'
    };


    // Characters removed by cleanText: quotes, punctuation and any whitespace.
    // NOTE(review): inside the character class ")-+" is parsed as the range ')'..'+',
    // so a literal '-' is NOT matched - confirm whether the hyphen was meant to be stripped.
    private final static Pattern SPECIAL_CHARS_REGEX = Pattern.compile("[`'\"|/,;()-+*%#·•� \\s]");


    /**
     * Tells whether the given character sequence is blank.
     * Delegates to Apache Commons Lang {@code StringUtils.isBlank}, which treats
     * {@code null}, the empty string and whitespace-only input as blank.
     *
     * @param cs the character sequence to check, may be {@code null}
     * @return {@code true} if {@code cs} is blank
     */
    public static boolean isBlank(final CharSequence cs) {
        return org.apache.commons.lang3.StringUtils.isBlank(cs);
    }

    /**
     * Negation of {@link #isBlank(CharSequence)}.
     *
     * @param cs the character sequence to check, may be {@code null}
     * @return {@code true} if {@code cs} is not blank
     */
    public static boolean isNotBlank(final CharSequence cs) {
        return !isBlank(cs);
    }

    /**
     * Trims the given string by delegating to Apache Commons Lang
     * {@code StringUtils.trim}.
     *
     * @param str the string to trim
     * @return whatever the Commons Lang {@code trim} returns for {@code str}
     */
    public static String trim(String str) {
        return org.apache.commons.lang3.StringUtils.trim(str);
    }


    /**
     * Generates a 32-character hexadecimal identifier laid out like a UUID
     * without dashes.
     * <p>
     * Two random longs are taken from {@link ThreadLocalRandom} and written as
     * hex digits straight into a byte buffer, following the approach of the
     * JDK 9 {@code UUID.toString()} implementation to keep the cost low.
     *
     * @return a string of 32 hexadecimal characters
     */
    public static String getUUID() {
        ThreadLocalRandom generator = ThreadLocalRandom.current();
        long leastSigBits = generator.nextLong();
        long mostSigBits = generator.nextLong();
        byte[] hex = new byte[32];
        // Each call fills its own, non-overlapping region of the buffer.
        formatUnsignedLong(mostSigBits >>> 32, hex, 0, 8);
        formatUnsignedLong(mostSigBits >>> 16, hex, 8, 4);
        formatUnsignedLong(mostSigBits, hex, 12, 4);
        formatUnsignedLong(leastSigBits >>> 48, hex, 16, 4);
        formatUnsignedLong(leastSigBits, hex, 20, 12);
        return new String(hex, Charsets.UTF_8);
    }


    /**
     * Writes the low-order bits of {@code val} as hexadecimal digits into
     * {@code buf}, filling positions {@code offset} (inclusive) to
     * {@code offset + len} (exclusive) from right to left.
     *
     * @param val    value whose low nibbles are rendered
     * @param buf    destination buffer
     * @param offset first index to fill
     * @param len    number of digits to write
     */
    private static void formatUnsignedLong(long val, byte[] buf, int offset, int len) {
        final int nibbleMask = 0xF;
        int cursor = offset + len;
        do {
            cursor--;
            buf[cursor] = DIGITS[(int) (val & nibbleMask)];
            // Move the next 4 bits into the lowest position.
            val = val >>> 4;
        } while (cursor > offset);
    }


    /**
     * Generates a random string using {@code RandomType.ALL}.
     *
     * @param count number of characters to generate
     * @return the random string
     */
    public static String random(int count) {
        return random(count, RandomType.ALL);
    }

    /**
     * Generates a random string of the requested length.
     * <p>
     * {@code RandomType.INT} draws from the digits, {@code RandomType.STRING}
     * from the ASCII letters, and any other value from both.
     *
     * @param count      number of characters to generate; 0 yields an empty string
     * @param randomType which alphabet to draw characters from
     * @return the random string
     * @throws IllegalArgumentException if {@code count} is negative
     */
    public static String random(int count, RandomType randomType) {
        if (count == 0) {
            return "";
        }
        Assert.isTrue(count > 0, "Requested random string length " + count + " is less than 0.");
        // The alphabet depends only on the type, so it is chosen once up front.
        final String alphabet;
        if (RandomType.INT == randomType) {
            alphabet = INT_STR;
        } else if (RandomType.STRING == randomType) {
            alphabet = STR_STR;
        } else {
            alphabet = ALL_STR;
        }
        final ThreadLocalRandom generator = ThreadLocalRandom.current();
        final StringBuilder out = new StringBuilder(count);
        for (int remaining = count; remaining > 0; remaining--) {
            out.append(alphabet.charAt(generator.nextInt(alphabet.length())));
        }
        return out.toString();
    }

    /**
     * Escapes HTML so the text can be output safely.
     * Delegates to Spring's {@code HtmlUtils.htmlEscape}.
     *
     * @param html the text to escape
     * @return the escaped text
     */
    public static String escapeHtml(String html) {
        return HtmlUtils.htmlEscape(html);
    }


    /**
     * Tells whether a character sequence consists solely of the ASCII digits
     * {@code '0'} to {@code '9'}. Signs, decimal points and whitespace are
     * not accepted.
     *
     * @param cs the CharSequence to check, may be null
     * @return {@code true} if {@code cs} is non-blank and every character is an ASCII digit
     */
    public static boolean isNumeric(final CharSequence cs) {
        if (isBlank(cs)) {
            return false;
        }
        final int length = cs.length();
        for (int index = 0; index < length; index++) {
            final char current = cs.charAt(index);
            if (current < '0' || current > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Splits a string on a delimiter and deletes common whitespace characters
     * (space, tab, line feed, carriage return, form feed) from every element.
     * <p>
     * Delegates to Spring's {@code delimitedListToStringArray}, passing the
     * whitespace characters as its chars-to-delete argument.
     *
     * @param str       the string to split
     * @param delimiter the delimiter to split on
     * @return the resulting elements
     */
    public static String[] splitTrim(@Nullable String str, @Nullable String delimiter) {
        // The set previously listed '\n' twice and lacked '\r', so CRLF input kept its carriage returns.
        return StringUtils.delimitedListToStringArray(str, delimiter, " \t\n\r\f");
    }


    /**
     * Lower-cases the first character of a string when it is an ASCII
     * upper-case letter ({@code 'A'}-{@code 'Z'}); any other input is
     * returned unchanged.
     *
     * @param str the string to convert, may be {@code null} or empty
     * @return the converted string, or {@code str} itself when nothing changes
     */
    public static String firstCharToLowerCase(String str) {
        // Guard: charAt(0) would throw on null or empty input.
        if (str == null || str.isEmpty()) {
            return str;
        }
        final char firstChar = str.charAt(0);
        if (firstChar < 'A' || firstChar > 'Z') {
            return str;
        }
        final char[] chars = str.toCharArray();
        chars[0] = (char) (firstChar + ('a' - 'A'));
        return new String(chars);
    }

    /**
     * Converts a string to lower case independently of the default locale.
     * <p>
     * {@link Locale#ROOT} is used so that the result (which this class relies
     * on for file-extension checks) does not change under locales with special
     * casing rules, such as Turkish. Blank strings contain no cased characters
     * and therefore come back unchanged.
     *
     * @param str the string to convert, may be {@code null}
     * @return the lower-cased string, or {@code null} if {@code str} is {@code null}
     */
    public static String toLowerCase(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        return str.toLowerCase(Locale.ROOT);
    }

    /**
     * Converts a string to upper case independently of the default locale.
     * <p>
     * {@link Locale#ROOT} is used so that the result does not change under
     * locales with special casing rules, such as Turkish. Blank strings
     * contain no cased characters and therefore come back unchanged.
     * (The misspelt method name is kept for compatibility with existing callers.)
     *
     * @param str the string to convert, may be {@code null}
     * @return the upper-cased string, or {@code null} if {@code str} is {@code null}
     */
    public static String toUppserCase(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        return str.toUpperCase(Locale.ROOT);
    }

    /**
     * Upper-cases the first character of a string when it is an ASCII
     * lower-case letter ({@code 'a'}-{@code 'z'}); any other input is
     * returned unchanged.
     *
     * @param str the string to convert, may be {@code null} or empty
     * @return the converted string, or {@code str} itself when nothing changes
     */
    public static String firstCharToUpperCase(String str) {
        // Guard: charAt(0) would throw on null or empty input.
        if (str == null || str.isEmpty()) {
            return str;
        }
        final char firstChar = str.charAt(0);
        if (firstChar < 'a' || firstChar > 'z') {
            return str;
        }
        final char[] chars = str.toCharArray();
        chars[0] = (char) (firstChar - ('a' - 'A'));
        return new String(chars);
    }

    /**
     * Tells whether at least one of the given character sequences is blank.
     * When {@code ObjectUtils.isEmpty} reports the argument array as empty,
     * the answer is {@code true}.
     *
     * @param css the character sequences to check
     * @return {@code true} if any element is blank or no elements were supplied
     */
    public static boolean isAnyBlank(final CharSequence... css) {
        if (ObjectUtils.isEmpty(css)) {
            return true;
        }
        for (final CharSequence candidate : css) {
            if (isBlank(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether every one of the given character sequences is non-blank.
     * When {@code ObjectUtils.isEmpty} reports the argument array as empty,
     * the answer is {@code false}.
     *
     * @param css the character sequences to check
     * @return {@code true} if at least one element was supplied and none is blank
     */
    public static boolean isNoneBlank(final CharSequence... css) {
        if (ObjectUtils.isEmpty(css)) {
            return false;
        }
        for (final CharSequence candidate : css) {
            if (!isNotBlank(candidate)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Joins the elements of a collection into one string separated by
     * {@link #COMMA}. Delegates to Spring's {@code collectionToDelimitedString}.
     *
     * @param coll the collection to join (any element type)
     * @return the joined string
     */
    public static String join(Collection<?> coll) {
        return collectionToDelimitedString(coll, COMMA);
    }

    /**
     * Joins the elements of a collection into one string separated by the
     * given delimiter. Delegates to Spring's {@code collectionToDelimitedString}.
     *
     * @param coll  the collection to join (any element type)
     * @param delim the delimiter placed between elements
     * @return the joined string
     */
    public static String join(Collection<?> coll, String delim) {
        return collectionToDelimitedString(coll, delim);
    }

    /**
     * Joins the elements of an array into one string separated by {@link #COMMA}.
     *
     * @param arr the array to join
     * @return the joined string
     */
    public static String join(Object[] arr) {
        return join(arr, COMMA);
    }

    /**
     * Joins the elements of an array into one string separated by the given
     * delimiter. Delegates to Spring's {@code arrayToDelimitedString}.
     *
     * @param arr   the array to join
     * @param delim the delimiter placed between elements
     * @return the joined string
     */
    public static String join(Object[] arr, String delim) {
        return arrayToDelimitedString(arr, delim);
    }

    /**
     * Splits text into a list using {@link #COMMA_FULL} as the separator
     * characters.
     *
     * @param text the text to split
     * @return the list produced by {@link #asList(String, String)}
     */
    public static List asList(String text) {
        return asList(text, COMMA_FULL);
    }

    /**
     * Splits text into a list of strings.
     * <p>
     * The split is done by Apache Commons Lang {@code StringUtils.split}, which
     * treats {@code delim} as a set of separator characters rather than as one
     * multi-character delimiter. The returned list is always a new, modifiable
     * list, whether or not the text is blank.
     *
     * @param text  the text to split; blank text yields an empty list
     * @param delim the separator characters
     * @return a modifiable list of the split elements (all {@code String}s)
     */
    public static List asList(String text, String delim) {
        if (isBlank(text)) {
            return Lists.newArrayList();
        }
        // Copy into an ArrayList: Arrays.asList would hand back a fixed-size view,
        // unlike the modifiable list returned for blank input.
        return Lists.newArrayList(org.apache.commons.lang3.StringUtils.split(text, delim));
    }

    /**
     * Escapes HTML by delegating to Spring's {@code HtmlUtils.htmlEscape};
     * performs the same call as {@link #escapeHtml(String)}.
     *
     * @param text the text to escape
     * @return the escaped text
     */
    public static String htmlEscape(String text) {
        return HtmlUtils.htmlEscape(text);
    }

    /**
     * Extracts the text of an HTML file, parsed by Jsoup as UTF-8.
     * Any exception is logged and results in {@link #EMPTY}.
     *
     * @param f the HTML file
     * @return the document text, or {@link #EMPTY} if parsing failed
     */
    public static String htmlText(File f) {
        String extracted = EMPTY;
        try {
            final Document parsed = Jsoup.parse(f, StandardCharsets.UTF_8.name());
            extracted = parsed.text();
        } catch (Exception e) {
            LOG.error(e.getLocalizedMessage(), e);
        }
        return extracted;
    }

    /**
     * Extracts the text of a Word file, choosing the extractor from the file
     * name: names ending in {@link #FILE_DOC} go to {@link #docText(File)},
     * names ending in {@link #FILE_DOCX} go to {@link #docxText(File)}.
     * The comparison is case-insensitive.
     *
     * @param f the Word file
     * @return the extracted text, or {@link #EMPTY} for any other file name
     */
    public static String wordText(File f) {
        final String lowerName = toLowerCase(f.getName());
        if (lowerName.endsWith(FILE_DOC)) {
            return docText(f);
        }
        return lowerName.endsWith(FILE_DOCX) ? docxText(f) : EMPTY;
    }

    /**
     * Extracts the text of a legacy Word file whose name ends in
     * {@link #FILE_DOC} (case-insensitive).
     * <p>
     * Runs of repeated line breaks are collapsed to a single one and the result
     * is trimmed. Any exception is logged and results in {@link #EMPTY}.
     *
     * @param f the Word file
     * @return the extracted text, or {@link #EMPTY} if the name does not match
     *         or extraction failed
     */
    public static String docText(File f) {
        try {
            if (toLowerCase(f.getName()).endsWith(FILE_DOC)) {
                // try-with-resources closes the stream even when extraction throws.
                try (FileInputStream fis = new FileInputStream(f)) {
                    WordExtractor ex = new WordExtractor(fis);
                    String text = ex.getText();
                    text = text.replaceAll("(\\r\\n){2,}", "\r\n").replaceAll("(\\n){2,}", "\n");
                    return trim(text);
                }
            }
        } catch (Exception e) {
            LOG.error(e.getLocalizedMessage(), e);
        }
        return EMPTY;
    }

    /**
     * Extracts the text of an OOXML Word file whose name ends in
     * {@link #FILE_DOCX} (case-insensitive).
     * <p>
     * Runs of repeated line breaks are collapsed to a single one and the result
     * is trimmed. Any exception is logged and results in {@link #EMPTY}.
     *
     * @param f the Word file
     * @return the extracted text, or {@link #EMPTY} if the name does not match
     *         or extraction failed
     */
    public static String docxText(File f) {
        try {
            if (toLowerCase(f.getName()).endsWith(FILE_DOCX)) {
                // try-with-resources closes the stream even when extraction throws.
                try (FileInputStream fis = new FileInputStream(f)) {
                    XWPFDocument xwpf = new XWPFDocument(fis);
                    POIXMLTextExtractor ex = new XWPFWordExtractor(xwpf);
                    String text = ex.getText();
                    text = text.replaceAll("(\\r\\n){2,}", "\r\n").replaceAll("(\\n){2,}", "\n");
                    return trim(text);
                }
            }
        } catch (Exception e) {
            LOG.error(e.getLocalizedMessage(), e);
        }
        return EMPTY;
    }

    /**
     * Extracts the text of a PDF file whose name ends in {@link #FILE_PDF}
     * (case-insensitive). The result is trimmed. Any exception is logged and
     * results in {@link #EMPTY}.
     *
     * @param f the PDF file
     * @return the extracted text, or {@link #EMPTY} if the name does not match
     *         or extraction failed
     */
    public static String pdfText(File f) {
        try {
            if (toLowerCase(f.getName()).endsWith(FILE_PDF)) {
                // Both the file handle and the parsed document are released even
                // when parsing or text stripping throws.
                try (RandomAccessFile source = new RandomAccessFile(f, "r")) {
                    PDFParser parser = new PDFParser(source);
                    parser.parse();
                    try (PDDocument document = parser.getPDDocument()) {
                        PDFTextStripper stripper = new PDFTextStripper();
                        return trim(stripper.getText(document));
                    }
                }
            }
        } catch (Exception e) {
            LOG.error(e.getLocalizedMessage(), e);
        }
        return EMPTY;
    }

    /**
     * Tells whether a file name ends with the given extension, ignoring case.
     *
     * @param fileName  the file name to test
     * @param extension the suffix to look for
     * @return {@code true} if both arguments are non-blank and the lower-cased
     *         file name ends with the lower-cased extension
     */
    public static boolean isExtension(String fileName, String extension) {
        if (isNotBlank(fileName) && isNotBlank(extension)) {
            final String lowerName = toLowerCase(fileName);
            return lowerName.endsWith(toLowerCase(extension));
        }
        return false;
    }


    /**
     * Tells whether the content can be parsed by fastjson's
     * {@code JSONObject.parseObject} without an exception.
     *
     * @param content the text to test
     * @return {@code true} if the content is non-blank and parsing succeeds
     */
    public static boolean isJson(String content) {
        boolean parsable = false;
        if (!isBlank(content)) {
            try {
                JSONObject.parseObject(content);
                parsable = true;
            } catch (Exception e) {
                // A parse failure simply means the content is not JSON.
                parsable = false;
            }
        }
        return parsable;
    }

    /**
     * Builds a string consisting of one character repeated a number of times.
     *
     * @param c     the character to repeat
     * @param count how many times to repeat it
     * @return a string of {@code count} copies of {@code c}
     */
    public static String repeat(char c, int count) {
        final char[] filled = new char[count];
        Arrays.fill(filled, c);
        return String.valueOf(filled);
    }

    /**
     * Reads the whole content of a stream as text.
     * <p>
     * The stream is read line by line and the lines are concatenated without
     * their line terminators. The stream is not closed by this method.
     *
     * @param in      the input stream
     * @param charset name of the character set used to decode the stream
     * @return the concatenated lines
     * @throws IOException if reading fails or the charset name is not supported
     */
    public static String getString(InputStream in, String charset) throws IOException {
        final BufferedReader lineReader = new BufferedReader(new InputStreamReader(in, charset));
        final StringBuilder collected = new StringBuilder();
        for (String current = lineReader.readLine(); current != null; current = lineReader.readLine()) {
            collected.append(current);
        }
        return collected.toString();
    }


    /**
     * Renders the stack trace of a throwable as a string, prefixed with a
     * line feed.
     *
     * @param throwable the throwable to render
     * @return a line feed followed by the output of {@code printStackTrace}
     */
    public static String getStackTrace(Throwable throwable) {
        final StringWriter buffer = new StringWriter();
        try (PrintWriter printer = new PrintWriter(buffer)) {
            throwable.printStackTrace(printer);
            return "\n" + buffer;
        }
    }

    /**
     * Formats a text template by substituting each {@code {}} placeholder, in
     * order, with the next value.
     * <p>
     * Placeholders left over once the values run out stay in the text as-is,
     * and surplus values are ignored. A lone <code>{</code> (including one at
     * the very end of the template) is copied through literally.
     *
     * @param template the text template; replaced parts are written as {@code {}}
     * @param values   the values to insert
     * @return the formatted text, or {@code template} itself when it is
     *         {@code null}/empty or no values are given
     */
    public static String format(String template, Object... values) {
        if (template == null || template.isEmpty() || values == null || values.length == 0) {
            return template;
        }
        final int length = template.length();
        final StringBuilder sb = new StringBuilder(length);

        int cursor = 0;
        int valueIndex = 0;
        while (valueIndex < values.length) {
            // Searching for the full "{}" token avoids reading past a trailing '{'
            // and lets "{{}" resolve to '{' followed by the value.
            final int placeholder = template.indexOf("{}", cursor);
            if (placeholder < 0) {
                break;
            }
            sb.append(template, cursor, placeholder).append(values[valueIndex++]);
            cursor = placeholder + 2;
        }
        // Whatever follows the last substituted placeholder is copied verbatim.
        sb.append(template, cursor, length);
        return sb.toString();
    }


    /**
     * Cleans a string by removing every character matched by
     * {@code SPECIAL_CHARS_REGEX} (whitespace plus a set of quote and
     * punctuation characters, some of them special in SQL).
     *
     * @param txt the text to clean
     * @return the cleaned text, or {@code null} if {@code txt} is {@code null}
     */
    @Nullable
    public static String cleanText(@Nullable String txt) {
        return txt == null
                ? null
                : SPECIAL_CHARS_REGEX.matcher(txt).replaceAll(StringPool.EMPTY);
    }

    /**
     * Reduces a parameter to an identifier by keeping only the characters for
     * which {@link Character#isJavaIdentifierPart(char)} is true.
     *
     * @param param the parameter to clean
     * @return the cleaned identifier, or {@code null} if {@code param} is {@code null}
     */
    @Nullable
    public static String cleanIdentifier(@Nullable String param) {
        if (param == null) {
            return null;
        }
        final StringBuilder kept = new StringBuilder();
        for (final char candidate : param.toCharArray()) {
            if (Character.isJavaIdentifierPart(candidate)) {
                kept.append(candidate);
            }
        }
        return kept.toString();
    }

    /**
     * Improved variant of the JDK {@code substring}.
     * <p>
     * Indexes start at 0 and negative indexes count from the end, the last
     * character being -1. When {@code fromIndex} is negative and
     * {@code toIndex} is 0, the substring runs to the end of the string.
     * If the resolved end lies before the resolved start, the two are swapped.
     * Indexes that fall outside the string are clamped to its bounds instead
     * of raising an exception or padding the result.
     *
     * @param string    the source string, may be {@code null}
     * @param fromIndex start index (inclusive)
     * @param toIndex   end index (exclusive)
     * @return the substring; an empty string when the range is empty;
     *         {@code null} if {@code string} is {@code null}
     */
    public static String sub(String string, int fromIndex, int toIndex) {
        if (string == null) {
            return null;
        }
        final int len = string.length();
        if (fromIndex < 0) {
            fromIndex += len;
            // sub(s, -n, 0) means "the last n characters".
            if (toIndex == 0) {
                toIndex = len;
            }
        }
        if (toIndex < 0) {
            toIndex += len;
        }
        // Clamp into [0, len]: the array copy used previously padded with NUL
        // characters past the end and threw for a start outside the string.
        fromIndex = Math.max(0, Math.min(fromIndex, len));
        toIndex = Math.max(0, Math.min(toIndex, len));
        if (toIndex < fromIndex) {
            final int swapped = fromIndex;
            fromIndex = toIndex;
            toIndex = swapped;
        }
        return string.substring(fromIndex, toIndex);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy