All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.dennisit.vplus.data.utils.StringUtils Maven / Gradle / Ivy

/*--------------------------------------------------------------------------
 *  Copyright (c) 2010-2020, Elon.su All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * Neither the name of the elon developer nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 * Author: Elon.su, you can also mail [email protected]
 *--------------------------------------------------------------------------
*/
package com.github.dennisit.vplus.data.utils;

import com.google.common.collect.Lists;
import org.apache.commons.io.FilenameUtils;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.util.HtmlUtils;

import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

/**
 * String and document-text helpers layered on top of Spring's {@code StringUtils}.
 *
 * <p>Provides blank checks, case conversion, joining/splitting, HTML escaping and
 * plain-text extraction from HTML, Word ({@code doc}/{@code docx}) and PDF files.
 * The extraction methods log any {@link Exception} and return {@link #EMPTY}
 * instead of propagating it.
 *
 * Created by Elon.su on 17/5/13.
 */
public class StringUtils extends org.springframework.util.StringUtils {

    private static final Logger LOG = LoggerFactory.getLogger(StringUtils.class);

    public static final String SPACE = org.apache.commons.lang3.StringUtils.SPACE;
    public static final String EMPTY = org.apache.commons.lang3.StringUtils.EMPTY;
    public static final String COMMA = ",";

    /**
     * Default separator characters for {@link #asList(String)}. The value is handed to
     * commons-lang {@code split(String, String)}, which treats every character of the
     * argument as an individual separator (it is not a regular expression), so the
     * {@code '|'} character splits the text as well.
     */
    public static final String COMMA_FULL = ",|,";

    public static final String FILE_PDF = "pdf";
    public static final String FILE_DOC = "doc";
    public static final String FILE_DOCX = "docx";

    /**
     * Checks whether the given text is {@code null}, empty or whitespace only.
     *
     * @param cs the text to check, may be {@code null}
     * @return {@code true} if the text is blank
     */
    public static boolean isBlank(final CharSequence cs) {
        return org.apache.commons.lang3.StringUtils.isBlank(cs);
    }

    /**
     * Negation of {@link #isBlank(CharSequence)}.
     *
     * @param cs the text to check, may be {@code null}
     * @return {@code true} if the text contains at least one non-whitespace character
     */
    public static boolean isNotBlank(final CharSequence cs) {
        return !isBlank(cs);
    }

    /**
     * Null-safe trim, delegating to commons-lang.
     *
     * @param str the text to trim, may be {@code null}
     * @return the trimmed text, or {@code null} if the input was {@code null}
     */
    public static String trim(String str) {
        return org.apache.commons.lang3.StringUtils.trim(str);
    }

    /**
     * Lower-cases the first character when it is an ASCII upper-case letter.
     *
     * @param str the text to convert, may be {@code null} or empty
     * @return the converted text; {@code null} and empty input are returned unchanged
     */
    public static String firstCharToLowerCase(String str) {
        if (hasNoChars(str)) {
            return str;
        }
        char firstChar = str.charAt(0);
        if (firstChar >= 'A' && firstChar <= 'Z') {
            char[] arr = str.toCharArray();
            arr[0] += ('a' - 'A');
            return new String(arr);
        }
        return str;
    }

    /**
     * Lower-cases the whole text using the default locale.
     *
     * @param str the text to convert, may be {@code null}
     * @return the lower-cased text; blank input is returned unchanged
     */
    public static String toLowerCase(String str) {
        if (isBlank(str)) {
            return str;
        }
        return str.toLowerCase();
    }

    /**
     * Upper-cases the whole text using the default locale.
     *
     * @param str the text to convert, may be {@code null}
     * @return the upper-cased text; blank input is returned unchanged
     */
    public static String toUpperCase(String str) {
        if (isBlank(str)) {
            return str;
        }
        return str.toUpperCase();
    }

    /**
     * Misspelled variant of {@link #toUpperCase(String)}, kept so existing callers
     * continue to compile.
     *
     * @param str the text to convert, may be {@code null}
     * @return the upper-cased text; blank input is returned unchanged
     */
    public static String toUppserCase(String str) {
        return toUpperCase(str);
    }

    /**
     * Upper-cases the first character when it is an ASCII lower-case letter.
     *
     * @param str the text to convert, may be {@code null} or empty
     * @return the converted text; {@code null} and empty input are returned unchanged
     */
    public static String firstCharToUpperCase(String str) {
        if (hasNoChars(str)) {
            return str;
        }
        char firstChar = str.charAt(0);
        if (firstChar >= 'a' && firstChar <= 'z') {
            char[] arr = str.toCharArray();
            arr[0] -= ('a' - 'A');
            return new String(arr);
        }
        return str;
    }

    /**
     * Joins the elements of a collection with {@link #COMMA}.
     *
     * @param coll the elements to join
     * @return the joined text
     */
    public static String join(Collection<?> coll) {
        return collectionToDelimitedString(coll, COMMA);
    }

    /**
     * Joins the elements of a collection with the given delimiter.
     *
     * @param coll  the elements to join
     * @param delim the delimiter placed between elements
     * @return the joined text
     */
    public static String join(Collection<?> coll, String delim) {
        return collectionToDelimitedString(coll, delim);
    }

    /**
     * Joins the elements of an array with {@link #COMMA}.
     *
     * @param arr the elements to join
     * @return the joined text
     */
    public static String join(Object[] arr) {
        return join(arr, COMMA);
    }

    /**
     * Joins the elements of an array with the given delimiter.
     *
     * @param arr   the elements to join
     * @param delim the delimiter placed between elements
     * @return the joined text
     */
    public static String join(Object[] arr, String delim) {
        return arrayToDelimitedString(arr, delim);
    }

    /**
     * Splits the text on the separator characters in {@link #COMMA_FULL}.
     *
     * @param text the text to split, may be {@code null}
     * @return a growable list of the parts; empty when the text is blank
     */
    public static List asList(String text) {
        return asList(text, COMMA_FULL);
    }

    /**
     * Splits the text using commons-lang {@code split(String, String)}.
     *
     * @param text  the text to split, may be {@code null}
     * @param delim the separator characters (each character is a separator)
     * @return a growable list of the parts; empty when the text is blank
     */
    public static List asList(String text, String delim) {
        if (isBlank(text)) {
            return Lists.newArrayList();
        }
        // Copy into an ArrayList so both branches return a list that supports add/remove;
        // Arrays.asList would hand back a fixed-size view for non-blank input only.
        List<String> parts = Lists.newArrayList(org.apache.commons.lang3.StringUtils.split(text, delim));
        return parts;
    }

    /**
     * Escapes HTML special characters via Spring's {@code HtmlUtils}.
     *
     * @param text the text to escape
     * @return the escaped text
     */
    public static String htmlEscape(String text) {
        return HtmlUtils.htmlEscape(text);
    }

    /**
     * Parses the file as UTF-8 HTML and returns the text of the document.
     *
     * @param f the HTML file
     * @return the document text, or {@link #EMPTY} if parsing failed
     */
    public static String htmlText(File f) {
        try {
            Document doc = Jsoup.parse(f, StandardCharsets.UTF_8.name());
            return doc.text();
        } catch (Exception e) {
            LOG.error(e.getLocalizedMessage(), e);
        }
        return EMPTY;
    }

    /**
     * Extracts the text of a Word file, dispatching on the file name suffix.
     *
     * @param f the Word file, may be {@code null}
     * @return the extracted text, or {@link #EMPTY} for {@code null}, an unsupported
     *         suffix or a failed extraction
     */
    public static String wordText(File f) {
        if (f == null) {
            return EMPTY;
        }
        if (isExtension(f.getName(), FILE_DOC)) {
            return docText(f);
        }
        if (isExtension(f.getName(), FILE_DOCX)) {
            return docxText(f);
        }
        return EMPTY;
    }

    /**
     * Extracts the text of a binary Word file whose name ends with {@link #FILE_DOC}.
     *
     * @param f the Word file
     * @return the trimmed text with repeated line breaks collapsed, or {@link #EMPTY}
     *         if the suffix does not match or extraction failed
     */
    public static String docText(File f) {
        try {
            if (isExtension(f.getName(), FILE_DOC)) {
                // try-with-resources: the stream is released even when extraction throws.
                try (FileInputStream fis = new FileInputStream(f)) {
                    WordExtractor ex = new WordExtractor(fis);
                    return trim(collapseLineBreaks(ex.getText()));
                }
            }
        } catch (Exception e) {
            LOG.error(e.getLocalizedMessage(), e);
        }
        return EMPTY;
    }

    /**
     * Extracts the text of an OOXML Word file whose name ends with {@link #FILE_DOCX}.
     *
     * @param f the Word file
     * @return the trimmed text with repeated line breaks collapsed, or {@link #EMPTY}
     *         if the suffix does not match or extraction failed
     */
    public static String docxText(File f) {
        try {
            if (isExtension(f.getName(), FILE_DOCX)) {
                // try-with-resources: the stream is released even when extraction throws.
                try (FileInputStream fis = new FileInputStream(f)) {
                    XWPFDocument xwpf = new XWPFDocument(fis);
                    POIXMLTextExtractor ex = new XWPFWordExtractor(xwpf);
                    return trim(collapseLineBreaks(ex.getText()));
                }
            }
        } catch (Exception e) {
            LOG.error(e.getLocalizedMessage(), e);
        }
        return EMPTY;
    }

    /**
     * Extracts the text of a PDF file whose name ends with {@link #FILE_PDF}.
     *
     * @param f the PDF file
     * @return the trimmed text, or {@link #EMPTY} if the suffix does not match or
     *         extraction failed
     */
    public static String pdfText(File f) {
        try {
            if (isExtension(f.getName(), FILE_PDF)) {
                // The outer resource covers a failing parse(); the inner one covers a
                // failing text extraction. Closing the source a second time is harmless.
                try (RandomAccessFile source = new RandomAccessFile(f, "r")) {
                    PDFParser parser = new PDFParser(source);
                    parser.parse();
                    try (PDDocument document = parser.getPDDocument()) {
                        PDFTextStripper stripper = new PDFTextStripper();
                        return trim(stripper.getText(document));
                    }
                }
            }
        } catch (Exception e) {
            LOG.error(e.getLocalizedMessage(), e);
        }
        return EMPTY;
    }

    /**
     * Case-insensitive suffix check. Note that this is a plain {@code endsWith}
     * comparison: no dot is required in front of the extension.
     *
     * @param fileName  the file name, may be {@code null}
     * @param extension the suffix to look for, may be {@code null}
     * @return {@code true} if both arguments are non-blank and the lower-cased file
     *         name ends with the lower-cased extension
     */
    public static boolean isExtension(String fileName, String extension) {
        if (isBlank(fileName) || isBlank(extension)) {
            return false;
        }
        return toLowerCase(fileName).endsWith(toLowerCase(extension));
    }

    /** Returns {@code true} for {@code null} and zero-length strings. */
    private static boolean hasNoChars(String str) {
        return str == null || str.length() == 0;
    }

    /** Collapses runs of two or more CRLF (then LF) line breaks into a single one. */
    private static String collapseLineBreaks(String text) {
        return text.replaceAll("(\\r\\n){2,}", "\r\n").replaceAll("(\\n){2,}", "\n");
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy