All downloads are free. The search and download features use the official Maven repository.

com.huaban.analysis.jieba.CharacterUtil Maven / Gradle / Ivy

package com.huaban.analysis.jieba;

import java.util.regex.Pattern;

/**
 * Static helpers for classifying single UTF-16 characters and for
 * normalizing them (full-width to half-width, upper case to lower case).
 */
public class CharacterUtil {
    /**
     * Matches either a decimal number of the form {@code digits.digits} or a
     * run of ASCII letters and digits. Left public and non-final so existing
     * callers that read or reassign it keep working.
     */
    public static Pattern reSkip = Pattern.compile("(\\d+\\.\\d+|[a-zA-Z0-9]+)");

    /** Punctuation characters accepted by {@link #isConnector(char)}. */
    private static final char[] connectors = new char[] {'+', '#', '&', '.', '_'};

    /** U+3000 IDEOGRAPHIC SPACE, the full-width counterpart of the ASCII space. */
    private static final char IDEOGRAPHIC_SPACE = 0x3000;

    /** First full-width form that has an ASCII counterpart (U+FF01). */
    private static final char FULLWIDTH_FIRST = 0xFF01;

    /** Last full-width form that has an ASCII counterpart (U+FF5E). */
    private static final char FULLWIDTH_LAST = 0xFF5E;

    /** Distance between a full-width form and its ASCII counterpart. */
    private static final int FULLWIDTH_OFFSET = 0xFEE0;

    /** Distance between an ASCII upper-case letter and its lower-case form. */
    private static final int CASE_OFFSET = 'a' - 'A';

    /**
     * Tells whether {@code ch} lies in the range U+4E00..U+9FA5 (inclusive).
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is inside that range
     */
    public static boolean isChineseLetter(char ch) {
        return ch >= 0x4E00 && ch <= 0x9FA5;
    }

    /**
     * Tells whether {@code ch} is an ASCII letter, {@code A-Z} or {@code a-z}.
     *
     * @param ch the character to test
     * @return {@code true} for ASCII letters only
     */
    public static boolean isEnglishLetter(char ch) {
        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    }

    /**
     * Tells whether {@code ch} is an ASCII digit, {@code 0-9}.
     *
     * @param ch the character to test
     * @return {@code true} for ASCII digits only
     */
    public static boolean isDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }

    /**
     * Tells whether {@code ch} is one of the connector characters
     * {@code + # & . _}.
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is a connector
     */
    public static boolean isConnector(char ch) {
        for (char connector : connectors) {
            if (ch == connector) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether {@code ch} is accepted by any of
     * {@link #isChineseLetter(char)}, {@link #isEnglishLetter(char)},
     * {@link #isDigit(char)} or {@link #isConnector(char)}.
     *
     * @param ch the character to test
     * @return {@code true} if at least one of the four predicates accepts it
     */
    public static boolean ccFind(char ch) {
        return isChineseLetter(ch)
                || isEnglishLetter(ch)
                || isDigit(ch)
                || isConnector(ch);
    }

    /**
     * Normalizes a character: full-width to half-width, then upper case to
     * lower case.
     *
     * <p>U+3000 becomes an ASCII space and U+FF01..U+FF5E are shifted down to
     * their ASCII counterparts. ASCII upper-case letters are then lower-cased.
     * Case folding runs <em>after</em> width folding so that a full-width
     * upper-case letter such as U+FF21 ends up as {@code 'a'}, not {@code 'A'}.
     * Every other character is returned unchanged.
     *
     * @param input the character to normalize
     * @return the normalized character
     */
    public static char regularize(char input) {
        char ch = input;
        if (ch == IDEOGRAPHIC_SPACE) {
            ch = ' ';
        } else if (ch >= FULLWIDTH_FIRST && ch <= FULLWIDTH_LAST) {
            ch = (char) (ch - FULLWIDTH_OFFSET);
        }
        if (ch >= 'A' && ch <= 'Z') {
            ch = (char) (ch + CASE_OFFSET);
        }
        return ch;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy