com.huaban.analysis.jieba.CharacterUtil Maven / Gradle / Ivy
package com.huaban.analysis.jieba;
import java.util.regex.Pattern;
/**
 * Static helpers for classifying single {@code char} values and for
 * normalizing them (full-width to half-width, upper-case to lower-case).
 *
 * <p>All methods operate on one UTF-16 code unit at a time and keep no state.
 */
public class CharacterUtil {

    /**
     * Matches either a decimal number of the form {@code digits.digits} or a
     * run of one or more ASCII letters/digits.
     *
     * <p>NOTE(review): this field is public and not {@code final}; it is left
     * that way so existing callers that read (or reassign) it keep compiling.
     */
    public static Pattern reSkip = Pattern.compile("(\\d+\\.\\d+|[a-zA-Z0-9]+)");

    /** Punctuation characters accepted by {@link #isConnector(char)}. */
    private static final char[] CONNECTORS = new char[] {'+', '#', '&', '.', '_'};

    /** U+3000 IDEOGRAPHIC SPACE, the full-width counterpart of the ASCII space. */
    private static final char FULLWIDTH_SPACE = '\u3000';

    /** First full-width form that has an ASCII counterpart (U+FF01, maps to '!'). */
    private static final char FULLWIDTH_FIRST = '\uFF01';

    /** Last full-width form that has an ASCII counterpart (U+FF5E, maps to '~'). */
    private static final char FULLWIDTH_LAST = '\uFF5E';

    /** Distance between a full-width form and its ASCII counterpart (0xFEE0 = 65248). */
    private static final int FULLWIDTH_OFFSET = FULLWIDTH_FIRST - '!';

    /** Distance between an ASCII upper-case letter and its lower-case form. */
    private static final int CASE_OFFSET = 'a' - 'A';

    /**
     * Tells whether {@code ch} lies in the range U+4E00..U+9FA5 (inclusive).
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is inside that range
     */
    public static boolean isChineseLetter(char ch) {
        return ch >= 0x4E00 && ch <= 0x9FA5;
    }

    /**
     * Tells whether {@code ch} is an ASCII letter, {@code A-Z} or {@code a-z}.
     *
     * @param ch the character to test
     * @return {@code true} for ASCII letters, {@code false} otherwise
     */
    public static boolean isEnglishLetter(char ch) {
        boolean upper = ch >= 'A' && ch <= 'Z';
        boolean lower = ch >= 'a' && ch <= 'z';
        return upper || lower;
    }

    /**
     * Tells whether {@code ch} is an ASCII digit, {@code 0-9}.
     *
     * @param ch the character to test
     * @return {@code true} for ASCII digits, {@code false} otherwise
     */
    public static boolean isDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }

    /**
     * Tells whether {@code ch} is one of the connector characters
     * {@code + # & . _}.
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is a connector
     */
    public static boolean isConnector(char ch) {
        for (char connector : CONNECTORS) {
            if (ch == connector) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether {@code ch} satisfies any of {@link #isChineseLetter(char)},
     * {@link #isEnglishLetter(char)}, {@link #isDigit(char)} or
     * {@link #isConnector(char)}.
     *
     * @param ch the character to test
     * @return {@code true} if at least one of the four predicates accepts {@code ch}
     */
    public static boolean ccFind(char ch) {
        return isChineseLetter(ch)
                || isEnglishLetter(ch)
                || isDigit(ch)
                || isConnector(ch);
    }

    /**
     * Normalizes a character: full-width to half-width, then upper-case to
     * lower-case.
     *
     * <ul>
     *   <li>U+3000 (ideographic space) becomes an ASCII space.</li>
     *   <li>U+FF01..U+FF5E become their ASCII counterparts {@code !}..{@code ~}.</li>
     *   <li>ASCII {@code A-Z} (including letters produced by the previous step)
     *       become {@code a-z}.</li>
     *   <li>Every other character is returned unchanged.</li>
     * </ul>
     *
     * @param input the character to normalize
     * @return the normalized character
     */
    public static char regularize(char input) {
        if (input == FULLWIDTH_SPACE) {
            return ' ';
        }

        char normalized = input;
        if (normalized >= FULLWIDTH_FIRST && normalized <= FULLWIDTH_LAST) {
            normalized = (char) (normalized - FULLWIDTH_OFFSET);
        }
        // Lower-casing runs after the width conversion on purpose: a full-width
        // capital such as U+FF21 must end up as 'a', the same as a plain 'A'.
        if (normalized >= 'A' && normalized <= 'Z') {
            normalized = (char) (normalized + CASE_OFFSET);
        }
        return normalized;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy