com.seejoke.pinyin.SpellConverterImpl Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of core-tools Show documentation
Show all versions of core-tools Show documentation
提供java常用的、流行的工具方法,减少项目冗余代码
package com.seejoke.pinyin;
import com.google.common.collect.ArrayListMultimap;
import com.seejoke.core.utils.StringUtils;
import java.util.Locale;
import java.util.regex.Pattern;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
/**
 * {@link ISpellConverter} implementation that delegates single-character lookups to pinyin4j
 * and picks between multiple readings of a character by looking up the surrounding text in
 * the polyphone multimap obtained from {@link SpellDictionary}.
 *
 * <p>Created: 2018/5/3 17:45.
 *
 * @author [email protected]
 */
public class SpellConverterImpl implements ISpellConverter {

    /**
     * One or more consecutive separator characters (punctuation, brackets, space). Each run is
     * collapsed into a single space before conversion. Compiled once instead of on every call.
     */
    private static final Pattern SEPARATORS =
            Pattern.compile("[\\.,\\,!·\\!?\\?;\\;\\(\\)()\\[\\]\\:: ]+");

    /**
     * Polyphone lookup table. It is queried with a pinyin string as key and tested for whether
     * the associated values contain a given word or single character.
     * NOTE(review): kept as a raw type because the declared return type of
     * {@code SpellDictionary.getDuoYinZiMap()} is not visible here - presumably
     * {@code <String, String>}; confirm before parameterizing.
     */
    private final ArrayListMultimap duoYinZiMap;

    /** Creates a converter backed by the default {@link SpellDictionary}. */
    public SpellConverterImpl() {
        this.duoYinZiMap = SpellDictionary.getDefault().getDuoYinZiMap();
    }

    /**
     * Returns the candidate pinyin readings of a single character.
     *
     * <p>Characters with code 32 (space) through 125 are returned unchanged as a one-element
     * array. Space must pass through because {@link #getPinyin(String)} replaces separators
     * with spaces before calling this method. Every other character is handed to pinyin4j,
     * configured for tone-less, lower-case output with {@code v} standing in for {@code ü}.
     *
     * @param ch the character to convert
     * @return the readings of {@code ch} after de-duplication by {@code StringUtils.distinct}
     * @throws IllegalPinyinException if pinyin4j rejects the output format combination
     */
    @Override
    public String[] getPinyin(char ch) throws IllegalPinyinException {
        // Pass-through range is checked first so no pinyin4j format object is built for it.
        if (ch >= 32 && ch <= 125) {
            return new String[]{String.valueOf(ch)};
        }
        try {
            HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();
            outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
            outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
            outputFormat.setVCharType(HanyuPinyinVCharType.WITH_V);
            // NOTE(review): pinyin4j returns null for characters it has no reading for; whether
            // StringUtils.distinct tolerates a null array is not visible here - confirm.
            return StringUtils.distinct(PinyinHelper.toHanyuPinyinStringArray(ch, outputFormat));
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            throw new IllegalPinyinException(e);
        }
    }

    /**
     * Converts a piece of text to pinyin, capitalizing the first letter of every converted
     * character and concatenating the results without a delimiter.
     *
     * <p>Runs of separator characters are first collapsed to a single space and the text is
     * trimmed. Characters with several readings are resolved by {@link #resolvePolyphone}.
     *
     * @param chinese the text to convert
     * @return the concatenated pinyin, or {@code null} when {@code StringUtils.isEmpty}
     *         reports the input as empty
     * @throws IllegalPinyinException if any character yields no reading at all
     */
    @Override
    public String getPinyin(String chinese) throws IllegalPinyinException {
        if (StringUtils.isEmpty(chinese)) {
            return null;
        }
        String normalized = SEPARATORS.matcher(chinese).replaceAll(" ").trim();
        StringBuilder pinyin = new StringBuilder(32);
        char[] chs = normalized.toCharArray();
        for (int i = 0; i < chs.length; i++) {
            String[] candidates = getPinyin(chs[i]);
            if (candidates == null || candidates.length < 1) {
                throw new IllegalPinyinException(
                        "pinyin array is empty, char:" + chs[i] + ",chinese:" + normalized);
            }
            // A single reading, or two identical readings, needs no disambiguation.
            boolean unambiguous = candidates.length == 1
                    || (candidates.length == 2 && candidates[0].equals(candidates[1]));
            String chosen = unambiguous
                    ? candidates[0]
                    : resolvePolyphone(normalized, i, candidates);
            pinyin.append(convertInitialToUpperCase(chosen));
        }
        return pinyin.toString();
    }

    /**
     * Chooses one reading for the polyphonic character at {@code index}.
     *
     * <p>Candidates are tried in array order. The first one for which a surrounding word is
     * registered in the polyphone table wins. Otherwise the last candidate under which the
     * bare character itself is registered is used, and failing that the first candidate.
     */
    private String resolvePolyphone(String text, int index, String[] candidates) {
        String single = String.valueOf(text.charAt(index));
        String contextual = null;
        String fallback = null;
        for (String py : candidates) {
            if (matchesContext(text, index, py)) {
                contextual = py;
                break;
            }
            if (isKnownWord(py, single)) { // default reading
                fallback = py;
            }
        }
        if (StringUtils.isEmpty(contextual)) {
            return StringUtils.isNotEmpty(fallback) ? fallback : candidates[0];
        }
        return contextual;
    }

    /**
     * Tells whether any two- or three-character window around {@code index} is registered
     * under {@code py}. Windows are probed in a fixed order and only when they fit inside
     * {@code text}.
     */
    private boolean matchesContext(String text, int index, String py) {
        int length = text.length();
        // One extra character to the left, e.g. 银[行]
        if (index >= 1 && isKnownWord(py, text.substring(index - 1, index + 1))) {
            return true;
        }
        // One extra character to the right, e.g. [长]沙
        if (index <= length - 2 && isKnownWord(py, text.substring(index, index + 2))) {
            return true;
        }
        // One extra character on each side, e.g. 龙[爪]槐
        if (index >= 1 && index + 2 <= length
                && isKnownWord(py, text.substring(index - 1, index + 2))) {
            return true;
        }
        // Two extra characters to the left, e.g. 芈月[传], 列车[长]
        if (index >= 2 && isKnownWord(py, text.substring(index - 2, index + 1))) {
            return true;
        }
        // Two extra characters to the right, e.g. [长]孙无忌
        return index <= length - 3 && isKnownWord(py, text.substring(index, index + 3));
    }

    /** Tells whether {@code word} is listed under the key {@code py} in the polyphone table. */
    private boolean isKnownWord(String py, String word) {
        return duoYinZiMap.containsKey(py) && duoYinZiMap.get(py).contains(word);
    }

    /**
     * Upper-cases the first character of {@code str}, returning {@code ""} for null or empty
     * input. Uses {@link Locale#ROOT} so the result does not depend on the JVM default locale
     * (for example {@code i} must not become a dotted capital under a Turkish locale).
     */
    private String convertInitialToUpperCase(String str) {
        if (str == null || str.length() == 0) {
            return "";
        }
        return str.substring(0, 1).toUpperCase(Locale.ROOT) + str.substring(1);
    }
}