All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.seejoke.pinyin.SpellConverterImpl Maven / Gradle / Ivy

There is a newer version: 0.3
Show newest version
package com.seejoke.pinyin;

import com.google.common.collect.ArrayListMultimap;
import com.seejoke.core.utils.StringUtils;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

/**
 * @auther: [email protected]
 * @date: 2018/5/3 17:45
 * @description:
 */
public class SpellConverterImpl implements ISpellConverter {
    private final ArrayListMultimap duoYinZiMap;

    public SpellConverterImpl() {
        this.duoYinZiMap = SpellDictionary.getDefault().getDuoYinZiMap();
    }

    @Override
    public String[] getPinyin(char ch) throws IllegalPinyinException {
        try {
            HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();
            outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
            outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
            outputFormat.setVCharType(HanyuPinyinVCharType.WITH_V);

            // ASCII >=33 ASCII<=125的直接返回
            if (ch >= 32 && ch <= 125) {
                return new String[]{String.valueOf(ch)};
            }
            return StringUtils.distinct(PinyinHelper.toHanyuPinyinStringArray(ch, outputFormat));
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            throw new IllegalPinyinException(e);
        }
    }

    @Override
    public String getPinyin(String chinese) throws IllegalPinyinException {
        if (StringUtils.isEmpty(chinese)) {
            return null;
        }

        chinese = chinese.replaceAll("[\\.,\\,!·\\!?\\?;\\;\\(\\)()\\[\\]\\:: ]+", " ").trim();

        StringBuilder py_sb = new StringBuilder(32);
        char[] chs = chinese.toCharArray();
        for (int i = 0; i < chs.length; i++) {
            String[] py_arr = getPinyin(chs[i]);
            if (py_arr == null || py_arr.length < 1) {
                throw new IllegalPinyinException("pinyin array is empty, char:" + chs[i] + ",chinese:" + chinese);
            }
            if (py_arr.length == 1) {
                py_sb.append(convertInitialToUpperCase(py_arr[0]));
            } else if (py_arr.length == 2 && py_arr[0].equals(py_arr[1])) {
                py_sb.append(convertInitialToUpperCase(py_arr[0]));
            } else {
                String resultPy = null, defaultPy = null;
                ;
                for (String py : py_arr) {
                    String left = null;    //向左多取一个字,例如 银[行]
                    if (i >= 1 && i + 1 <= chinese.length()) {
                        left = chinese.substring(i - 1, i + 1);
                        if (duoYinZiMap.containsKey(py) && duoYinZiMap.get(py).contains(left)) {
                            resultPy = py;
                            break;
                        }
                    }

                    String right = null;    //向右多取一个字,例如 [长]沙
                    if (i <= chinese.length() - 2) {
                        right = chinese.substring(i, i + 2);
                        if (duoYinZiMap.containsKey(py) && duoYinZiMap.get(py).contains(right)) {
                            resultPy = py;
                            break;
                        }
                    }

                    String middle = null;    //左右各多取一个字,例如 龙[爪]槐
                    if (i >= 1 && i + 2 <= chinese.length()) {
                        middle = chinese.substring(i - 1, i + 2);
                        if (duoYinZiMap.containsKey(py) && duoYinZiMap.get(py).contains(middle)) {
                            resultPy = py;
                            break;
                        }
                    }
                    String left3 = null;    //向左多取2个字,如 芈月[传],列车长
                    if (i >= 2 && i + 1 <= chinese.length()) {
                        left3 = chinese.substring(i - 2, i + 1);
                        if (duoYinZiMap.containsKey(py) && duoYinZiMap.get(py).contains(left3)) {
                            resultPy = py;
                            break;
                        }
                    }

                    String right3 = null;    //向右多取2个字,如 [长]孙无忌
                    if (i <= chinese.length() - 3) {
                        right3 = chinese.substring(i, i + 3);
                        if (duoYinZiMap.containsKey(py) && duoYinZiMap.get(py).contains(right3)) {
                            resultPy = py;
                            break;
                        }
                    }

                    if (duoYinZiMap.containsKey(py) && duoYinZiMap.get(py).contains(String.valueOf(chs[i]))) {    //默认拼音
                        defaultPy = py;
                    }
                }

                if (StringUtils.isEmpty(resultPy)) {
                    if (StringUtils.isNotEmpty(defaultPy)) {
                        resultPy = defaultPy;
                    } else {
                        resultPy = py_arr[0];
                    }
                }
                py_sb.append(convertInitialToUpperCase(resultPy));
            }
        }

        return py_sb.toString();
    }

    private String convertInitialToUpperCase(String str) {
        if (str == null || str.length() == 0) {
            return "";
        }
        return str.substring(0, 1).toUpperCase() + str.substring(1);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy