All downloads are free. The search and download functionality uses the official Maven repository.

com.seejoke.pinyin.SpellDictionary Maven / Gradle / Ivy

There is a newer version: 0.3
Show newest version
package com.seejoke.pinyin;

import com.google.common.collect.ArrayListMultimap;
import com.seejoke.core.utils.IoUtils;
import com.seejoke.core.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.Enumeration;

/**
 * Lazily loaded dictionary backed by every {@code spell/py4j.txt} resource that the
 * class loader can see.
 *
 * <p>Each line of a dictionary file is expected to look like {@code key#value1/value2/...}.
 * Every non-empty value is stored, trimmed, under the key in a multimap. Judging by the
 * constant and field names, the key is presumably a pinyin reading and the values are
 * words containing a polyphonic character (duo yin zi) with that reading -- confirm
 * against the bundled dictionary file.
 *
 * <p>Loading happens at most once per instance, on the first call to
 * {@link #getDuoYinZiMap()}: the {@code volatile} {@code initialized} flag is checked
 * first, and the actual load runs in a {@code synchronized} method that re-checks the
 * flag before doing any work.
 *
 * @author [email protected]
 * @date 2018/5/3 17:46
 */
public class SpellDictionary {

    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    /** Resource directory that holds the dictionary files. */
    private static final String PREFIX = "spell/";

    /** Loaded entries; assigned once by {@link #loadVocabulary()} before {@code initialized} is set. */
    private ArrayListMultimap<String, String> duoYinZiMap;

    /** Name of the dictionary resource inside {@link #PREFIX}. */
    private static final String CONFIG_NAME = "py4j.txt";

    /** Separates the key from the value list on a dictionary line. */
    private static final String PINYIN_SEPARATOR = "#";

    /** Separates the individual values after {@link #PINYIN_SEPARATOR}. */
    private static final String WORD_SEPARATOR = "/";

    /** Set to {@code true} only after {@link #duoYinZiMap} has been fully populated. */
    private volatile boolean initialized;

    /** Creates an empty, not yet loaded dictionary; loading is deferred to first use. */
    public SpellDictionary() {

    }

    /**
     * Returns the shared dictionary instance.
     *
     * @return the instance held by {@link SingletonHolder}, created on first access
     */
    public static SpellDictionary getDefault() {
        return SingletonHolder.INSTANCE;
    }

    /**
     * Returns the dictionary entries, loading them first if necessary.
     *
     * <p>The returned multimap is the internal, mutable instance, not a copy.
     *
     * @return the key-to-values multimap read from the dictionary resources
     * @throws RuntimeException if the dictionary resources cannot be located or read
     */
    public ArrayListMultimap<String, String> getDuoYinZiMap() {
        checkInit();
        return duoYinZiMap;
    }

    /** Triggers loading when the {@code initialized} flag has not been set yet. */
    private void checkInit() {
        if (!initialized) {
            loadVocabulary();
        }
    }

    /**
     * Loads the dictionary once. The flag is re-checked inside the monitor so that
     * threads which raced past {@link #checkInit()} do not load a second time.
     */
    private synchronized void loadVocabulary() {
        if (initialized) {
            return;
        }
        this.duoYinZiMap = loadVocabulary0(CONFIG_NAME);
        // Publish the flag only after the map is assigned, so readers never see a null map.
        initialized = true;
    }

    /**
     * Reads every resource named {@code PREFIX + name} and merges the entries into one multimap.
     *
     * @param name resource file name relative to {@link #PREFIX}
     * @return a new multimap holding the entries of all matching resources
     * @throws RuntimeException if locating or parsing a resource fails
     */
    private ArrayListMultimap<String, String> loadVocabulary0(String name) {
        debug("******start load py4j config******");
        ArrayListMultimap<String, String> duoYinZiMap = ArrayListMultimap.create(512, 32);
        String filename = PREFIX + name;
        try {
            ClassLoader cl = Thread.currentThread().getContextClassLoader();
            if (cl == null) {
                // The context class loader may legitimately be null; fall back to the
                // loader that defined this class instead of failing with an NPE.
                cl = SpellDictionary.class.getClassLoader();
            }
            Enumeration<URL> urls = cl.getResources(filename);
            if (urls != null) {
                while (urls.hasMoreElements()) {
                    URL url = urls.nextElement();
                    parseURL(url, duoYinZiMap);
                }
            }
        } catch (Exception e) {
            error("caught exception when load py4j vocabulary", e);
            throw new RuntimeException("caught exception when load py4j vocabulary", e);
        }
        debug("******load py4j config over******");
        debug("py4j map key size:{}", duoYinZiMap.keySet().size());
        return duoYinZiMap;
    }

    /**
     * Parses one dictionary resource, decoded as UTF-8, into {@code duoYinZiMap}.
     *
     * <p>Lines that yield fewer than two {@code #}-separated parts (blank lines, lines
     * without a separator, lines with nothing after it) are skipped. Only the first two
     * parts of a line are used.
     *
     * @param url         location of the dictionary resource
     * @param duoYinZiMap multimap that receives the parsed entries
     * @throws RuntimeException if the resource cannot be opened or read
     */
    private void parseURL(URL url, ArrayListMultimap<String, String> duoYinZiMap) {
        debug("load py4j dictionary file:{}", url.getPath());
        InputStream in = null;
        BufferedReader br = null;
        try {
            in = url.openStream();
            br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line = null;
            while ((line = br.readLine()) != null) {

                String[] arr = line.split(PINYIN_SEPARATOR);

                if (arr.length < 2) {
                    // Malformed or blank line: there is no value part to index into.
                    continue;
                }

                if (StringUtils.isNotEmpty(arr[1])) {
                    String[] dyzs = arr[1].split(WORD_SEPARATOR);
                    for (String dyz : dyzs) {
                        if (StringUtils.isNotEmpty(dyz)) {
                            duoYinZiMap.put(arr[0], dyz.trim());
                        }
                    }
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(String.format("load py4j config:%s error", url), e);
        } finally {
            // Best-effort cleanup: a failure while closing must not mask the real outcome.
            IoUtils.closeQuietly(br);
            IoUtils.closeQuietly(in);
        }
    }

    /** Logs {@code msg} with its cause at error level. */
    private void error(String msg, Throwable err) {
        logger.error(msg, err);
    }

    /** Logs a parameterized message at debug level, only when debug logging is enabled. */
    private void debug(String msg, Object... args) {
        if (logger.isDebugEnabled()) {
            logger.debug(msg, args);
        }
    }

    /** Holder class whose initialization creates the shared instance on first access. */
    private static class SingletonHolder {
        private static final SpellDictionary INSTANCE = new SpellDictionary();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy