All downloads are free. Search and download functionality uses the official Maven repository.

net.sourceforge.pinyin4j.ChineseToPinyinResource Maven / Gradle / Ivy

Go to download

Supports converting Chinese characters (both Simplified and Traditional) to the most popular Pinyin systems, including Hanyu Pinyin, Tongyong Pinyin, Wade-Giles, MPS2, Yale and Gwoyeu Romatzyh. Supports multiple pronunciations and customized output.

The newest version!
/**
 * This file is part of pinyin4j (http://sourceforge.net/projects/pinyin4j/) and distributed under
 * GNU GENERAL PUBLIC LICENSE (GPL).
 *
 * pinyin4j is free software; you can redistribute it and/or modify it under the terms of the GNU
 * General Public License as published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * pinyin4j is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with pinyin4j.
 */

package net.sourceforge.pinyin4j;

import net.sourceforge.pinyin4j.multipinyin.Trie;

import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Manage all external resources required in PinyinHelper class.
 *
 * <p>The class is a lazily-created singleton (see {@link #getInstance()}). Its only state is a
 * {@link Trie} keyed by the upper-case hexadecimal code point of a character; each entry carries
 * a Pinyin record of the form {@code (pin1,yin2,...)}.
 *
 * @author Li Min ([email protected])
 */
class ChineseToPinyinResource {
  /**
   * Lookup table from Unicode code point (upper-case hex string) to Hanyu Pinyin record.
   */
  private Trie unicodeToHanyuPinyinTable = null;

  /**
   * @param unicodeToHanyuPinyinTable The unicodeToHanyuPinyinTable to set.
   */
  private void setUnicodeToHanyuPinyinTable(Trie unicodeToHanyuPinyinTable) {
    this.unicodeToHanyuPinyinTable = unicodeToHanyuPinyinTable;
  }

  /**
   * @return Returns the unicodeToHanyuPinyinTable.
   */
  Trie getUnicodeToHanyuPinyinTable() {
    return unicodeToHanyuPinyinTable;
  }

  /**
   * Private constructor as part of the singleton pattern.
   */
  private ChineseToPinyinResource() {
    initializeResource();
  }

  /**
   * Create the lookup table and populate it from the bundled Pinyin databases.
   *
   * <p>The (empty) table is installed before loading starts, so the table reference is never
   * null afterwards even when loading fails. I/O failures are reported on standard error and
   * otherwise ignored, i.e. loading is best-effort.
   */
  private void initializeResource() {
    try {
      final String resourceName = "/pinyindb/unicode_to_hanyu_pinyin.txt";
      final String resourceMultiName = "/pinyindb/multi_pinyin.txt";

      setUnicodeToHanyuPinyinTable(new Trie());
      getUnicodeToHanyuPinyinTable().load(ResourceHelper.getResourceInputStream(resourceName));
      getUnicodeToHanyuPinyinTable().loadMultiPinyin(
          ResourceHelper.getResourceInputStream(resourceMultiName));
      getUnicodeToHanyuPinyinTable().loadMultiPinyinExtend();
    } catch (FileNotFoundException ex) {
      ex.printStackTrace();
    } catch (IOException ex) {
      ex.printStackTrace();
    }
  }

  /**
   * Look up the table entry for a single character.
   *
   * @param ch given Chinese character in Unicode
   * @return whatever the table returns for the character's upper-case hex code point; callers in
   *         this class treat a null result as "no entry"
   */
  Trie getHanyuPinyinTrie(char ch) {
    String codepointHexStr = Integer.toHexString((int) ch).toUpperCase();

    // fetch from the lookup table
    return getUnicodeToHanyuPinyinTable().get(codepointHexStr);
  }

  /**
   * Get the unformatted Hanyu Pinyin representations of the given Chinese
   * character in array format.
   *
   * @param ch given Chinese character in Unicode
   * @return The Hanyu Pinyin strings of the given Chinese character in array
   *         format; return null if there is no corresponding Pinyin string.
   */
  String[] getHanyuPinyinStringArray(char ch) {
    String pinyinRecord = getHanyuPinyinRecordFromChar(ch);
    return parsePinyinString(pinyinRecord);
  }

  /**
   * Split a Pinyin record of the form {@code (a,b,c)} into its comma-separated parts.
   *
   * <p>The text between the first left bracket and the last right bracket is split on commas.
   *
   * @param pinyinRecord record string, may be null
   * @return the Pinyin strings inside the brackets; null if the record is null or mal-formatted
   *         (a bracket is missing, or the last right bracket precedes the first left bracket)
   */
  String[] parsePinyinString(String pinyinRecord) {
    if (null == pinyinRecord) {
      return null; // no record found
    }

    int indexOfLeftBracket = pinyinRecord.indexOf(Field.LEFT_BRACKET);
    int indexOfRightBracket = pinyinRecord.lastIndexOf(Field.RIGHT_BRACKET);

    // Without this guard substring() would throw StringIndexOutOfBoundsException for records
    // lacking a bracket or having them in the wrong order, instead of reporting "no Pinyin".
    if (indexOfLeftBracket < 0) {
      return null; // mal-formatted record
    }
    int contentStart = indexOfLeftBracket + Field.LEFT_BRACKET.length();
    if (indexOfRightBracket < contentStart) {
      return null; // mal-formatted record
    }

    String strippedString = pinyinRecord.substring(contentStart, indexOfRightBracket);
    return strippedString.split(Field.COMMA);
  }

  /**
   * @param record given record string of Hanyu Pinyin
   * @return return true if record is not null and record is not "(none0)" and
   *         record is not mal-formatted, else return false
   */
  private boolean isValidRecord(String record) {
    final String noneStr = "(none0)";

    return (null != record) && !record.equals(noneStr)
        && record.startsWith(Field.LEFT_BRACKET) && record.endsWith(Field.RIGHT_BRACKET);
  }

  /**
   * @param ch given Chinese character in Unicode
   * @return corresponding Hanyu Pinyin Record in Properties file; null if no
   *         record found
   */
  private String getHanyuPinyinRecordFromChar(char ch) {
    // Reuse the single-character lookup rather than repeating the code point
    // (see http://www.unicode.org/glossary/#code_point) to hex-string conversion here.
    Trie trie = getHanyuPinyinTrie(ch);

    String foundRecord = null;
    if (trie != null) {
      foundRecord = trie.getPinyin();
    }

    return isValidRecord(foundRecord) ? foundRecord : null;
  }

  /**
   * Singleton factory method.
   *
   * @return the one and only ChineseToPinyinResource instance.
   */
  static ChineseToPinyinResource getInstance() {
    return ChineseToPinyinResourceHolder.theInstance;
  }

  /**
   * Singleton implementation helper.
   */
  private static class ChineseToPinyinResourceHolder {
    static final ChineseToPinyinResource theInstance = new ChineseToPinyinResource();
  }

  /**
   * A class encloses common string constants used in Properties files
   *
   * @author Li Min ([email protected])
   */
  class Field {
    static final String LEFT_BRACKET = "(";

    static final String RIGHT_BRACKET = ")";

    static final String COMMA = ",";
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy