// com.caucho.vfs.Encoding Maven / Gradle / Ivy
/*
* Copyright (c) 1998-2018 Caucho Technology -- all rights reserved
*
* This file is part of Resin(R) Open Source
*
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
*
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
* of NON-INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
*
* Free Software Foundation, Inc.
* 59 Temple Place, Suite 330
* Boston, MA 02111-1307 USA
*
* @author Scott Ferguson
*/
package com.caucho.vfs;
import com.caucho.util.CharBuffer;
import com.caucho.vfs.i18n.EncodingReader;
import com.caucho.vfs.i18n.EncodingWriter;
import com.caucho.vfs.i18n.ISO8859_1Writer;
import com.caucho.vfs.i18n.JDKReader;
import com.caucho.vfs.i18n.JDKWriter;
import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
/**
* Converts between the mime encoding names and Java encoding names.
*/
public class Encoding {
// Maps an encoding name (canonical mime name or alias) to the Java encoding
// name. getJavaName(String) also stores resolved lookups here.
private static ConcurrentHashMap<String,String> _javaName;
// Maps an encoding alias to its canonical mime name.
// getMimeName(String) also stores resolved lookups here.
private static ConcurrentHashMap<String,String> _mimeName;
// Maps a locale string or language code to a mime encoding name.
private static ConcurrentHashMap<String,String> _localeName;

// map from an encoding name to its EncodingReader factory.
static final ConcurrentHashMap<String,EncodingReader> _readEncodingFactories
  = new ConcurrentHashMap<String,EncodingReader>();
// map from an encoding name to its EncodingWriter factory.
static final ConcurrentHashMap<String,EncodingWriter> _writeEncodingFactories
  = new ConcurrentHashMap<String,EncodingWriter>();

// Shared writer instance returned by getLatin1Writer().
static final EncodingWriter _latin1Writer = new ISO8859_1Writer();
/**
 * Private constructor: the class exposes only static members and is
 * never instantiated.
 */
private Encoding()
{
}
/**
 * Resolves an encoding name, which may be an alias, to its canonical
 * mime name.
 *
 * <p>The name is first looked up exactly as given. On a miss it is
 * normalized and looked up again; if it is still unknown, the normalized
 * name itself becomes the result. The result is then stored under the
 * caller's original spelling so the next call with the same string is
 * answered by the first lookup.
 *
 * @param encoding character encoding name, possibly an alias
 *
 * @return canonical mime name for the encoding, or null when
 * <code>encoding</code> is null.
 */
public static String getMimeName(String encoding)
{
  if (encoding == null)
    return null;

  String mimeName = _mimeName.get(encoding);

  if (mimeName == null) {
    String normalized = normalize(encoding);
    String canonical = _mimeName.get(normalized);

    mimeName = canonical != null ? canonical : normalized;

    _mimeName.put(encoding, mimeName);
  }

  return mimeName;
}
/**
 * Returns the mime encoding name registered for the given locale.
 *
 * <p>The locale table is consulted with progressively less specific keys:
 * the full <code>locale.toString()</code>, then
 * <code>language_COUNTRY</code>, then the bare language code. The middle
 * step matters because <code>toString()</code> may carry variant, script
 * or extension suffixes (for example <code>zh_TW_#Hant</code>), which would
 * otherwise skip a country-specific entry such as <code>zh_TW</code>.
 *
 * @param locale locale to use.
 *
 * @return the mime name registered for the locale, or "utf-8" when the
 * locale is null or has no entry.
 */
public static String getMimeName(Locale locale)
{
  if (locale == null)
    return "utf-8";

  String language = locale.getLanguage();
  String country = locale.getCountry();

  String mimeName = _localeName.get(locale.toString());

  // retry without variant/script/extension suffixes
  if (mimeName == null && country.length() > 0)
    mimeName = _localeName.get(language + "_" + country);

  if (mimeName == null)
    mimeName = _localeName.get(language);

  return mimeName != null ? mimeName : "utf-8";
}
/**
 * Creates a Reader that decodes the bytes of a stream into characters.
 * The reader comes from the factory that getReadFactory() returns for
 * the encoding.
 *
 * @param is the input stream.
 * @param encoding the encoding name.
 *
 * @return a reader for the translation
 */
public static Reader getReadEncoding(InputStream is, String encoding)
  throws UnsupportedEncodingException
{
  EncodingReader readerFactory = getReadFactory(encoding);

  return readerFactory.create(is);
}
/**
 * Returns the reader factory for an encoding, creating and caching it on
 * first use.
 *
 * <p>A specialized class named
 * <code>com.caucho.vfs.i18n.&lt;javaEncoding&gt;Reader</code> is used when it
 * can be loaded and instantiated; otherwise the factory is a
 * {@link JDKReader}. Either way the factory is given the Java encoding name
 * and stored in the factory cache under the caller's encoding name.
 *
 * @param encoding the encoding name; null is treated as ISO-8859-1.
 *
 * @return the factory used to create readers for the encoding
 */
public static EncodingReader getReadFactory(final String encoding)
  throws UnsupportedEncodingException
{
  String encKey = encoding == null ? "iso-8859-1" : encoding;

  EncodingReader factory = _readEncodingFactories.get(encKey);

  if (factory != null)
    return factory;

  String javaEncoding = Encoding.getJavaName(encoding);

  if (javaEncoding == null)
    javaEncoding = "ISO8859_1";

  try {
    String className = "com.caucho.vfs.i18n." + javaEncoding + "Reader";
    Class<?> cl = Class.forName(className);

    EncodingReader specialized
      = (EncodingReader) cl.getDeclaredConstructor().newInstance();
    specialized.setJavaEncoding(javaEncoding);

    factory = specialized;
  } catch (Exception ignored) {
    // No usable specialized reader for this encoding: fall back to the
    // JDK-based reader below.
  } catch (LinkageError ignored) {
    // The class could not be linked or initialized (e.g.
    // NoClassDefFoundError): fall back to the JDK-based reader below.
  }

  if (factory == null) {
    factory = new JDKReader();
    factory.setJavaEncoding(javaEncoding);
  }

  _readEncodingFactories.put(encKey, factory);

  return factory;
}
/**
 * Returns an EncodingWriter to translate characters to bytes.
 *
 * <p>Writers are produced by a per-encoding factory. On first use the
 * factory is a specialized class named
 * <code>com.caucho.vfs.i18n.&lt;javaEncoding&gt;Writer</code> when it can be
 * loaded and instantiated, otherwise a {@link JDKWriter}; it is then cached
 * under the encoding name.
 *
 * @param encoding the encoding name; null is treated as "iso-8859-1".
 *
 * @return a writer for the translation
 */
public static EncodingWriter getWriteEncoding(String encoding)
{
  if (encoding == null)
    encoding = "iso-8859-1";

  EncodingWriter factory = _writeEncodingFactories.get(encoding);

  if (factory != null)
    return factory.create();

  String javaEncoding = Encoding.getJavaName(encoding);

  if (javaEncoding == null)
    javaEncoding = "ISO8859_1";

  try {
    String className = "com.caucho.vfs.i18n." + javaEncoding + "Writer";
    Class<?> cl = Class.forName(className);

    EncodingWriter specialized
      = (EncodingWriter) cl.getDeclaredConstructor().newInstance();
    specialized.setJavaEncoding(javaEncoding);

    factory = specialized;
  } catch (Exception ignored) {
    // No usable specialized writer for this encoding: fall back to the
    // JDK-based writer below.
  } catch (LinkageError ignored) {
    // The class could not be linked or initialized (e.g.
    // NoClassDefFoundError): fall back to the JDK-based writer below.
  }

  if (factory == null) {
    factory = new JDKWriter();
    factory.setJavaEncoding(javaEncoding);
  }

  _writeEncodingFactories.put(encoding, factory);

  // return factory.create(factory.getJavaEncoding());
  // charset uses the original encoding, not the java encoding
  // NOTE(review): the cached path above calls create() while this first-use
  // path calls create(encoding) -- confirm the two are meant to differ.
  return factory.create(encoding);
}
/**
 * Returns the shared writer held in the static <code>_latin1Writer</code>
 * field, which is initialized with an <code>ISO8859_1Writer</code>.
 *
 * @return the latin 1 writer; the same instance on every call.
 */
public static EncodingWriter getLatin1Writer()
{
  return Encoding._latin1Writer;
}
/**
 * Resolves an encoding name to the Java encoding name.
 *
 * <p>Lookup order: the name exactly as given; the normalized name; the
 * Java name registered for the normalized name's canonical mime name.
 * When none of these match, the normalized name itself is the result.
 * The result is stored under the caller's original spelling so the next
 * call with the same string is answered by the first lookup.
 *
 * @param encoding character encoding name
 *
 * @return Java encoding name, or null when <code>encoding</code> is null.
 */
public static String getJavaName(String encoding)
{
  if (encoding == null)
    return null;

  String resolved = _javaName.get(encoding);

  if (resolved == null) {
    String normalized = normalize(encoding);

    resolved = _javaName.get(normalized);

    if (resolved == null) {
      // try the canonical mime name of the alias
      String mimeName = _mimeName.get(normalized);

      if (mimeName != null)
        resolved = _javaName.get(mimeName);

      if (resolved == null)
        resolved = normalized;
    }

    _javaName.put(encoding, resolved);
  }

  return resolved;
}
/**
 * Returns the Java encoding name for a locale by resolving the locale's
 * mime name and converting that to its Java name.
 *
 * @param locale the locale to use
 *
 * @return Java encoding name, or null when <code>locale</code> is null.
 */
public static String getJavaName(Locale locale)
{
  return locale == null ? null : getJavaName(getMimeName(locale));
}
/**
 * Normalizes a user-supplied encoding name for table lookups: lower-case
 * characters are converted to upper case and '_' is replaced by '-'.
 * All other characters are copied unchanged.
 */
private static String normalize(String name)
{
  CharBuffer normalized = new CharBuffer();

  for (int pos = 0, end = name.length(); pos < end; pos++) {
    char ch = name.charAt(pos);
    char mapped;

    if (ch == '_')
      mapped = '-';
    else if (Character.isLowerCase(ch))
      mapped = Character.toUpperCase(ch);
    else
      mapped = ch;

    normalized.append(mapped);
  }

  return normalized.close();
}
static {
_javaName = new ConcurrentHashMap();
_mimeName = new ConcurrentHashMap();
_localeName = new ConcurrentHashMap();
_mimeName.put("ANSI-X3.4-1968", "US-ASCII");
_mimeName.put("ISO-IR-6", "US-ASCII");
_mimeName.put("ISO-646.IRV:1991", "US-ASCII");
_mimeName.put("ASCII", "US-ASCII");
_mimeName.put("ISO646-US", "US-ASCII");
_mimeName.put("US-ASCII", "US-ASCII");
_mimeName.put("us", "US-ASCII");
_mimeName.put("IBM367", "US-ASCII");
_mimeName.put("CP367", "US-ASCII");
_mimeName.put("CSASCII", "US-ASCII");
_javaName.put("US-ASCII", "ISO8859_1");
_mimeName.put("ISO-2022-KR", "ISO-2022-KR");
_mimeName.put("CSISO2022KR", "ISO-2022-KR");
_mimeName.put("ISO2022-KR", "ISO-2022-KR");
_javaName.put("ISO-2022-KR", "ISO2022_KR");
_mimeName.put("EUC-KR", "EUC-KR");
_mimeName.put("CSEUCKR", "EUC-KR");
_javaName.put("EUC-KR", "EUC_KR");
_mimeName.put("ISO-2022-JP", "ISO-2022-JP");
_mimeName.put("CSISO2022JP", "ISO-2022-JP");
_mimeName.put("ISO2022-JP", "ISO-2022-JP");
_javaName.put("ISO-2022-JP", "ISO2022JP");
_mimeName.put("ISO-2022-JP-2", "ISO-2022-JP-2");
_mimeName.put("CSISO2022JP2", "ISO-2022-JP-2");
_mimeName.put("ISO2022-JP2", "ISO-2022-JP-2");
_javaName.put("ISO-2022-JP-2", "ISO2022_JP2");
_mimeName.put("ISO_8859-1:1987", "ISO-8859-1");
_mimeName.put("ISO-IR-100", "ISO-8859-1");
_mimeName.put("ISO-8859-1", "ISO-8859-1");
_mimeName.put("LATIN1", "ISO-8859-1");
_mimeName.put("LATIN-1", "ISO-8859-1");
_mimeName.put("L1", "ISO-8859-1");
_mimeName.put("IBM819", "ISO-8859-1");
_mimeName.put("CP819", "ISO-8859-1");
_mimeName.put("CSISOLATIN1", "ISO-8859-1");
_mimeName.put("ISO8859-1", "ISO-8859-1");
_mimeName.put("8859-1", "ISO-8859-1");
_mimeName.put("8859_1", "ISO-8859-1");
_javaName.put("ISO-8859-1", "ISO8859_1");
_mimeName.put("ISO-8859-2:1987", "ISO-8859-2");
_mimeName.put("ISO-IR-101", "ISO-8859-2");
_mimeName.put("ISO-8859-2", "ISO-8859-2");
_mimeName.put("LATIN2", "ISO-8859-2");
_mimeName.put("LATIN-2", "ISO-8859-2");
_mimeName.put("L2", "ISO-8859-2");
_mimeName.put("CSISOLATIN2", "ISO-8859-2");
_mimeName.put("ISO8859-2", "ISO-8859-2");
_javaName.put("ISO-8859-2", "ISO8859_2");
_mimeName.put("ISO-8859-3:1988", "ISO-8859-3");
_mimeName.put("ISO-IR-109", "ISO-8859-3");
_mimeName.put("ISO-8859-3", "ISO-8859-3");
_mimeName.put("ISO-8859-3", "ISO-8859-3");
_mimeName.put("LATIN3", "ISO-8859-3");
_mimeName.put("LATIN-3", "ISO-8859-3");
_mimeName.put("L3", "ISO-8859-3");
_mimeName.put("CSISOLATIN3", "ISO-8859-3");
_mimeName.put("ISO8859-3", "ISO-8859-3");
_javaName.put("ISO-8859-3", "ISO8859_3");
_mimeName.put("ISO-8859-4:1988", "ISO-8859-4");
_mimeName.put("ISO-IR-110", "ISO-8859-4");
_mimeName.put("ISO-8859-4", "ISO-8859-4");
_mimeName.put("ISO-8859-4", "ISO-8859-4");
_mimeName.put("LATIN4", "ISO-8859-4");
_mimeName.put("LATIN-4", "ISO-8859-4");
_mimeName.put("L4", "ISO-8859-4");
_mimeName.put("CSISOLATIN4", "ISO-8859-4");
_mimeName.put("ISO8859-4", "ISO-8859-4");
_javaName.put("ISO-8859-4", "ISO8859_4");
_mimeName.put("ISO-8859-5:1988", "ISO-8859-5");
_mimeName.put("ISO-IR-144", "ISO-8859-5");
_mimeName.put("ISO-8859-5", "ISO-8859-5");
_mimeName.put("ISO-8859-5", "ISO-8859-5");
_mimeName.put("CYRILLIC", "ISO-8859-5");
_mimeName.put("CSISOLATINCYRILLIC", "ISO-8859-5");
_mimeName.put("ISO8859-5", "ISO-8859-5");
_javaName.put("ISO-8859-5", "ISO8859_5");
_mimeName.put("ISO-8859-6:1987", "ISO-8859-6");
_mimeName.put("ISO-IR-127", "ISO-8859-6");
_mimeName.put("ISO-8859-6", "ISO-8859-6");
_mimeName.put("ISO-8859-6", "ISO-8859-6");
_mimeName.put("ECMA-114", "ISO-8859-6");
_mimeName.put("ASMO-708", "ISO-8859-6");
_mimeName.put("ARABIC", "ISO-8859-6");
_mimeName.put("CSISOLATINARABIC", "ISO-8859-6");
_mimeName.put("ISO8859-6", "ISO-8859-6");
_javaName.put("ISO-8859-6", "ISO8859_6");
_mimeName.put("ISO-8859-7:1987", "ISO-8859-7");
_mimeName.put("ISO-IR-126", "ISO-8859-7");
_mimeName.put("ISO-8859-7", "ISO-8859-7");
_mimeName.put("ISO-8859-7", "ISO-8859-7");
_mimeName.put("ELOT-928", "ISO-8859-7");
_mimeName.put("ECMA-118", "ISO-8859-7");
_mimeName.put("GREEK", "ISO-8859-7");
_mimeName.put("GREEK8", "ISO-8859-7");
_mimeName.put("CSISOLATINGREEN", "ISO-8859-7");
_mimeName.put("ISO8859-7", "ISO-8859-7");
_javaName.put("ISO-8859-7", "ISO8859_7");
_mimeName.put("ISO-8859-8:1988", "ISO-8859-8");
_mimeName.put("ISO-IR-138", "ISO-8859-8");
_mimeName.put("ISO-8859-8", "ISO-8859-8");
_mimeName.put("ISO-8859-8", "ISO-8859-8");
_mimeName.put("HEBREW", "ISO-8859-8");
_mimeName.put("CSISOLATINHEBREW", "ISO-8859-8");
_mimeName.put("ISO8859-8", "ISO-8859-8");
_javaName.put("ISO-8859-8", "ISO8859_8");
_mimeName.put("ISO-8859-9:1989", "ISO-8859-9");
_mimeName.put("ISO-IR-148", "ISO-8859-9");
_mimeName.put("ISO-8859-9", "ISO-8859-9");
_mimeName.put("ISO-8859-9", "ISO-8859-9");
_mimeName.put("LATIN5", "ISO-8859-9");
_mimeName.put("LATIN-5", "ISO-8859-9");
_mimeName.put("L5", "ISO-8859-9");
_mimeName.put("CSISOLATIN5", "ISO-8859-9");
_mimeName.put("ISO8859-9", "ISO-8859-9");
_javaName.put("ISO-8859-9", "ISO8859_9");
/* unsupported by java
_mimeName.put("ISO_8859-10:1992", "ISO-8859-10");
_mimeName.put("iso-ir-157", "ISO-8859-10");
_mimeName.put("I6", "ISO-8859-10");
_mimeName.put("cslSOLatin6", "ISO-8859-10");
_mimeName.put("latin6", "ISO-8859-10");
_javaName.put("ISO-8859-10", "ISO8859_10");
*/
_mimeName.put("UTF-7", "UTF-7");
_mimeName.put("UTF7", "UTF-7");
_javaName.put("UTF-7", "UTF7");
_mimeName.put("UTF-8", "utf-8");
_mimeName.put("UTF8", "utf-8");
_javaName.put("UTF-8", "UTF8");
_mimeName.put("UTF-16", "utf-16");
_mimeName.put("UTF16", "utf-16");
_javaName.put("UTF-16", "UTF16");
_mimeName.put("UTF-16-REV", "utf-16-rev");
_mimeName.put("UTF16-REV", "utf-16-rev");
_javaName.put("utf-16-rev", "UTF16_REV");
_mimeName.put("JIS-ENCODING", "JIS_Encoding");
_mimeName.put("JIS-ENCODING", "JIS_Encoding");
_mimeName.put("CSJISENCODING", "JIS_Encoding");
_javaName.put("JIS_Encoding", "JIS_ENCODING");
_mimeName.put("SHIFT-JIS", "Shift_JIS");
_mimeName.put("SHIFT_JIS", "Shift_JIS");
_mimeName.put("CSSHIFTJIS", "Shift_JIS");
_mimeName.put("SJIS", "Shift_JIS");
_javaName.put("Shift_JIS", "SJIS");
_mimeName.put("EUC-JP", "EUC-JP");
_mimeName.put("EUC-JP", "EUC-JP");
_mimeName.put("EUCJP", "EUC-JP");
_mimeName.put("EUC-JP-LINUX", "EUC-JP");
_javaName.put("EUC-JP", "EUC_JP");
_mimeName.put("GB2312", "GB2312");
_mimeName.put("CSGB2312", "GB2312");
_javaName.put("GB2312", "GB2312");
_mimeName.put("GBK", "GBK");
_javaName.put("GBK", "GBK");
_mimeName.put("BIG5", "Big5");
_mimeName.put("BIG-5", "Big5");
_mimeName.put("CSBIG5", "Big5");
_javaName.put("Big5", "BIG5");
_mimeName.put("KOI8-R", "KOI8-R");
_mimeName.put("KOI-8-R", "KOI8-R");
_mimeName.put("KOI8-R", "KOI8-R");
_javaName.put("KOI8-R", "KOI8-R");
_mimeName.put("MS950", "ms950");
_javaName.put("ms950", "MS950");
_javaName.put("JAVA", "JAVA");
_mimeName.put("windows-hack", "ISO-8859-1");
_mimeName.put("WINDOWS-HACK", "ISO-8859-1");
_javaName.put("WINDOWS-HACK", "WindowsHack");
// #4180
_mimeName.put("MACROMAN", "utf-8");
_javaName.put("MacRoman", "utf-8");
_mimeName.put("KS_C_5601-1987", "ks_c_5601-1987");
_javaName.put("ks_c_5601-1987", "Cp949");
_javaName.put("IBM500", "Cp500");
String []cp = new String[] {
"037", "1006", "1025", "1026", "1046", "1097",
"1098", "1112", "1122", "1123", "1124", "1250",
"1251", "1252", "1253", "1254", "1255", "1256",
"1257", "1258", "1381", "273", "277", "278", "280", "284",
"285", "297", "33722", "420", "424", "437", "500", "737",
"775", "838", "850", "852", "855", "857", "860", "861", "862",
"863", "864", "865", "866", "868", "869", "870", "871", "874",
"875", "918", "921", "922", "930", "933", "935", "937", "939",
"942", "948", "949", "964", "970"
};
for (int i = 0; i < cp.length; i++) {
_mimeName.put("CP" + cp[i], "windows-" + cp[i]);
_mimeName.put("WINDOWS-" + cp[i], "windows-" + cp[i]);
_javaName.put("windows-" + cp[i], "Cp" + cp[i]);
}
// from http://www.w3c.org/International/O-charset-lang.html
_localeName = new ConcurrentHashMap();
_localeName.put("af", "ISO-8859-1");
_localeName.put("sq", "ISO-8859-1");
_localeName.put("ar", "ISO-8859-6");
_localeName.put("eu", "ISO-8859-1");
_localeName.put("bg", "ISO-8859-5");
_localeName.put("be", "ISO-8859-5");
_localeName.put("ca", "ISO-8859-1");
_localeName.put("hr", "ISO-8859-2");
_localeName.put("cs", "ISO-8859-2");
_localeName.put("da", "ISO-8859-1");
_localeName.put("nl", "ISO-8859-1");
_localeName.put("en", "ISO-8859-1");
_localeName.put("eo", "ISO-8859-3");
_localeName.put("et", "ISO-8859-4");
_localeName.put("fo", "ISO-8859-1");
_localeName.put("fi", "ISO-8859-1");
_localeName.put("fr", "ISO-8859-1");
_localeName.put("gl", "ISO-8859-1");
_localeName.put("de", "ISO-8859-1");
_localeName.put("el", "ISO-8859-7");
_localeName.put("iw", "ISO-8859-8");
_localeName.put("hu", "ISO-8859-2");
_localeName.put("is", "ISO-8859-1");
_localeName.put("ga", "ISO-8859-1");
_localeName.put("it", "ISO-8859-1");
_localeName.put("ja", "Shift_JIS");
_localeName.put("lv", "ISO-8859-4");
_localeName.put("lt", "ISO-8859-4");
_localeName.put("mk", "ISO-8859-5");
_localeName.put("mt", "ISO-8859-3");
_localeName.put("no", "ISO-8859-1");
_localeName.put("pl", "ISO-8859-2");
_localeName.put("pt", "ISO-8859-1");
_localeName.put("ro", "ISO-8859-2");
// _localeName.put("ru", "KOI8-R");
_localeName.put("ru", "ISO-8859-5");
_localeName.put("gd", "ISO-8859-1");
_localeName.put("sr", "ISO-8859-5");
_localeName.put("sk", "ISO-8859-2");
_localeName.put("sl", "ISO-8859-2");
_localeName.put("es", "ISO-8859-1");
_localeName.put("sv", "ISO-8859-1");
_localeName.put("tr", "ISO-8859-9");
_localeName.put("uk", "ISO-8859-5");
_localeName.put("ko", "EUC-KR");
_localeName.put("zh", "GB2312");
_localeName.put("zh_TW", "Big5");
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy