com.caucho.vfs.Encoding Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of resin Show documentation
Show all versions of resin Show documentation
Resin Java Application Server
/*
* Copyright (c) 1998-2018 Caucho Technology -- all rights reserved
*
* This file is part of Resin(R) Open Source
*
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
*
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
* of NON-INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
*
* Free Software Foundation, Inc.
* 59 Temple Place, Suite 330
* Boston, MA 02111-1307 USA
*
* @author Scott Ferguson
*/
package com.caucho.vfs;
import com.caucho.util.CharBuffer;
import com.caucho.vfs.i18n.EncodingReader;
import com.caucho.vfs.i18n.EncodingWriter;
import com.caucho.vfs.i18n.ISO8859_1Writer;
import com.caucho.vfs.i18n.JDKReader;
import com.caucho.vfs.i18n.JDKWriter;
import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
/**
* Converts between the mime encoding names and Java encoding names.
*/
public class Encoding {
private static ConcurrentHashMap _javaName;
private static ConcurrentHashMap _mimeName;
private static ConcurrentHashMap _localeName;
// map from an encoding name to its EncodingReader factory.
static final ConcurrentHashMap _readEncodingFactories
= new ConcurrentHashMap();
// map from an encoding name to its EncodingWriter factory.
static final ConcurrentHashMap _writeEncodingFactories
= new ConcurrentHashMap();
static final EncodingWriter _latin1Writer = new ISO8859_1Writer();
/**
* Can't create an instance of the encoding class.
*/
private Encoding() {}
/**
* Returns the canonical mime name for the given character encoding.
*
* @param encoding character encoding name, possibly an alias
*
* @return canonical mime name for the encoding.
*/
public static String getMimeName(String encoding)
{
if (encoding == null)
return null;
String value = _mimeName.get(encoding);
if (value != null)
return value;
String upper = normalize(encoding);
String lookup = _mimeName.get(upper);
value = lookup == null ? upper : lookup;
_mimeName.put(encoding, value);
return value;
}
/**
* Returns the canonical mime name for the given locale.
*
* @param locale locale to use.
*
* @return canonical mime name for the encoding.
*/
public static String getMimeName(Locale locale)
{
if (locale == null)
return "utf-8";
String mimeName = _localeName.get(locale.toString());
if (mimeName == null)
mimeName = _localeName.get(locale.getLanguage());
if (mimeName == null)
return "utf-8";
else
return mimeName;
}
/**
* Returns a Reader to translate bytes to characters. If a specialized
* reader exists in com.caucho.vfs.i18n, use it.
*
* @param is the input stream.
* @param encoding the encoding name.
*
* @return a reader for the translation
*/
public static Reader getReadEncoding(InputStream is, String encoding)
throws UnsupportedEncodingException
{
return getReadFactory(encoding).create(is);
}
/**
* Returns a Reader to translate bytes to characters. If a specialized
* reader exists in com.caucho.vfs.i18n, use it.
*
* @param is the input stream.
* @param encoding the encoding name.
*
* @return a reader for the translation
*/
public static EncodingReader getReadFactory(final String encoding)
throws UnsupportedEncodingException
{
String encKey = encoding == null ? "iso-8859-1" : encoding;
EncodingReader factory = _readEncodingFactories.get(encKey);
if (factory == null) {
try {
String javaEncoding = Encoding.getJavaName(encoding);
if (javaEncoding == null)
javaEncoding = "ISO8859_1";
String className = "com.caucho.vfs.i18n." + javaEncoding + "Reader";
Class cl = Class.forName(className);
factory = (EncodingReader) cl.newInstance();
factory.setJavaEncoding(javaEncoding);
} catch (Throwable e) {
}
if (factory == null) {
String javaEncoding = Encoding.getJavaName(encoding);
if (javaEncoding == null)
javaEncoding = "ISO8859_1";
factory = new JDKReader();
factory.setJavaEncoding(javaEncoding);
}
_readEncodingFactories.put(encKey, factory);
}
return factory;
}
/**
* Returns an EncodingWriter to translate characters to bytes.
*
* @param encoding the encoding name.
*
* @return a writer for the translation
*/
public static EncodingWriter getWriteEncoding(String encoding)
{
if (encoding == null)
encoding = "iso-8859-1";
EncodingWriter factory = _writeEncodingFactories.get(encoding);
if (factory != null)
return factory.create();
factory = _writeEncodingFactories.get(encoding);
if (factory == null) {
try {
String javaEncoding = Encoding.getJavaName(encoding);
if (javaEncoding == null)
javaEncoding = "ISO8859_1";
String className = "com.caucho.vfs.i18n." + javaEncoding + "Writer";
Class cl = Class.forName(className);
factory = (EncodingWriter) cl.newInstance();
factory.setJavaEncoding(javaEncoding);
} catch (Throwable e) {
}
if (factory == null) {
factory = new JDKWriter();
String javaEncoding = Encoding.getJavaName(encoding);
if (javaEncoding == null)
javaEncoding = "ISO8859_1";
factory.setJavaEncoding(javaEncoding);
}
_writeEncodingFactories.put(encoding, factory);
}
// return factory.create(factory.getJavaEncoding());
// charset uses the original encoding, not the java encoding
return factory.create(encoding);
}
/**
* Returns the latin 1 writer.
*/
public static EncodingWriter getLatin1Writer()
{
return _latin1Writer;
}
/**
* Returns the Java name for the given encoding.
*
* @param encoding character encoding name
*
* @return Java encoding name
*/
public static String getJavaName(String encoding)
{
if (encoding == null)
return null;
String javaName = _javaName.get(encoding);
if (javaName != null)
return javaName;
String upper = normalize(encoding);
javaName = _javaName.get(upper);
if (javaName == null) {
String lookup = _mimeName.get(upper);
if (lookup != null)
javaName = _javaName.get(lookup);
}
if (javaName == null)
javaName = upper;
_javaName.put(encoding, javaName);
return javaName;
}
/**
* Returns the Java name for the given locale.
*
* @param locale the locale to use
*
* @return Java encoding name
*/
public static String getJavaName(Locale locale)
{
if (locale == null)
return null;
return getJavaName(getMimeName(locale));
}
/**
* Normalize the user's encoding name to avoid case issues.
*/
private static String normalize(String name)
{
CharBuffer cb = new CharBuffer();
int len = name.length();
for (int i = 0; i < len; i++) {
char ch = name.charAt(i);
if (Character.isLowerCase(ch))
cb.append(Character.toUpperCase(ch));
else if (ch == '_')
cb.append('-');
else
cb.append(ch);
}
return cb.close();
}
static {
_javaName = new ConcurrentHashMap();
_mimeName = new ConcurrentHashMap();
_localeName = new ConcurrentHashMap();
_mimeName.put("ANSI-X3.4-1968", "US-ASCII");
_mimeName.put("ISO-IR-6", "US-ASCII");
_mimeName.put("ISO-646.IRV:1991", "US-ASCII");
_mimeName.put("ASCII", "US-ASCII");
_mimeName.put("ISO646-US", "US-ASCII");
_mimeName.put("US-ASCII", "US-ASCII");
_mimeName.put("us", "US-ASCII");
_mimeName.put("IBM367", "US-ASCII");
_mimeName.put("CP367", "US-ASCII");
_mimeName.put("CSASCII", "US-ASCII");
_javaName.put("US-ASCII", "ISO8859_1");
_mimeName.put("ISO-2022-KR", "ISO-2022-KR");
_mimeName.put("CSISO2022KR", "ISO-2022-KR");
_mimeName.put("ISO2022-KR", "ISO-2022-KR");
_javaName.put("ISO-2022-KR", "ISO2022_KR");
_mimeName.put("EUC-KR", "EUC-KR");
_mimeName.put("CSEUCKR", "EUC-KR");
_javaName.put("EUC-KR", "EUC_KR");
_mimeName.put("ISO-2022-JP", "ISO-2022-JP");
_mimeName.put("CSISO2022JP", "ISO-2022-JP");
_mimeName.put("ISO2022-JP", "ISO-2022-JP");
_javaName.put("ISO-2022-JP", "ISO2022JP");
_mimeName.put("ISO-2022-JP-2", "ISO-2022-JP-2");
_mimeName.put("CSISO2022JP2", "ISO-2022-JP-2");
_mimeName.put("ISO2022-JP2", "ISO-2022-JP-2");
_javaName.put("ISO-2022-JP-2", "ISO2022_JP2");
_mimeName.put("ISO_8859-1:1987", "ISO-8859-1");
_mimeName.put("ISO-IR-100", "ISO-8859-1");
_mimeName.put("ISO-8859-1", "ISO-8859-1");
_mimeName.put("LATIN1", "ISO-8859-1");
_mimeName.put("LATIN-1", "ISO-8859-1");
_mimeName.put("L1", "ISO-8859-1");
_mimeName.put("IBM819", "ISO-8859-1");
_mimeName.put("CP819", "ISO-8859-1");
_mimeName.put("CSISOLATIN1", "ISO-8859-1");
_mimeName.put("ISO8859-1", "ISO-8859-1");
_mimeName.put("8859-1", "ISO-8859-1");
_mimeName.put("8859_1", "ISO-8859-1");
_javaName.put("ISO-8859-1", "ISO8859_1");
_mimeName.put("ISO-8859-2:1987", "ISO-8859-2");
_mimeName.put("ISO-IR-101", "ISO-8859-2");
_mimeName.put("ISO-8859-2", "ISO-8859-2");
_mimeName.put("LATIN2", "ISO-8859-2");
_mimeName.put("LATIN-2", "ISO-8859-2");
_mimeName.put("L2", "ISO-8859-2");
_mimeName.put("CSISOLATIN2", "ISO-8859-2");
_mimeName.put("ISO8859-2", "ISO-8859-2");
_javaName.put("ISO-8859-2", "ISO8859_2");
_mimeName.put("ISO-8859-3:1988", "ISO-8859-3");
_mimeName.put("ISO-IR-109", "ISO-8859-3");
_mimeName.put("ISO-8859-3", "ISO-8859-3");
_mimeName.put("ISO-8859-3", "ISO-8859-3");
_mimeName.put("LATIN3", "ISO-8859-3");
_mimeName.put("LATIN-3", "ISO-8859-3");
_mimeName.put("L3", "ISO-8859-3");
_mimeName.put("CSISOLATIN3", "ISO-8859-3");
_mimeName.put("ISO8859-3", "ISO-8859-3");
_javaName.put("ISO-8859-3", "ISO8859_3");
_mimeName.put("ISO-8859-4:1988", "ISO-8859-4");
_mimeName.put("ISO-IR-110", "ISO-8859-4");
_mimeName.put("ISO-8859-4", "ISO-8859-4");
_mimeName.put("ISO-8859-4", "ISO-8859-4");
_mimeName.put("LATIN4", "ISO-8859-4");
_mimeName.put("LATIN-4", "ISO-8859-4");
_mimeName.put("L4", "ISO-8859-4");
_mimeName.put("CSISOLATIN4", "ISO-8859-4");
_mimeName.put("ISO8859-4", "ISO-8859-4");
_javaName.put("ISO-8859-4", "ISO8859_4");
_mimeName.put("ISO-8859-5:1988", "ISO-8859-5");
_mimeName.put("ISO-IR-144", "ISO-8859-5");
_mimeName.put("ISO-8859-5", "ISO-8859-5");
_mimeName.put("ISO-8859-5", "ISO-8859-5");
_mimeName.put("CYRILLIC", "ISO-8859-5");
_mimeName.put("CSISOLATINCYRILLIC", "ISO-8859-5");
_mimeName.put("ISO8859-5", "ISO-8859-5");
_javaName.put("ISO-8859-5", "ISO8859_5");
_mimeName.put("ISO-8859-6:1987", "ISO-8859-6");
_mimeName.put("ISO-IR-127", "ISO-8859-6");
_mimeName.put("ISO-8859-6", "ISO-8859-6");
_mimeName.put("ISO-8859-6", "ISO-8859-6");
_mimeName.put("ECMA-114", "ISO-8859-6");
_mimeName.put("ASMO-708", "ISO-8859-6");
_mimeName.put("ARABIC", "ISO-8859-6");
_mimeName.put("CSISOLATINARABIC", "ISO-8859-6");
_mimeName.put("ISO8859-6", "ISO-8859-6");
_javaName.put("ISO-8859-6", "ISO8859_6");
_mimeName.put("ISO-8859-7:1987", "ISO-8859-7");
_mimeName.put("ISO-IR-126", "ISO-8859-7");
_mimeName.put("ISO-8859-7", "ISO-8859-7");
_mimeName.put("ISO-8859-7", "ISO-8859-7");
_mimeName.put("ELOT-928", "ISO-8859-7");
_mimeName.put("ECMA-118", "ISO-8859-7");
_mimeName.put("GREEK", "ISO-8859-7");
_mimeName.put("GREEK8", "ISO-8859-7");
_mimeName.put("CSISOLATINGREEN", "ISO-8859-7");
_mimeName.put("ISO8859-7", "ISO-8859-7");
_javaName.put("ISO-8859-7", "ISO8859_7");
_mimeName.put("ISO-8859-8:1988", "ISO-8859-8");
_mimeName.put("ISO-IR-138", "ISO-8859-8");
_mimeName.put("ISO-8859-8", "ISO-8859-8");
_mimeName.put("ISO-8859-8", "ISO-8859-8");
_mimeName.put("HEBREW", "ISO-8859-8");
_mimeName.put("CSISOLATINHEBREW", "ISO-8859-8");
_mimeName.put("ISO8859-8", "ISO-8859-8");
_javaName.put("ISO-8859-8", "ISO8859_8");
_mimeName.put("ISO-8859-9:1989", "ISO-8859-9");
_mimeName.put("ISO-IR-148", "ISO-8859-9");
_mimeName.put("ISO-8859-9", "ISO-8859-9");
_mimeName.put("ISO-8859-9", "ISO-8859-9");
_mimeName.put("LATIN5", "ISO-8859-9");
_mimeName.put("LATIN-5", "ISO-8859-9");
_mimeName.put("L5", "ISO-8859-9");
_mimeName.put("CSISOLATIN5", "ISO-8859-9");
_mimeName.put("ISO8859-9", "ISO-8859-9");
_javaName.put("ISO-8859-9", "ISO8859_9");
/* unsupported by java
_mimeName.put("ISO_8859-10:1992", "ISO-8859-10");
_mimeName.put("iso-ir-157", "ISO-8859-10");
_mimeName.put("I6", "ISO-8859-10");
_mimeName.put("cslSOLatin6", "ISO-8859-10");
_mimeName.put("latin6", "ISO-8859-10");
_javaName.put("ISO-8859-10", "ISO8859_10");
*/
_mimeName.put("UTF-7", "UTF-7");
_mimeName.put("UTF7", "UTF-7");
_javaName.put("UTF-7", "UTF7");
_mimeName.put("UTF-8", "utf-8");
_mimeName.put("UTF8", "utf-8");
_javaName.put("UTF-8", "UTF8");
_mimeName.put("UTF-16", "utf-16");
_mimeName.put("UTF16", "utf-16");
_javaName.put("UTF-16", "UTF16");
_mimeName.put("UTF-16-REV", "utf-16-rev");
_mimeName.put("UTF16-REV", "utf-16-rev");
_javaName.put("utf-16-rev", "UTF16_REV");
_mimeName.put("JIS-ENCODING", "JIS_Encoding");
_mimeName.put("JIS-ENCODING", "JIS_Encoding");
_mimeName.put("CSJISENCODING", "JIS_Encoding");
_javaName.put("JIS_Encoding", "JIS_ENCODING");
_mimeName.put("SHIFT-JIS", "Shift_JIS");
_mimeName.put("SHIFT_JIS", "Shift_JIS");
_mimeName.put("CSSHIFTJIS", "Shift_JIS");
_mimeName.put("SJIS", "Shift_JIS");
_javaName.put("Shift_JIS", "SJIS");
_mimeName.put("EUC-JP", "EUC-JP");
_mimeName.put("EUC-JP", "EUC-JP");
_mimeName.put("EUCJP", "EUC-JP");
_mimeName.put("EUC-JP-LINUX", "EUC-JP");
_javaName.put("EUC-JP", "EUC_JP");
_mimeName.put("GB2312", "GB2312");
_mimeName.put("CSGB2312", "GB2312");
_javaName.put("GB2312", "GB2312");
_mimeName.put("GBK", "GBK");
_javaName.put("GBK", "GBK");
_mimeName.put("BIG5", "Big5");
_mimeName.put("BIG-5", "Big5");
_mimeName.put("CSBIG5", "Big5");
_javaName.put("Big5", "BIG5");
_mimeName.put("KOI8-R", "KOI8-R");
_mimeName.put("KOI-8-R", "KOI8-R");
_mimeName.put("KOI8-R", "KOI8-R");
_javaName.put("KOI8-R", "KOI8-R");
_mimeName.put("MS950", "ms950");
_javaName.put("ms950", "MS950");
_javaName.put("JAVA", "JAVA");
_mimeName.put("windows-hack", "ISO-8859-1");
_mimeName.put("WINDOWS-HACK", "ISO-8859-1");
_javaName.put("WINDOWS-HACK", "WindowsHack");
// #4180
_mimeName.put("MACROMAN", "utf-8");
_javaName.put("MacRoman", "utf-8");
_mimeName.put("KS_C_5601-1987", "ks_c_5601-1987");
_javaName.put("ks_c_5601-1987", "Cp949");
_javaName.put("IBM500", "Cp500");
String []cp = new String[] {
"037", "1006", "1025", "1026", "1046", "1097",
"1098", "1112", "1122", "1123", "1124", "1250",
"1251", "1252", "1253", "1254", "1255", "1256",
"1257", "1258", "1381", "273", "277", "278", "280", "284",
"285", "297", "33722", "420", "424", "437", "500", "737",
"775", "838", "850", "852", "855", "857", "860", "861", "862",
"863", "864", "865", "866", "868", "869", "870", "871", "874",
"875", "918", "921", "922", "930", "933", "935", "937", "939",
"942", "948", "949", "964", "970"
};
for (int i = 0; i < cp.length; i++) {
_mimeName.put("CP" + cp[i], "windows-" + cp[i]);
_mimeName.put("WINDOWS-" + cp[i], "windows-" + cp[i]);
_javaName.put("windows-" + cp[i], "Cp" + cp[i]);
}
// from http://www.w3c.org/International/O-charset-lang.html
_localeName = new ConcurrentHashMap();
_localeName.put("af", "ISO-8859-1");
_localeName.put("sq", "ISO-8859-1");
_localeName.put("ar", "ISO-8859-6");
_localeName.put("eu", "ISO-8859-1");
_localeName.put("bg", "ISO-8859-5");
_localeName.put("be", "ISO-8859-5");
_localeName.put("ca", "ISO-8859-1");
_localeName.put("hr", "ISO-8859-2");
_localeName.put("cs", "ISO-8859-2");
_localeName.put("da", "ISO-8859-1");
_localeName.put("nl", "ISO-8859-1");
_localeName.put("en", "ISO-8859-1");
_localeName.put("eo", "ISO-8859-3");
_localeName.put("et", "ISO-8859-4");
_localeName.put("fo", "ISO-8859-1");
_localeName.put("fi", "ISO-8859-1");
_localeName.put("fr", "ISO-8859-1");
_localeName.put("gl", "ISO-8859-1");
_localeName.put("de", "ISO-8859-1");
_localeName.put("el", "ISO-8859-7");
_localeName.put("iw", "ISO-8859-8");
_localeName.put("hu", "ISO-8859-2");
_localeName.put("is", "ISO-8859-1");
_localeName.put("ga", "ISO-8859-1");
_localeName.put("it", "ISO-8859-1");
_localeName.put("ja", "Shift_JIS");
_localeName.put("lv", "ISO-8859-4");
_localeName.put("lt", "ISO-8859-4");
_localeName.put("mk", "ISO-8859-5");
_localeName.put("mt", "ISO-8859-3");
_localeName.put("no", "ISO-8859-1");
_localeName.put("pl", "ISO-8859-2");
_localeName.put("pt", "ISO-8859-1");
_localeName.put("ro", "ISO-8859-2");
// _localeName.put("ru", "KOI8-R");
_localeName.put("ru", "ISO-8859-5");
_localeName.put("gd", "ISO-8859-1");
_localeName.put("sr", "ISO-8859-5");
_localeName.put("sk", "ISO-8859-2");
_localeName.put("sl", "ISO-8859-2");
_localeName.put("es", "ISO-8859-1");
_localeName.put("sv", "ISO-8859-1");
_localeName.put("tr", "ISO-8859-9");
_localeName.put("uk", "ISO-8859-5");
_localeName.put("ko", "EUC-KR");
_localeName.put("zh", "GB2312");
_localeName.put("zh_TW", "Big5");
}
}