All Downloads are FREE. Search and download functionalities are using the official Maven repository.

panda.lang.Charsets Maven / Gradle / Ivy

package panda.lang;

import java.nio.charset.Charset;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Charset name constants, ready-made {@link Charset} instances and small lookup helpers.
 *
 * The following encodings must be supported by every implementation of the Java platform
 * (see the "Standard charsets" section of {@link Charset}); consult the release documentation
 * of your implementation to see whether any other encodings are supported:
 *
 * - US-ASCII: seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of Unicode.
 * - ISO-8859-1: ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
 * - UTF-8: eight-bit Unicode Transformation Format.
 * - UTF-16BE: sixteen-bit Unicode Transformation Format, big-endian byte order.
 * - UTF-16LE: sixteen-bit Unicode Transformation Format, little-endian byte order.
 * - UTF-16: sixteen-bit Unicode Transformation Format, byte order specified by a mandatory
 *   initial byte-order mark (either order accepted on input, big-endian used on output).
 *
 * The class additionally keeps a mutable locale-to-encoding table, see
 * {@link #charsetFromLocale(Locale)}.
 *
 * @see java.nio.charset.Charset
 */
public class Charsets {
    // ------------------------------------------------------------------
    // Charset names that are NOT guaranteed to be available on every JVM
    // ------------------------------------------------------------------

    /** Japanese charset name: Microsoft code page 932 (optional charset, may be unavailable). */
    public static final String MS932 = "MS932";

    /** Japanese charset name: Shift_JIS (optional charset, may be unavailable). */
    public static final String Shift_JIS = "Shift_JIS";

    /** Japanese charset name: EUC-JP (optional charset, may be unavailable). */
    public static final String EUC_JP = "EUC-JP";

    /** Chinese charset name: GB2312 (optional charset, may be unavailable). */
    public static final String GB2312 = "GB2312";

    /** Chinese charset name: GBK (optional charset, may be unavailable). */
    public static final String GBK = "GBK";

    /** Chinese charset name: Big5 (optional charset, may be unavailable). */
    public static final String BIG5 = "Big5";

    // ------------------------------------------------------------------
    // Charset names
    // ------------------------------------------------------------------

    /**
     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
     * Every implementation of the Java platform is required to support this encoding.
     */
    public static final String ISO_8859_1 = "ISO-8859-1";

    /**
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the
     * Unicode character set.
     * Every implementation of the Java platform is required to support this encoding.
     */
    public static final String US_ASCII = "US-ASCII";

    /**
     * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial
     * byte-order mark (either order accepted on input, big-endian used on output).
     * Every implementation of the Java platform is required to support this encoding.
     */
    public static final String UTF_16 = "UTF-16";

    /**
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * Every implementation of the Java platform is required to support this encoding.
     */
    public static final String UTF_16BE = "UTF-16BE";

    /**
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * Every implementation of the Java platform is required to support this encoding.
     */
    public static final String UTF_16LE = "UTF-16LE";

    /** Charset name {@code UTF-32BE}; not in the list of charsets every JVM must support. */
    public static final String UTF_32BE = "UTF-32BE";

    /** Charset name {@code UTF-32LE}; not in the list of charsets every JVM must support. */
    public static final String UTF_32LE = "UTF-32LE";

    /**
     * Eight-bit Unicode Transformation Format.
     * Every implementation of the Java platform is required to support this encoding.
     */
    public static final String UTF_8 = "UTF-8";

    // ------------------------------------------------------------------
    // Charset instances (resolved once, when this class is initialized)
    // ------------------------------------------------------------------

    /** {@link Charset} instance for {@link #ISO_8859_1}. */
    public static final Charset CS_ISO_8859_1 = Charset.forName(ISO_8859_1);

    /** {@link Charset} instance for {@link #US_ASCII}. */
    public static final Charset CS_US_ASCII = Charset.forName(US_ASCII);

    /** {@link Charset} instance for {@link #UTF_16}. */
    public static final Charset CS_UTF_16 = Charset.forName(UTF_16);

    /** {@link Charset} instance for {@link #UTF_16BE}. */
    public static final Charset CS_UTF_16BE = Charset.forName(UTF_16BE);

    /** {@link Charset} instance for {@link #UTF_16LE}. */
    public static final Charset CS_UTF_16LE = Charset.forName(UTF_16LE);

    /** {@link Charset} instance for {@link #UTF_32BE}. */
    public static final Charset CS_UTF_32BE = Charset.forName(UTF_32BE);

    /** {@link Charset} instance for {@link #UTF_32LE}. */
    public static final Charset CS_UTF_32LE = Charset.forName(UTF_32LE);

    /** {@link Charset} instance for {@link #UTF_8}. */
    public static final Charset CS_UTF_8 = Charset.forName(UTF_8);

    /**
     * Locale-to-encoding table. Keys are {@link Locale#toString()} values (for example
     * {@code "zh_TW"}) or bare language codes (for example {@code "ja"}); values are
     * charset names.
     */
    private static final Map<String, String> charsetMap = new ConcurrentHashMap<String, String>();

    static {
        loadBuiltInCharsetMap();
    }

    /**
     * Returns the given Charset or the default Charset if the given Charset is null.
     *
     * @param charset a charset or null
     * @return the given Charset or the default Charset if the given Charset is null
     */
    public static Charset toCharset(final Charset charset) {
        return charset == null ? Charset.defaultCharset() : charset;
    }

    /**
     * Returns a Charset for the named charset. If the name is null, returns the default
     * Charset.
     *
     * @param charset the name of the requested charset, may be null
     * @return a Charset for the named charset
     * @throws java.nio.charset.IllegalCharsetNameException if the name is illegal
     * @throws java.nio.charset.UnsupportedCharsetException if the named charset is unavailable
     */
    public static Charset toCharset(final String charset) {
        return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
    }

    /**
     * Returns a Charset for the named charset, falling back to {@code defCharset} instead of
     * throwing when the name is null, empty, illegal or unsupported.
     *
     * @param charset the name of the requested charset, may be null or empty
     * @param defCharset fallback charset; when null the platform default charset is used
     * @return the named charset, or the fallback when the name cannot be resolved
     */
    public static Charset toCharset(final String charset, final Charset defCharset) {
        if (charset != null && !charset.isEmpty()) {
            try {
                return Charset.forName(charset);
            } catch (IllegalArgumentException ignored) {
                // IllegalCharsetNameException and UnsupportedCharsetException are both
                // IllegalArgumentExceptions; this overload is deliberately lenient.
            }
        }
        return defCharset == null ? Charset.defaultCharset() : defCharset;
    }

    /**
     * Replaces the content of the locale-to-encoding table with a preset map that assumes the
     * usual legacy encoding for each language. The previous content of the table is lost.
     *
     * Preset mappings:
     * ISO-8859-1: ca da de en es et fi fr is it nl no pt sv;
     * ISO-8859-2: cs hr hu lt lv pl ro sk sl sq;
     * ISO-8859-5: be bg mk ru sh sr uk;
     * ISO-8859-6: ar; ISO-8859-7: el; ISO-8859-8: iw and he; ISO-8859-9: tr;
     * Shift_JIS: ja; EUC-KR: ko; GB2312: zh; Big5: zh_TW.
     *
     * @see #clearCharsetMap
     * @see #setCharset
     */
    public static void loadBuiltInCharsetMap() {
        charsetMap.clear();
        register("ISO-8859-1",
            "ca", "da", "de", "en", "es", "et", "fi", "fr", "is", "it", "nl", "no", "pt", "sv");
        register("ISO-8859-2", "cs", "hr", "hu", "lt", "lv", "pl", "ro", "sk", "sl", "sq");
        register("ISO-8859-5", "be", "bg", "mk", "ru", "sh", "sr", "uk");
        register("ISO-8859-6", "ar");
        register("ISO-8859-7", "el");
        // "iw" is the legacy code older JDKs report for Hebrew; newer JDKs (17+) report "he".
        register("ISO-8859-8", "iw", "he");
        register("ISO-8859-9", "tr");
        register("Shift_JIS", "ja");
        register("EUC-KR", "ko");
        register("GB2312", "zh");
        register("Big5", "zh_TW");
    }

    /** Maps every given locale key to the same encoding name. */
    private static void register(String encoding, String... localeKeys) {
        for (String key : localeKeys) {
            charsetMap.put(key, encoding);
        }
    }

    /**
     * Clears the locale-to-encoding table.
     *
     * @see #loadBuiltInCharsetMap
     * @see #setCharset
     */
    public static void clearCharsetMap() {
        charsetMap.clear();
    }

    /**
     * Sets the character encoding to use for the given locale. The entry is stored under
     * {@code locale.toString()}.
     *
     * @param locale locale, must not be null
     * @param encoding encoding name, must not be null
     * @see #clearCharsetMap
     * @see #loadBuiltInCharsetMap
     */
    public static void setCharset(Locale locale, String encoding) {
        charsetMap.put(locale.toString(), encoding);
    }

    /**
     * Gets the preferred character encoding for the given locale. Encodings are associated
     * with locales through {@link #setCharset(Locale, String)} or
     * {@link #loadBuiltInCharsetMap()}.
     *
     * Lookup order, most specific first: the full locale name, then (only when the locale has
     * a variant) language plus country, then the language alone. A hit found through one of
     * the fallbacks is stored under the full locale name so later lookups match directly.
     *
     * @param loc the locale, must not be null
     * @return the preferred character encoding for the locale, or {@code null} when no entry
     *         matches
     */
    public static String charsetFromLocale(Locale loc) {
        final String fullName = loc.toString();

        // Full name match (may include country and variant).
        String charset = charsetMap.get(fullName);
        if (charset != null) {
            return charset;
        }

        if (loc.getVariant().length() > 0) {
            // Drop the variant and retry with language + country.
            Locale withoutVariant = new Locale(loc.getLanguage(), loc.getCountry());
            charset = charsetMap.get(withoutVariant.toString());
        }
        if (charset == null) {
            // Only fall back to the bare language when nothing more specific matched;
            // otherwise e.g. zh_TW_POSIX would wrongly resolve to the "zh" entry.
            charset = charsetMap.get(loc.getLanguage());
        }
        if (charset != null) {
            charsetMap.put(fullName, charset);
        }
        return charset;
    }

    /**
     * Tells whether the given charset name denotes a Unicode transformation format, i.e.
     * starts with {@code "UTF-"} (case-insensitive).
     *
     * @param charset charset name, may be null
     * @return true if the name starts with "UTF-", false otherwise (including for null)
     */
    public static boolean isUnicodeCharset(String charset) {
        final String prefix = "UTF-";
        return charset != null && charset.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Tells whether the named charset is available in this JVM.
     *
     * @param charset charset name, may be null or empty
     * @return true if the charset is supported; false if it is not, or if the name is null,
     *         empty or not a legal charset name
     */
    public static boolean isSupportedCharset(String charset) {
        if (charset == null || charset.isEmpty()) {
            return false;
        }
        try {
            return Charset.isSupported(charset);
        } catch (IllegalArgumentException illegalName) {
            // Charset.isSupported rejects syntactically illegal names with an exception;
            // for a yes/no query such a name is simply "not supported".
            return false;
        }
    }

    /**
     * @return the default charset of this Java virtual machine
     */
    public static Charset defaultCharset() {
        return Charset.defaultCharset();
    }

    /**
     * @param cs a charset or null
     * @return {@code cs}, or the platform default charset when {@code cs} is null
     */
    public static Charset defaultCharset(Charset cs) {
        return cs == null ? defaultCharset() : cs;
    }

    /**
     * @param cs a charset or null
     * @param def value to return when {@code cs} is null (may itself be null)
     * @return {@code cs}, or {@code def} when {@code cs} is null
     */
    public static Charset defaultCharset(Charset cs, Charset def) {
        return cs == null ? def : cs;
    }

    /**
     * @return the canonical name of the platform default charset
     */
    public static String defaultEncoding() {
        return Charset.defaultCharset().name();
    }

    /**
     * @param enc an encoding name or null
     * @return {@code enc}, or the name of the platform default charset when {@code enc} is null
     */
    public static String defaultEncoding(String enc) {
        return enc == null ? defaultEncoding() : enc;
    }

    /**
     * @param enc an encoding name or null
     * @param def value to return when {@code enc} is null (may itself be null)
     * @return {@code enc}, or {@code def} when {@code enc} is null
     */
    public static String defaultEncoding(String enc, String def) {
        return enc == null ? def : enc;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy