All downloads are free. The search and download functionality uses the official Maven repository.

com.neovisionaries.i18n.ScriptCode Maven / Gradle / Ivy

/*
 * Copyright (C) 2013-2014 Neo Visionaries Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.neovisionaries.i18n;


import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;


/**
 * ISO 15924 script code.
 *
 * @since 1.2
 * @author Takahiko Kawasaki
 */
public enum ScriptCode
{
    /**
     * Undefined [-1]
     *
     * <p>
     * <i>This is not an official ISO 15924 code.</i>
     * </p>
     *
     * @since 1.14
     * @see #Zxxx Zxxx: 997 Code for unwritten documents
     * @see #Zyyy Zyyy: 998 Code for undetermined script
     * @see #Zzzz Zzzz: 999 Code for uncoded script
     */
    Undefined(-1, "Undefined"),

    /** Afaka [439] */
    Afak(439, "Afaka"),

    /** Caucasian Albanian [239] */
    Aghb(239, "Caucasian Albanian"),

    /** Arabic [160] */
    Arab(160, "Arabic"),

    /** Imperial Aramaic [124] */
    Armi(124, "Imperial Aramaic"),

    /** Armenian [230] */
    Armn(230, "Armenian"),

    /** Avestan [134] */
    Avst(134, "Avestan"),

    /** Balinese [360] */
    Bali(360, "Balinese"),

    /** Bamum [435] */
    Bamu(435, "Bamum"),

    /** Bassa Vah [259] */
    Bass(259, "Bassa Vah"),

    /** Batak [365] */
    Batk(365, "Batak"),

    /** Bengali [325] */
    Beng(325, "Bengali"),

    /** Blissymbols [550] */
    Blis(550, "Blissymbols"),

    /** Bopomofo [285] */
    Bopo(285, "Bopomofo"),

    /** Brahmi [300] */
    Brah(300, "Brahmi"),

    /** Braille [570] */
    Brai(570, "Braille"),

    /** Buginese [367] */
    Bugi(367, "Buginese"),

    /** Buhid [372] */
    Buhd(372, "Buhid"),

    /** Chakma [349] */
    Cakm(349, "Chakma"),

    /** Unified Canadian Aboriginal Syllabics [440] */
    Cans(440, "Unified Canadian Aboriginal Syllabics"),

    /** Carian [201] */
    Cari(201, "Carian"),

    /** Cham [358] */
    Cham(358, "Cham"),

    /** Cherokee [445] */
    Cher(445, "Cherokee"),

    /** Cirth [291] */
    Cirt(291, "Cirth"),

    /** Coptic [204] */
    Copt(204, "Coptic"),

    /** Cypriot [403] */
    Cprt(403, "Cypriot"),

    /** Cyrillic [220] */
    Cyrl(220, "Cyrillic"),

    /** Cyrillic [221] */
    Cyrs(221, "Cyrillic"),

    /** Devanagari [315] */
    Deva(315, "Devanagari"),

    /** Deseret [250] */
    Dsrt(250, "Deseret"),

    /** Duployan shorthand, Duployan stenography [755] */
    Dupl(755, "Duployan shorthand, Duployan stenography"),

    // NOTE: numeric codes below 100 must be written WITHOUT a leading zero.
    // In Java a literal such as 070 is octal (= 56 decimal), which would
    // silently corrupt the ISO 15924 numeric code.

    /** Egyptian demotic [070] */
    Egyd(70, "Egyptian demotic"),

    /** Egyptian hieratic [060] */
    Egyh(60, "Egyptian hieratic"),

    /** Egyptian hieroglyphs [050] */
    Egyp(50, "Egyptian hieroglyphs"),

    /** Elbasan [226] */
    Elba(226, "Elbasan"),

    /** Ethiopic [430] */
    Ethi(430, "Ethiopic"),

    /** Georgian [240] */
    Geor(240, "Georgian"),

    /** Khutsuri [241] */
    Geok(241, "Khutsuri"),

    /** Glagolitic [225] */
    Glag(225, "Glagolitic"),

    /** Gothic [206] */
    Goth(206, "Gothic"),

    /** Grantha [343] */
    Gran(343, "Grantha"),

    /** Greek [200] */
    Grek(200, "Greek"),

    /** Gujarati [320] */
    Gujr(320, "Gujarati"),

    /** Gurmukhi [310] */
    Guru(310, "Gurmukhi"),

    /** Hangul [286] */
    Hang(286, "Hangul"),

    /** Han [500] */
    Hani(500, "Han"),

    /** Hanunoo [371] */
    Hano(371, "Hanunoo"),

    /** Han [501] */
    Hans(501, "Han"),

    /** Han [502] */
    Hant(502, "Han"),

    /** Hebrew [125] */
    Hebr(125, "Hebrew"),

    /** Hiragana [410] */
    Hira(410, "Hiragana"),

    /** Anatolian Hieroglyphs [080] */
    Hluw(80, "Anatolian Hieroglyphs"),

    /** Pahawh Hmong [450] */
    Hmng(450, "Pahawh Hmong"),

    /** Japanese syllabaries [412] */
    Hrkt(412, "Japanese syllabaries"),

    /** Old Hungarian [176] */
    Hung(176, "Old Hungarian"),

    /** Indus [610] */
    Inds(610, "Indus"),

    /** Old Italic [210] */
    Ital(210, "Old Italic"),

    /** Javanese [361] */
    Java(361, "Javanese"),

    /** Japanese [413] */
    Jpan(413, "Japanese"),

    /** Jurchen [510] */
    Jurc(510, "Jurchen"),

    /** Kayah Li [357] */
    Kali(357, "Kayah Li"),

    /** Katakana [411] */
    Kana(411, "Katakana"),

    /** Kharoshthi [305] */
    Khar(305, "Kharoshthi"),

    /** Khmer [355] */
    Khmr(355, "Khmer"),

    /** Khojki [322] */
    Khoj(322, "Khojki"),

    /** Kannada [345] */
    Knda(345, "Kannada"),

    /** Korean [287] */
    Kore(287, "Korean"),

    /** Kpelle [436] */
    Kpel(436, "Kpelle"),

    /** Kaithi [317] */
    Kthi(317, "Kaithi"),

    /** Tai Tham [351] */
    Lana(351, "Tai Tham"),

    /** Lao [356] */
    Laoo(356, "Lao"),

    /** Latin [217] */
    Latf(217, "Latin"),

    /** Latin [216] */
    Latg(216, "Latin"),

    /** Latin [215] */
    Latn(215, "Latin"),

    /** Lepcha [335] */
    Lepc(335, "Lepcha"),

    /** Limbu [336] */
    Limb(336, "Limbu"),

    /** Linear A [400] */
    Lina(400, "Linear A"),

    /** Linear B [401] */
    Linb(401, "Linear B"),

    /** Lisu [399] */
    Lisu(399, "Lisu"),

    /** Loma [437] */
    Loma(437, "Loma"),

    /** Lycian [202] */
    Lyci(202, "Lycian"),

    /** Lydian [116] */
    Lydi(116, "Lydian"),

    /** Mahajani [314] */
    Mahj(314, "Mahajani"),

    /** Mandaic, Mandaean [140] */
    Mand(140, "Mandaic, Mandaean"),

    /** Manichaean [139] */
    Mani(139, "Manichaean"),

    /** Mayan hieroglyphs [090] */
    Maya(90, "Mayan hieroglyphs"),

    /** Mende [438] */
    Mend(438, "Mende"),

    /** Meroitic Cursive [101] */
    Merc(101, "Meroitic Cursive"),

    /** Meroitic Hieroglyphs [100] */
    Mero(100, "Meroitic Hieroglyphs"),

    /** Malayalam [347] */
    Mlym(347, "Malayalam"),

    /** Moon [218] */
    Moon(218, "Moon"),

    /** Mongolian [145] */
    Mong(145, "Mongolian"),

    /** Mro, Mru [199] */
    Mroo(199, "Mro, Mru"),

    /** Meitei Mayek [337] */
    Mtei(337, "Meitei Mayek"),

    /** Myanmar [350] */
    Mymr(350, "Myanmar"),

    /** Old North Arabian [106] */
    Narb(106, "Old North Arabian"),

    /** Nabataean [159] */
    Nbat(159, "Nabataean"),

    /** Nakhi Geba [420] */
    Nkgb(420, "Nakhi Geba"),

    /** N&rsquo;Ko [165] */
    Nkoo(165, "N\u2019Ko"),

    /** Nushu [499] */
    Nshu(499, "Nushu"),

    /** Ogham [212] */
    Ogam(212, "Ogham"),

    /** Ol Chiki [261] */
    Olck(261, "Ol Chiki"),

    /** Old Turkic, Orkhon Runic [175] */
    Orkh(175, "Old Turkic, Orkhon Runic"),

    /** Oriya [327] */
    Orya(327, "Oriya"),

    /** Osmanya [260] */
    Osma(260, "Osmanya"),

    /** Palmyrene [126] */
    Palm(126, "Palmyrene"),

    /** Old Permic [227] */
    Perm(227, "Old Permic"),

    /** Phags-pa [331] */
    Phag(331, "Phags-pa"),

    /** Inscriptional Pahlavi [131] */
    Phli(131, "Inscriptional Pahlavi"),

    /** Psalter Pahlavi [132] */
    Phlp(132, "Psalter Pahlavi"),

    /** Book Pahlavi [133] */
    Phlv(133, "Book Pahlavi"),

    /** Phoenician [115] */
    Phnx(115, "Phoenician"),

    /** Miao [282] */
    Plrd(282, "Miao"),

    /** Inscriptional Parthian [130] */
    Prti(130, "Inscriptional Parthian"),

    /** Reserved for private use [900] */
    Qaaa(900, "Reserved for private use"),

    /** Reserved for private use [949] */
    Qabx(949, "Reserved for private use"),

    /** Rejang [363] */
    Rjng(363, "Rejang"),

    /** Rongorongo [620] */
    Roro(620, "Rongorongo"),

    /** Runic [211] */
    Runr(211, "Runic"),

    /** Samaritan [123] */
    Samr(123, "Samaritan"),

    /** Sarati [292] */
    Sara(292, "Sarati"),

    /** Old South Arabian [105] */
    Sarb(105, "Old South Arabian"),

    /** Saurashtra [344] */
    Saur(344, "Saurashtra"),

    /** SignWriting [095] */
    Sgnw(95, "SignWriting"),

    /** Shavian [281] */
    Shaw(281, "Shavian"),

    /** Sharada [319] */
    Shrd(319, "Sharada"),

    /** Khudawadi, Sindhi [318] */
    Sind(318, "Khudawadi, Sindhi"),

    /** Sinhala [348] */
    Sinh(348, "Sinhala"),

    /** Sora Sompeng [398] */
    Sora(398, "Sora Sompeng"),

    /** Sundanese [362] */
    Sund(362, "Sundanese"),

    /** Syloti Nagri [316] */
    Sylo(316, "Syloti Nagri"),

    /** Syriac [135] */
    Syrc(135, "Syriac"),

    /** Syriac [138] */
    Syre(138, "Syriac"),

    /** Syriac [137] */
    Syrj(137, "Syriac"),

    /** Syriac [136] */
    Syrn(136, "Syriac"),

    /** Tagbanwa [373] */
    Tagb(373, "Tagbanwa"),

    /** Takri [321] */
    Takr(321, "Takri"),

    /** Tai Le [353] */
    Tale(353, "Tai Le"),

    /** New Tai Lue [354] */
    Talu(354, "New Tai Lue"),

    /** Tamil [346] */
    Taml(346, "Tamil"),

    /** Tangut [520] */
    Tang(520, "Tangut"),

    /** Tai Viet [359] */
    Tavt(359, "Tai Viet"),

    /** Telugu [340] */
    Telu(340, "Telugu"),

    /** Tengwar [290] */
    Teng(290, "Tengwar"),

    /** Tifinagh [120] */
    Tfng(120, "Tifinagh"),

    /** Tagalog [370] */
    Tglg(370, "Tagalog"),

    /** Thaana [170] */
    Thaa(170, "Thaana"),

    /** Thai [352] */
    Thai(352, "Thai"),

    /** Tibetan [330] */
    Tibt(330, "Tibetan"),

    /** Tirhuta [326] */
    Tirh(326, "Tirhuta"),

    /** Ugaritic [040] */
    Ugar(40, "Ugaritic"),

    /** Vai [470] */
    Vaii(470, "Vai"),

    /** Visible Speech [280] */
    Visp(280, "Visible Speech"),

    /** Warang Citi [262] */
    Wara(262, "Warang Citi"),

    /** Woleai [480] */
    Wole(480, "Woleai"),

    /** Old Persian [030] */
    Xpeo(30, "Old Persian"),

    /** Cuneiform, Sumero-Akkadian [020] */
    Xsux(20, "Cuneiform, Sumero-Akkadian"),

    /** Yi [460] */
    Yiii(460, "Yi"),

    /** Code for inherited script [994] */
    Zinh(994, "Code for inherited script"),

    /** Mathematical notation [995] */
    Zmth(995, "Mathematical notation"),

    /** Symbols [996] */
    Zsym(996, "Symbols"),

    /** Code for unwritten documents [997] */
    Zxxx(997, "Code for unwritten documents"),

    /** Code for undetermined script [998] */
    Zyyy(998, "Code for undetermined script"),

    /** Code for uncoded script [999] */
    Zzzz(999, "Code for uncoded script")
    ;


    /**
     * Map to look up ScriptCode by numeric code.
     * {@link #Undefined} (numeric -1) is intentionally not registered.
     */
    private static final Map<Integer, ScriptCode> numericMap = new HashMap<Integer, ScriptCode>();


    static
    {
        for (ScriptCode sc : values())
        {
            // Skip the non-official "Undefined" entry.
            if (sc.getNumeric() != -1)
            {
                numericMap.put(sc.getNumeric(), sc);
            }
        }
    }


    /**
     * The numeric code of this script code.
     */
    private final int numeric;


    /**
     * English name of this script code.
     */
    private final String name;


    /**
     * Constructor.
     *
     * @param numeric
     *         ISO 15924 numeric code (decimal), or -1 for {@link #Undefined}.
     *
     * @param name
     *         English name of the script.
     */
    private ScriptCode(int numeric, String name)
    {
        this.numeric = numeric;
        this.name = name;
    }


    /**
     * Get the numeric code of this script code.
     *
     * @return
     *         Numeric code.
     */
    public int getNumeric()
    {
        return numeric;
    }


    /**
     * Get English name of this script code.
     *
     * @return
     *         English name.
     */
    public String getName()
    {
        return name;
    }


    /**
     * Get a {@code ScriptCode} instance that corresponds to the given
     * ISO 15924 alpha-4 code.
     *
     * <p>
     * This method calls {@link #getByCode(String, boolean) getByCode}{@code (code, true)}.
     * Note that the behavior has changed since the version 1.13. In the older versions,
     * this method was an alias of {@code getByCode(code, false)}.
     * </p>
     *
     * @param code
     *         ISO 15924 alpha-4 code. Or "Undefined" (case sensitive).
     *
     * @return
     *         A {@code ScriptCode} instance, or {@code null} if not found.
     */
    public static ScriptCode getByCode(String code)
    {
        return getByCode(code, true);
    }


    /**
     * Get a {@code ScriptCode} instance that corresponds to the given
     * ISO 15924 alpha-4 code.
     *
     * <p>
     * This method calls {@link #getByCode(String, boolean) getByCode}{@code (code, false)}.
     * </p>
     *
     * @param code
     *         ISO 15924 alpha-4 code. Or "Undefined" (case insensitive).
     *
     * @return
     *         A {@code ScriptCode} instance, or {@code null} if not found.
     *
     * @since 1.13
     */
    public static ScriptCode getByCodeIgnoreCase(String code)
    {
        return getByCode(code, false);
    }


    /**
     * Get a {@code ScriptCode} instance that corresponds to the given
     * ISO 15924 alpha-4 code.
     *
     * @param code
     *         ISO 15924 alpha-4 code. Or "Undefined" (its case sensitivity
     *         depends on the value of {@code caseSensitive}).
     *
     * @param caseSensitive
     *         If {@code true}, the first letter of the given code should be
     *         capital and the other letters should be small. If {@code false},
     *         whether letters are capital or small does not matter.
     *         For example, {@code getByCode("JPAN", true)} returns
     *         {@code null} but {@code getByCode("JPAN", false)} returns
     *         {@link #Jpan}.
     *
     * @return
     *         A {@code ScriptCode} instance, or {@code null} if not found.
     */
    public static ScriptCode getByCode(String code, boolean caseSensitive)
    {
        if (code == null)
        {
            return null;
        }

        // Only two lengths can possibly match a constant name:
        // 4 (alpha-4 codes) and 9 ("Undefined").
        switch (code.length())
        {
            case 4:
            case 9:
                break;

            default:
                return null;
        }

        code = canonicalize(code, caseSensitive);

        try
        {
            return Enum.valueOf(ScriptCode.class, code);
        }
        catch (IllegalArgumentException e)
        {
            // No constant with that name.
            return null;
        }
    }


    /**
     * Get a {@code ScriptCode} instance that corresponds to the given
     * ISO 15924 numeric code.
     *
     * @param code
     *         ISO 15924 numeric code.
     *
     * @return
     *         A {@code ScriptCode} instance, or {@code null} if not found.
     *         If 0 or a negative value is given, {@code null} is returned.
     */
    public static ScriptCode getByCode(int code)
    {
        if (code <= 0)
        {
            return null;
        }

        return numericMap.get(code);
    }


    /**
     * Normalize a code to the "Xxxx" form (first letter capital, the rest
     * small) used by the constant names.
     *
     * @param code
     *         Code to normalize.
     *
     * @param caseSensitive
     *         If {@code true}, {@code code} is returned unchanged.
     *
     * @return
     *         The normalized code, or {@code null} if {@code code} is
     *         {@code null} or empty. The same instance is returned when
     *         no letter needs to be changed.
     */
    private static String canonicalize(String code, boolean caseSensitive)
    {
        if (code == null || code.length() == 0)
        {
            return null;
        }

        if (caseSensitive)
        {
            return code;
        }

        // A new instance is assigned to this variable
        // only if modification is needed.
        StringBuilder sb = null;

        for (int i = 0; i < code.length(); ++i)
        {
            char ch = code.charAt(i);

            // The first letter.
            if (i == 0)
            {
                if (!Character.isUpperCase(ch))
                {
                    // Modification is needed.
                    sb = new StringBuilder();
                    sb.append(Character.toUpperCase(ch));
                }
            }
            // The second and subsequent letters.
            else if (sb != null)
            {
                // Already building a modified copy.
                sb.append(Character.toLowerCase(ch));
            }
            else if (!Character.isLowerCase(ch))
            {
                // Modification is needed.
                sb = new StringBuilder();

                // Copy all the previous letters so far.
                sb.append(code.substring(0, i));

                // Lower the current letter.
                sb.append(Character.toLowerCase(ch));
            }
        }

        return (sb == null) ? code : sb.toString();
    }


    /**
     * Get a list of {@code ScriptCode} by a name regular expression.
     *
     * <p>
     * This method is almost equivalent to {@link #findByName(Pattern)
     * findByName}{@code (Pattern.compile(regex))}.
     * </p>
     *
     * @param regex
     *         Regular expression for names.
     *
     * @return
     *         List of {@code ScriptCode}. If nothing has matched,
     *         an empty list is returned.
     *
     * @throws IllegalArgumentException
     *         {@code regex} is {@code null}.
     *
     * @throws java.util.regex.PatternSyntaxException
     *         {@code regex} failed to be compiled.
     *
     * @since 1.11
     */
    public static List<ScriptCode> findByName(String regex)
    {
        if (regex == null)
        {
            throw new IllegalArgumentException("regex is null.");
        }

        // Compile the regular expression. This may throw
        // java.util.regex.PatternSyntaxException.
        Pattern pattern = Pattern.compile(regex);

        return findByName(pattern);
    }


    /**
     * Get a list of {@code ScriptCode} by a name pattern.
     *
     * <p>
     * For example, the list obtained by the code snippet below:
     * </p>
     *
     * <pre style="background-color: #EEEEEE; margin-left: 2em; margin-right: 2em; border: 1px solid black; padding: 0.5em;">
     * Pattern pattern = Pattern.compile("Egyptian.*");
     * List&lt;ScriptCode&gt; list = ScriptCode.findByName(pattern);</pre>
     *
     * <p>
     * contains 3 {@code ScriptCode}s as listed below.
     * </p>
     *
     * <ol>
     * <li>{@link #Egyd} : Egyptian demotic
     * <li>{@link #Egyh} : Egyptian hieratic
     * <li>{@link #Egyp} : Egyptian hieroglyphs
     * </ol>
     *
     * @param pattern
     *         Pattern to match names.
     *
     * @return
     *         List of {@code ScriptCode}. If nothing has matched,
     *         an empty list is returned.
     *
     * @throws IllegalArgumentException
     *         {@code pattern} is {@code null}.
     *
     * @since 1.11
     */
    public static List<ScriptCode> findByName(Pattern pattern)
    {
        if (pattern == null)
        {
            throw new IllegalArgumentException("pattern is null.");
        }

        List<ScriptCode> list = new ArrayList<ScriptCode>();

        for (ScriptCode entry : values())
        {
            // If the whole name matches the given pattern.
            if (pattern.matcher(entry.getName()).matches())
            {
                list.add(entry);
            }
        }

        return list;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy