All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.basistech.util.LanguageCode Maven / Gradle / Ivy

There is a newer version: 38.0.3
Show newest version
/*
* Copyright 2014 Basis Technology Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

//CHECKSTYLE:OFF
/*

 *  DO NOT EDIT THIS FILE    /Users/benson/x/rosette-common-java-api/target/checkout/api/target/generated-sources/com/basistech/util/LanguageCode.java

 *  It has been AutoGen-ed   Thu Feb  4 10:51:06 2016

 *  and the template file    /Users/benson/x/rosette-common-java-api/target/checkout/api/src/main/templates/LanguageCode.java.tpl

 */
package com.basistech.util;

import java.util.Map;
import java.util.HashMap;

/**
Enumeration for a set of ISO 639-based language codes used in Basis products.

LanguageCodes are based on the Feb 10, 2009 version of ISO 639-3. A LanguageCode is either standard, meaning that it is based on an ISO 639-3 language code, or nonstandard, meaning that it is a Basis extension. The nonstandard LanguageCodes are:
  • {@link #UNKNOWN UNKNOWN}
  • {@link #SIMPLIFIED_CHINESE SIMPLIFIED_CHINESE}
  • {@link #TRADITIONAL_CHINESE TRADITIONAL_CHINESE}
  • {@link #ENGLISH_UPPERCASE ENGLISH_UPPERCASE}
LanguageCodes have the following attributes.
  • {@linkplain #languageName() Name}: An ASCII, English name for the language. For standard LanguageCodes, it is based on the ISO 639-3 reference name field; in some cases it is a simplified version of the field. No two LanguageCodes have the same value of this attribute.
  • {@linkplain #ISO639_3() ISO 639-3 code}: For standard LanguageCodes, it is a three-letter ISO 639-3 code. For nonstandard LanguageCodes, it is a three-letter code different from any ISO 639-3 code. No two LanguageCodes have the same value of this attribute.
  • {@linkplain #ISO639_1() ISO 639-1 code}: In the ISO 639-3 specification, all languages have a three-letter code, and some languages also have a two-letter ISO 639-1 code. For standard LanguageCodes, this attribute is either a two-letter ISO 639-1 code, or {@link #UNCODED_ISO639_1 UNCODED_ISO639_1} ("zz"). For nonstandard LanguageCodes, this attribute is either "xx" for {@link #UNKNOWN UNKNOWN}, or a five-letter code of the form ab_cde. Except for {@link #UNCODED_ISO639_1 UNCODED_ISO639_1}, no two LanguageCodes have the same value of this attribute.
  • {@linkplain #getDefaultScript() Default script}: For languages predominantly written in only one script, this attribute is that script. For other languages, it is {@link ISO15924#Zyyy}. This mapping from languages to scripts is provided by Basis; it does not directly correspond to any ISO data.
  • {@linkplain #languageID() Numeric ID}: A unique integer for each LanguageCode. It is not necessarily the same value as the position of the LanguageCode in the result of {@link #values() values()}.
*/ public enum LanguageCode { /**
Unknown xxx xx Zyyy 0
*/ UNKNOWN (0, "xxx", "xx", "Unknown", ISO15924.Zyyy), /**
Afrikaans afr af Latn 75
*/ AFRIKAANS (75, "afr", "af", "Afrikaans", ISO15924.Latn), /**
Albanian sqi sq Latn 1
*/ ALBANIAN (1, "sqi", "sq", "Albanian", ISO15924.Latn), /**
Amharic amh am Ethi 62
*/ AMHARIC (62, "amh", "am", "Amharic", ISO15924.Ethi), /**
Arabic ara ar Arab 2
*/ ARABIC (2, "ara", "ar", "Arabic", ISO15924.Arab), /**
Bengali ben bn Beng 3
*/ BENGALI (3, "ben", "bn", "Bengali", ISO15924.Beng), /**
Bulgarian bul bg Cyrl 4
*/ BULGARIAN (4, "bul", "bg", "Bulgarian", ISO15924.Cyrl), /**
Catalan cat ca Latn 5
*/ CATALAN (5, "cat", "ca", "Catalan", ISO15924.Latn), /**
Chinese zho zh Hani 6
*/ CHINESE (6, "zho", "zh", "Chinese", ISO15924.Hani), /**
Croatian hrv hr Latn 7
*/ CROATIAN (7, "hrv", "hr", "Croatian", ISO15924.Latn), /**
Czech ces cs Latn 8
*/ CZECH (8, "ces", "cs", "Czech", ISO15924.Latn), /**
Danish dan da Latn 9
*/ DANISH (9, "dan", "da", "Danish", ISO15924.Latn), /**
Dari prs zz Arab 60
*/ DARI (60, "prs", "zz", "Dari", ISO15924.Arab), /**
Dutch nld nl Latn 10
*/ DUTCH (10, "nld", "nl", "Dutch", ISO15924.Latn), /**
English eng en Latn 11
*/ ENGLISH (11, "eng", "en", "English", ISO15924.Latn), /**
English Uppercase uen en_uc Latn 59
*/ ENGLISH_UPPERCASE (59, "uen", "en_uc", "English Uppercase", ISO15924.Latn), /**
Estonian est et Latn 12
*/ ESTONIAN (12, "est", "et", "Estonian", ISO15924.Latn), /**
Finnish fin fi Latn 13
*/ FINNISH (13, "fin", "fi", "Finnish", ISO15924.Latn), /**
French fra fr Latn 14
*/ FRENCH (14, "fra", "fr", "French", ISO15924.Latn), /**
German deu de Latn 15
*/ GERMAN (15, "deu", "de", "German", ISO15924.Latn), /**
Greek ell el Grek 16
*/ GREEK (16, "ell", "el", "Greek", ISO15924.Grek), /**
Gujarati guj gu Gujr 17
*/ GUJARATI (17, "guj", "gu", "Gujarati", ISO15924.Gujr), /**
Hebrew heb he Hebr 18
*/ HEBREW (18, "heb", "he", "Hebrew", ISO15924.Hebr), /**
Hindi hin hi Deva 19
*/ HINDI (19, "hin", "hi", "Hindi", ISO15924.Deva), /**
Hungarian hun hu Latn 20
*/ HUNGARIAN (20, "hun", "hu", "Hungarian", ISO15924.Latn), /**
Icelandic isl is Latn 21
*/ ICELANDIC (21, "isl", "is", "Icelandic", ISO15924.Latn), /**
Indonesian ind id Latn 22
*/ INDONESIAN (22, "ind", "id", "Indonesian", ISO15924.Latn), /**
Italian ita it Latn 23
*/ ITALIAN (23, "ita", "it", "Italian", ISO15924.Latn), /**
Japanese jpn ja Hani 24
*/ JAPANESE (24, "jpn", "ja", "Japanese", ISO15924.Hani), /**
Kannada kan kn Knda 25
*/ KANNADA (25, "kan", "kn", "Kannada", ISO15924.Knda), /**
Kinyarwanda kin rw Latn 67
*/ KINYARWANDA (67, "kin", "rw", "Kinyarwanda", ISO15924.Latn), /**
Korean kor ko Hang 26
*/ KOREAN (26, "kor", "ko", "Korean", ISO15924.Hang), /**
Kurdish kur ku Arab 27
*/ KURDISH (27, "kur", "ku", "Kurdish", ISO15924.Arab), /**
Latvian lav lv Latn 28
*/ LATVIAN (28, "lav", "lv", "Latvian", ISO15924.Latn), /**
Lithuanian lit lt Latn 29
*/ LITHUANIAN (29, "lit", "lt", "Lithuanian", ISO15924.Latn), /**
Macedonian mkd mk Cyrl 30
*/ MACEDONIAN (30, "mkd", "mk", "Macedonian", ISO15924.Cyrl), /**
Malagasy mlg mg Latn 63
*/ MALAGASY (63, "mlg", "mg", "Malagasy", ISO15924.Latn), /**
Malay msa ms Latn 31
*/ MALAY (31, "msa", "ms", "Malay", ISO15924.Latn), /**
Malay, Standard zsm ms_sd Latn 83
*/ STANDARD_MALAY (83, "zsm", "ms_sd", "Malay, Standard", ISO15924.Latn), /**
Malayalam mal ml Mlym 32
*/ MALAYALAM (32, "mal", "ml", "Malayalam", ISO15924.Mlym), /**
Norwegian nor no Latn 33
*/ NORWEGIAN (33, "nor", "no", "Norwegian", ISO15924.Latn), /**
Norwegian Bokmal nob nb Latn 34
*/ NORWEGIAN_BOKMAL (34, "nob", "nb", "Norwegian Bokmal", ISO15924.Latn), /**
Norwegian Nynorsk nno nn Latn 35
*/ NORWEGIAN_NYNORSK (35, "nno", "nn", "Norwegian Nynorsk", ISO15924.Latn), /**
Nyanja nya ny Latn 65
*/ NYANJA (65, "nya", "ny", "Nyanja", ISO15924.Latn), /**
Pedi nso zz Latn 78
*/ PEDI (78, "nso", "zz", "Pedi", ISO15924.Latn), /**
Persian fas fa Arab 37
*/ PERSIAN (37, "fas", "fa", "Persian", ISO15924.Arab), /**
Plateau Malagasy plt zz Latn 64
*/ PLATEAU_MALAGASY (64, "plt", "zz", "Plateau Malagasy", ISO15924.Latn), /**
Polish pol pl Latn 38
*/ POLISH (38, "pol", "pl", "Polish", ISO15924.Latn), /**
Portuguese por pt Latn 39
*/ PORTUGUESE (39, "por", "pt", "Portuguese", ISO15924.Latn), /**
Pushto pus ps Arab 36
*/ PUSHTO (36, "pus", "ps", "Pushto", ISO15924.Arab), /**
Romanian ron ro Latn 40
*/ ROMANIAN (40, "ron", "ro", "Romanian", ISO15924.Latn), /**
Rundi run rn Latn 66
*/ RUNDI (66, "run", "rn", "Rundi", ISO15924.Latn), /**
Russian rus ru Cyrl 41
*/ RUSSIAN (41, "rus", "ru", "Russian", ISO15924.Cyrl), /**
Sango sag sg Latn 68
*/ SANGO (68, "sag", "sg", "Sango", ISO15924.Latn), /**
Serbian srp sr Zyyy 42
*/ SERBIAN (42, "srp", "sr", "Serbian", ISO15924.Zyyy), /**
Seselwa Creole French crs zz Latn 69
*/ SESELWA_CREOLE_FRENCH (69, "crs", "zz", "Seselwa Creole French", ISO15924.Latn), /**
Shona sna sn Latn 73
*/ SHONA (73, "sna", "sn", "Shona", ISO15924.Latn), /**
Chinese, Simplified zhs zh_sc Hans 43
*/ SIMPLIFIED_CHINESE (43, "zhs", "zh_sc", "Chinese, Simplified", ISO15924.Hans), /**
Slovak slk sk Latn 44
*/ SLOVAK (44, "slk", "sk", "Slovak", ISO15924.Latn), /**
Slovenian slv sl Latn 45
*/ SLOVENIAN (45, "slv", "sl", "Slovenian", ISO15924.Latn), /**
Somali som so Latn 46
*/ SOMALI (46, "som", "so", "Somali", ISO15924.Latn), /**
Southern Sotho sot st Latn 79
*/ SOUTHERN_SOTHO (79, "sot", "st", "Southern Sotho", ISO15924.Latn), /**
South Ndebele nbl nr Latn 77
*/ SOUTH_NDEBELE (77, "nbl", "nr", "South Ndebele", ISO15924.Latn), /**
Spanish spa es Latn 47
*/ SPANISH (47, "spa", "es", "Spanish", ISO15924.Latn), /**
Swahili swa sw Latn 70
*/ SWAHILI (70, "swa", "sw", "Swahili", ISO15924.Latn), /**
Swati ssw ss Latn 71
*/ SWATI (71, "ssw", "ss", "Swati", ISO15924.Latn), /**
Swedish swe sv Latn 48
*/ SWEDISH (48, "swe", "sv", "Swedish", ISO15924.Latn), /**
Tagalog tgl tl Latn 49
*/ TAGALOG (49, "tgl", "tl", "Tagalog", ISO15924.Latn), /**
Tamil tam ta Taml 50
*/ TAMIL (50, "tam", "ta", "Tamil", ISO15924.Taml), /**
Telugu tel te Telu 51
*/ TELUGU (51, "tel", "te", "Telugu", ISO15924.Telu), /**
Thai tha th Thai 52
*/ THAI (52, "tha", "th", "Thai", ISO15924.Thai), /**
Tigrinya tir ti Ethi 72
*/ TIGRINYA (72, "tir", "ti", "Tigrinya", ISO15924.Ethi), /**
Chinese, Traditional zht zh_tc Hant 53
*/ TRADITIONAL_CHINESE (53, "zht", "zh_tc", "Chinese, Traditional", ISO15924.Hant), /**
Tsonga tso ts Latn 82
*/ TSONGA (82, "tso", "ts", "Tsonga", ISO15924.Latn), /**
Tswana tsn tn Latn 80
*/ TSWANA (80, "tsn", "tn", "Tswana", ISO15924.Latn), /**
Turkish tur tr Latn 54
*/ TURKISH (54, "tur", "tr", "Turkish", ISO15924.Latn), /**
Ukrainian ukr uk Cyrl 55
*/ UKRAINIAN (55, "ukr", "uk", "Ukrainian", ISO15924.Cyrl), /**
Urdu urd ur Arab 56
*/ URDU (56, "urd", "ur", "Urdu", ISO15924.Arab), /**
Uzbek uzb uz Zyyy 57
*/ UZBEK (57, "uzb", "uz", "Uzbek", ISO15924.Zyyy), /**
Venda ven ve Latn 81
*/ VENDA (81, "ven", "ve", "Venda", ISO15924.Latn), /**
Vietnamese vie vi Latn 58
*/ VIETNAMESE (58, "vie", "vi", "Vietnamese", ISO15924.Latn), /**
Western Farsi pes zz Arab 61
*/ WESTERN_FARSI (61, "pes", "zz", "Western Farsi", ISO15924.Arab), /**
Xhosa xho xh Latn 76
*/ XHOSA (76, "xho", "xh", "Xhosa", ISO15924.Latn), /**
Zulu zul zu Latn 74
*/ ZULU (74, "zul", "zu", "Zulu", ISO15924.Latn), ; private int id; private String iso3; private String iso1; private String name; private ISO15924 defaultScript; LanguageCode(int id, String iso3, String iso1, String name, ISO15924 defaultScript ) { this.id = id; this.iso3 = iso3; this.iso1 = iso1; this.name = name; this.defaultScript = defaultScript; } /** * Returns the numeric ID attribute. * @return the numeric ID attribute. */ public int languageID() { return id; } /** * Returns the ISO639-1 code attribute. * @return the ISO639-1 code attribute. */ public String ISO639_1() { return iso1; } /** * Returns the ISO639-3 code attribute. * @return the ISO639-3 code attribute. */ public String ISO639_3() { return iso3; } /** * Returns the default script attribute. * @return the default script attribute. */ public ISO15924 getDefaultScript() { return defaultScript; } /** * Returns the name attribute. * @return the name attribute. */ public String languageName() { return name; } // "NativeCode" is a convention in com.basistech.jnigen.BeanJNIGenerator.ClassInfo.MethodIDInitialization. /** * Get the numeric ID for this language. * @return the numeric ID for this language. */ int getNativeCode() { return id; } static LanguageCode lookupByNativeCode(int nativeCode) { if (nativeCode < 0 || values().length <= nativeCode) throw new IllegalArgumentException("Invalid Language ID native code " + nativeCode); else return values()[id_index[nativeCode]]; } /** * Returns whether there is a LanguageCode with ID languageID. * @param languageID the numeric ID of a LanguageCode. * @return whether there is a LanguageCode with ID languageID. */ // This gives callers a way to check lookup validity if they're working with attribute values from a messy or // dangerous source such as a file. It wouldn't be so bad to force callers to just probe for the exception, but // this way is a little cleaner. Providing this function also makes the API more parallel to the C++ API, where // the analagous probe would have been harder for callers to do without being careful. // // We could even provide these and the lookup functions as Maps, but that's a little more functionality than I think // callers need, and we'd have to do more work to implement Map for the nice fast array lookup that we use for the // lookup by ID. Not that I know it's performance-sensitive, but the speed is nice to have. // public static boolean LanguageIDIsValid(int languageID) { try { lookupByLanguageID(languageID); return true; } catch (IllegalArgumentException e) { return false; } } /** * Returns the LanguageCode with ID languageID. * @param languageID the numeric ID of a LanguageCode. * @return the LanguageCode with ID languageID. * @throws IllegalArgumentException if there is no such LanguageCode. */ public static LanguageCode lookupByLanguageID(int languageID) { return lookupByNativeCode(languageID); } /** * Returns whether there is a LanguageCode with ISO code attribute iso639. * @param iso639 An ISO code attribute of a LanguageCode: either its ISO 639-3 code attribute, or its * ISO 639-1 code attribute. The comparison is case-sensitive. Returns false for {@link * #UNCODED_ISO639_1 UNCODED_ISO639_1}. * @return whether there is a LanguageCode with ISO code attribute iso639. */ // See LanguageIDIsValid for notes. public static boolean ISO639IsValid(String iso639) { try { lookupByISO639(iso639); return true; } catch (IllegalArgumentException e) { return false; } } /** * Returns the LanguageCode with ISO code attribute iso639. * @param iso639 An ISO code attribute of a LanguageCode: either its ISO 639-3 code attribute, or its * ISO 639-1 code attribute (but not {@link #UNCODED_ISO639_1 UNCODED_ISO639_1}, because that value does not * uniquely identify a language code.) The comparison is case-sensitive. * @return the LanguageCode with ISO code attribute iso639. * @throws IllegalArgumentException if there is no such LanguageCode, or if iso639 equals * {@link #UNCODED_ISO639_1 UNCODED_ISO639_1}. */ public static LanguageCode lookupByISO639(String iso639){ int size = iso639.length(); LanguageCode result = null; if (size == ISO639_1_CODE_LENGTH || size == ISO639_1_BASIS_CODE_LENGTH) result = iso639_1_index.get(iso639); else result = iso639_3_index.get(iso639); if (result == null) throw new IllegalArgumentException("Invalid ISO639 " + iso639); else return result; } /** * If the given LanguageCode is non-standard, returns the ISO 639-3 standard LanguageCode that best encapsulates it. * This does not apply to {@link #UNKNOWN UNKNOWN}. If the given LanguageCode is already standard, it is returned as it is. * @param lc A LanguageCode. * @return the standard LanguageCode that encapsulates lc if it is non-standard. Otherwise returns lc. */ public static LanguageCode normalizeNonStandard(LanguageCode lc) { switch(lc) { case SIMPLIFIED_CHINESE: case TRADITIONAL_CHINESE: return LanguageCode.CHINESE; case ENGLISH_UPPERCASE: return LanguageCode.ENGLISH; default: return lc; } } /** The string "zz", used as the ISO 639-1 attribute for languages present in the ISO 639-3 * specification, but for which the ISO 639-1 specification does not define a code. */ public static final String UNCODED_ISO639_1 = "zz"; private static final int ISO639_1_CODE_LENGTH = 2; private static final int ISO639_1_BASIS_CODE_LENGTH = 5; // 2 + underscore + 2-char-suffix // Indexes to look up elements by their attributes. // // In C++ the string lookups are implemented with code-generated static const sorted lists of strings that are // binary searched. We could do that here, but this implementation was faster to program. Furthermore, static // initialization is more predictable in Java (called once each time the class is loaded), so the compile-time setup // that we do in C++ is not needed for that purpose. I'm not even certain that hash tables are needed (the previous // version used linear search), but I'm making this sub-linear because the C++ version is sub-linear. private static Map iso639_1_index; private static Map iso639_3_index; private static int[] id_index; // id_index[id] = position of language code #id in values() private static final float HASHMAP_DEFAULT_LOAD_FACTOR = 0.75f; // // Initialization // static { // Initialize the attribute indexes. int indexCapacity = (int) (values().length / HASHMAP_DEFAULT_LOAD_FACTOR); iso639_1_index = new HashMap(indexCapacity); iso639_3_index = new HashMap(indexCapacity); id_index = new int[values().length]; LanguageCode[] values = values(); for (int valuesIdx = 0; valuesIdx < values.length; valuesIdx++) { LanguageCode code = values[valuesIdx]; if (! code.ISO639_1().equals(UNCODED_ISO639_1)) iso639_1_index.put(code.ISO639_1(), code); iso639_3_index.put(code.ISO639_3(), code); id_index[code.languageID()] = valuesIdx; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy