com.force.i18n.HumanLanguage Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of grammaticus Show documentation
Show all versions of grammaticus Show documentation
Localization Framework that allows grammatically correct renaming of nouns
/*
* Copyright (c) 2017, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
package com.force.i18n;
import static com.force.i18n.LanguageConstants.*;
import java.io.Serializable;
import java.util.Locale;
import com.force.i18n.commons.text.CaseFolder;
import com.force.i18n.commons.text.DeferredStringBuilder;
/**
* Interface for a Human Spoken Language, into which an application should be localized.
* The assumption is that an enum (or enum-like object) will be created to support the set
* of supported languages.
*
* Usually there is a mapping from the language field of a locale to one of these languages,
* but due to regional variation, a company may want to use a full locale to represent the human
* language, especially for variants in Spanish, Portuguese, German, and Chinese.
*
* For the override html language, see this page: http://www.w3.org/International/articles/bcp47/
* @author stamm
*/
public interface HumanLanguage extends Serializable{
/**
* @return the locale associated with this user language
*/
Locale getLocale();
/**
* @return the string for the locale associated with this user language (convenience method)
*/
String getLocaleString();
/**
* @return the text direction of the language (right now just RightToLeft vs LeftToRight)
*/
TextDirection getDirection();
/**
* @return the "override" language for historical reasons. Generally, this means that the
* locale for the language was wrong (dutch), or Java prevents it from being correct (iw vs he)
*/
String getOverrideLanguage();
/**
* @return the language code to use for HTTP communication (see RFC 1766)
*/
String getHttpLanguageCode();
/**
* @return the path, relative to a "base" directory of labels where the labels
* for this particular language will be found.
* This handles the special cases for Dutch, English, and Simplified Chinese
*/
String getDefaultLabelDirectoryPath();
/**
* @return the default language encoding charset to use for the language
*/
String getDefaultFileEncoding();
/**
* Return the user email encoding, which differs from the email encoding for
* Thai and Korean for reasons I don't understand.
* TODO: Why is this different from alternate? Really. What the hell
* @return the default user email encoding charset to use for the language
*/
String getDefaultUserEmailEncoding();
/**
* @return the email encoding to use when sending out emails in the given language
* Same as file encoding, except it wants to use UTF-8 whenever possible, unlike
* user emails.
*/
String getSystemEmailEncoding();
/**
* @return the language to use as the fallback language for translations.
* Unlike {@link #getFallbackLanguage()}, this is the fallback for translations only:
* French doesn't fall back to English, because that would be wrong; only the
* _XX (country variant) languages fall back. This is only used for customer translations.
*
* Summary: Use this only for country language variants, not for anything else.
*/
HumanLanguage getTranslationFallbackLanguage();
/**
* NOTE: You must ensure that the fallback language returned has a lower ordinal than this language in the
* enum (i.e. the extension language needs to appear afterwards if it's an enum). Note, this is usually the same as
* {@link #getTranslationFallbackLanguage()}, and only differs for mutually intelligible languages, like Malay and Indonesian.
* @return the language to use as the fallback language for labels that are not available in this language
*/
HumanLanguage getFallbackLanguage();
/**
* @return whether this language is used for linguistic testing. Esperanto is used in
* grammaticus for this. The default implementation returns {@code false}; implementations
* may override it.
*/
default boolean isTestOnlyLanguage() {
return false;
}
/**
* @return whether this language generally has translated values for the applications,
* as opposed to being a country/dialect variant of another language. This doesn't stop
* an application from translating, but will let you omit the large number of English and
* Arabic variants that will fall back to another language. The default implementation
* returns {@code true}; implementations may override it.
*/
default boolean isTranslatedLanguage() {
return true;
}
/**
* @return whether use of fallback strings in this language should be considered a problem.
* Note: this should only apply to non-fully translated languages that don't have a fallback
* to a "normal" language.
*/
boolean shouldLogFallbackStrings();
/**
* @return the key for the label in the LanguageName and TranslatedLabelName label section.
* This differs only to handle the en_US and nl_NL historical anomalies
*/
String getLabelKey();
/**
* @return whether turkish locale specific case folding should be used to handle
* the dotted/dotless i problem.
*/
boolean hasTurkicCaseFolding();
/**
* Return the value with the "case folded" using the unicode algorithm
* for lowercase based on the current user language
* @param input the string to be case folded (i.e. lowercased)
* @return the string converted to lowercase
* @see CaseFolder#toFoldedCase(String, boolean)
*/
String toFoldedCase(String input);
// Comparison and hashing
/**
* Note: this library assumes that the set of human languages is finite and
* established at the beginning of the application lifecycle (like an Enum).
* So equality comparisons in this library may be made using ==.
*
* @param o the reference object with which to compare.
* @return {@code true} if this object is the same as the {@code o} argument; {@code false} otherwise.
*/
@Override
boolean equals(Object o);
/**
* @return an integer that represents this language in the total set of human languages.
* The ordering of languages should be by ordinal.
*
* The "List" returned by the {@link LanguageProvider#getAll()} requires the
* ordinal is the index in that list.
*
* Note: this does assume that an Enum implements HumanLanguage. I would recommend this.
* In any case, the ordinal must be the index of this language in that list.
*/
int ordinal();
/**
* Helper methods for implementations of HumanLanguage. In JDK8, this would be the
* implementation for the interface, but alas, we once supported JDK7.
*/
public static class Helper {
/**
* Looks up the language for a locale by delegating to the provider returned by
* {@link LanguageProviderFactory#get()}.
* @param locale the locale to resolve; passed unchanged to the provider's {@code getLanguage}
* @return the language the provider returns for {@code locale}
*/
public static HumanLanguage get(Locale locale) {
return LanguageProviderFactory.get().getLanguage(locale);
}
/**
* Looks up the language for a locale string by delegating to the provider returned by
* {@link LanguageProviderFactory#get()}.
* @param localeString the locale string to resolve; passed unchanged to the provider's {@code getLanguage}
* @return the language the provider returns for {@code localeString}
*/
public static HumanLanguage get(String localeString) {
return LanguageProviderFactory.get().getLanguage(localeString);
}
/**
 * Tests whether a language is Simplified Chinese. The distinction is carried by the
 * country of the locale and not by its language code, so the full locale string is
 * compared against the known Simplified Chinese locales.
 *
 * @param language the language to test
 * @return {@code true} if the locale string of {@code language} is one of
 *         {@code CHINESE_CN}, {@code CHINESE_SG} or {@code CHINESE_MY};
 *         {@code false} otherwise
 */
public static boolean isSimplifiedChinese(HumanLanguage language) {
    final String localeString = language.getLocaleString();
    return localeString.equals(CHINESE_CN)
            || localeString.equals(CHINESE_SG)
            || localeString.equals(CHINESE_MY);
}
/**
 * Tests whether a language needs the Turkic (dotted/dotless i) case folding rules.
 * Only the language part of the locale is examined.
 *
 * @param language the language to test
 * @return {@code true} if the locale's language code is Turkish, Azerbaijani or Kazakh
 */
public static boolean hasTurkicCaseFolding(HumanLanguage language) {
    final String languageCode = language.getLocale().getLanguage();
    if (languageCode.equals(LanguageConstants.TURKISH)) {
        return true;
    }
    if (languageCode.equals(LanguageConstants.AZERBAIJANI)) {
        return true;
    }
    return languageCode.equals(LanguageConstants.KAZAKH);
}
/**
 * Chooses the default file encoding charset for a language, keyed on the language
 * part of its locale. Japanese, Korean, Chinese and Thai get legacy charsets, a fixed
 * list of languages gets UTF-8, and everything else gets ISO-8859-1.
 *
 * @param language the language to test
 * @return the default language encoding charset to use for the language
 */
public static String getDefaultFileEncoding(HumanLanguage language) {
    final String languageCode = language.getLocale().getLanguage();
    switch (languageCode) {
    case JAPANESE:
        // Don't change the Japanese encoding unless you also
        // change PlainFormatter.header() and other weirdness in the code
        // that depends on the Japanese file encoding being "MS932"
        return "MS932";
    case KOREAN:
        return "EUC-KR";
    case THAI:
        return "TIS-620";
    case CHINESE:
        // Simplified and Traditional Chinese share a language code; the country decides.
        if (isSimplifiedChinese(language)) {
            return "GB2312";
        }
        return "Big5";
    case ARABIC:
    case ARMENIAN:
    case BENGALI:
    case BOSNIAN:
    case BULGARIAN:
    case CROATIAN:
    case GEORGIAN:
    case HEBREW:
    case HEBREW_ISO:
    case HINDI:
    case KHMER:
    case LATVIAN:
    case LITHUANIAN:
    case MACEDONIAN:
    case MALTESE:
    case MOLDOVAN:
    case RUSSIAN:
    case SERBIAN_CYRILLIC:
    case SERBIAN_LATIN:
    case SLOVAK:
    case SLOVENE:
    case TAMIL:
    case UKRAINIAN:
    case URDU:
    case YIDDISH:
    case YIDDISH_ISO:
        return "UTF-8";
    default:
        return "ISO-8859-1";
    }
}
/**
 * Chooses the encoding used for a file name in the servlet, so that file names are
 * encoded specially for Japanese users (on AP0). Every other language uses UTF-8.
 *
 * @param language the language to test
 * @param forWindows is the file being downloaded on a windows device, where MS932
 *        should be used for Japanese
 * @return {@code "MS932"} or {@code "Shift_JIS"} for Japanese, depending on
 *         {@code forWindows}; {@code "UTF-8"} otherwise
 */
public static String getFileNameServletEncoding(HumanLanguage language, boolean forWindows) {
    final String languageCode = language.getLocale().getLanguage();
    if (!languageCode.equals(JAPANESE)) {
        return "UTF-8";
    }
    if (forWindows) {
        return "MS932";
    }
    return "Shift_JIS";
}
/**
 * Chooses the default encoding charset for user emails in a language. This differs from
 * {@link #getSystemEmailEncoding(HumanLanguage)} for Korean ({@code "ks_c_5601-1987"}
 * here, {@code "EUC-KR"} there) and Thai ({@code "UTF-8"} here, {@code "TIS-620"} there).
 * TODO: the reason for that difference is not documented.
 *
 * @param language the language to test
 * @return the default user email encoding charset to use for the language
 */
public static String getDefaultUserEmailEncoding(HumanLanguage language) {
    final String languageCode = language.getLocale().getLanguage();
    if (languageCode.equals(JAPANESE)) {
        return "ISO-2022-JP";
    }
    if (languageCode.equals(KOREAN)) {
        return "ks_c_5601-1987";
    }
    if (languageCode.equals(CHINESE)) {
        // Simplified and Traditional Chinese share a language code; the country decides.
        return isSimplifiedChinese(language) ? "GB2312" : "Big5";
    }
    return "UTF-8";
}
/**
 * Chooses the encoding to use when sending out system emails in the given language.
 * Same idea as the file encoding, except that it uses UTF-8 whenever possible, unlike
 * user emails. This is the old behavior from the old localizer.
 *
 * @param language the language to test
 * @return the email encoding charset: a legacy charset for Japanese, Korean, Chinese
 *         and Thai, {@code "UTF-8"} for everything else
 */
public static String getSystemEmailEncoding(HumanLanguage language) {
    final String languageCode = language.getLocale().getLanguage();
    final String encoding;
    switch (languageCode) {
    case JAPANESE:
        encoding = "ISO-2022-JP";
        break;
    case KOREAN:
        encoding = "EUC-KR";
        break;
    case CHINESE:
        // Simplified and Traditional Chinese share a language code; the country decides.
        encoding = isSimplifiedChinese(language) ? "GB2312" : "Big5";
        break;
    case THAI:
        encoding = "TIS-620";
        break;
    default:
        encoding = "UTF-8";
        break;
    }
    return encoding;
}
/**
 * Returns the value "case folded" (i.e. lowercased) for the given language. Greek is
 * handled character by character so that both lowercase sigma forms are preserved;
 * every other language is delegated to {@link CaseFolder#toFoldedCase(String, boolean)}
 * with the language's Turkic case folding flag.
 *
 * @param language the language whose folding rules apply
 * @param input the string to be case folded (i.e. lowercased)
 * @return the string converted to lowercase; for Greek, {@code null} when {@code input}
 *         is {@code null}
 * @see CaseFolder#toFoldedCase(String, boolean)
 */
public static String toFoldedCase(HumanLanguage language, String input) {
    final boolean greek = language.getLocale().getLanguage().equals(GREEK);
    if (!greek) {
        return CaseFolder.toFoldedCase(input, language.hasTurkicCaseFolding());
    }

    // TODO: This should be removed when we move CaseFolder.java into i18n
    // The grammatically correct handling of case folding (to lowercase)
    // Greek sigma differs from the Unicode case folding mapping.
    // There are two valid forms of lowercase sigma, σ and ς, and should be
    // left as is. Uppercase sigma (Σ) maps to either form depending on where
    // in the word the sigma is used, but because sfdcnames.xml and sfdcadjectives.xml
    // only capitalizes the first letter of a word, uppercase sigma should always fold
    // to lowercase sigma
    // http://en.wikipedia.org/wiki/Sigma, http://www.tlg.uci.edu/~opoudjis/dist/sigma.html
    if (input == null) {
        return null;
    }

    final DeferredStringBuilder folded = new DeferredStringBuilder(input);
    final int length = input.length();
    for (int pos = 0; pos < length; pos++) {
        final char current = input.charAt(pos);
        // ς or σ: don't fold lowercase sigma
        final boolean lowercaseSigma = current == '\u03C2' || current == '\u03C3';
        final char[] replacement = lowercaseSigma ? null : CaseFolder.toFoldedCase(current, false);
        if (replacement == null) {
            // no mapping for this character: keep it as is
            folded.append(current);
            continue;
        }
        // found a match! remap the character
        for (char mapped : replacement) {
            folded.append(mapped);
        }
    }
    return folded.toString();
}
/**
 * Helper method for determining which language to use for "variant" languages, with some
 * opinions when there might be a conflict, as in Simplified vs Traditional Chinese.
 *
 * This is the fallback for translations only: French doesn't fall back to English,
 * because that would be wrong; only the _XX (country variant) languages fall back.
 * This is only used for customer translations.
 *
 * Summary: Use this only for country language variants, not for anything else.
 *
 * @param locale the locale representing a language where the label may be missing
 * @return the locale to fall back to, or {@code null} when there is none: that is, when
 *         {@code locale} has no country, or is Chinese for TW or CN, or English for US.
 *         Chinese for HK falls back to {@link Locale#TRADITIONAL_CHINESE} and any other
 *         Chinese country to {@link Locale#SIMPLIFIED_CHINESE}; English for GB, CA or IL
 *         falls back to {@link Locale#US} and any other English country to
 *         {@link Locale#UK}; every other language falls back to its language-only locale.
 */
public static Locale getTranslationFallbackLanguageLocale(Locale locale) {
    final String country = locale.getCountry();
    if (country.isEmpty()) {
        // not a country variant, so there is nothing to fall back to
        return null;
    }

    final String languageCode = locale.getLanguage();
    if (LanguageConstants.CHINESE.equals(languageCode)) {
        if ("TW".equals(country) || "CN".equals(country)) {
            return null;
        }
        return "HK".equals(country) ? Locale.TRADITIONAL_CHINESE : Locale.SIMPLIFIED_CHINESE;
    }
    if (LanguageConstants.ENGLISH.equals(languageCode)) {
        if ("US".equals(country)) {
            return null; // English peculiarity, where en_US is for english.
        }
        final boolean fallsBackToUs =
                "GB".equals(country) || "CA".equals(country) || "IL".equals(country);
        return fallsBackToUs ? Locale.US : Locale.UK;
    }
    return new Locale(languageCode);
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy