net.sf.okapi.lib.xliff2.lang.LanguageData Maven / Gradle / Ivy
/*===========================================================================
* Adapted from Henri Sivonen's code
* --------------------------------------------------------------------------
* Copyright (c) 2006 Henri Sivonen
* Copyright (c) 2007-2010 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
===========================================================================*/
package net.sf.okapi.lib.xliff2.lang;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;
class LanguageData {
private static final Pattern HYPHEN = Pattern.compile("-");
private static final String[][] EMPTY_DOUBLE_STRING_ARRAY = {};
private static final String[] EMPTY_STRING_ARRAY = {};
private static final String PREFIX = "prefix: ";
private static final String SUPPRESS_SCRIPT = "suppress-script: ";
private static final String SUBTAG = "subtag: ";
private static final String TAG = "tag: ";
private static final String TYPE = "type: ";
private static final String DEPRECATED = "deprecated: ";
private static final String PREFERRED_VALUE = "preferred-value: ";
private SortedSet languageSet = new TreeSet<>();
private SortedSet extlangSet = new TreeSet<>();
private SortedSet scriptSet = new TreeSet<>();
private SortedSet regionSet = new TreeSet<>();
private SortedSet variantSet = new TreeSet<>();
private SortedSet grandfatheredSet = new TreeSet<>();
private SortedSet redundantSet = new TreeSet<>();
private SortedSet deprecatedLangSet = new TreeSet<>();
private SortedSet deprecatedSet = new TreeSet<>();
private Map suppressedScriptByLanguageMap = new HashMap<>();
private Map prefixByExtlangMap = new HashMap<>();
private Map preferredValueByLanguageMap = new HashMap<>();
private Map> prefixesByVariantMap = new HashMap<>();
private String[] languages = null;
private String[] extlangs = null;
private String[] scripts = null;
private String[] regions = null;
private String[] variants = null;
private String[] grandfathered = null;
private String[] redundant = null;
private String[] deprecatedLang = null;
private String[] deprecated = null;
private int[] suppressedScriptByLanguage = null;
private int[] prefixByExtlang = null;
private String[][][] prefixesByVariant = null;
LanguageData ()
throws IOException
{
try(
InputStream is = getClass().getResourceAsStream("/net/sf/okapi/lib/xliff2/language-subtag-registry.txt");
InputStreamReader isr = new InputStreamReader(is, StandardCharsets.UTF_8);
BufferedReader in = new BufferedReader(isr)) {
while ( consumeRecord(in) ) {
// loop
}
}
prepareArrays();
}
private void prepareArrays() throws IOException {
scripts = scriptSet.toArray(EMPTY_STRING_ARRAY);
regions = regionSet.toArray(EMPTY_STRING_ARRAY);
grandfathered = grandfatheredSet.toArray(EMPTY_STRING_ARRAY);
redundant = redundantSet.toArray(EMPTY_STRING_ARRAY);
deprecated = deprecatedSet.toArray(EMPTY_STRING_ARRAY);
deprecatedLang = deprecatedLangSet.toArray(EMPTY_STRING_ARRAY);
int i = 0;
languages = new String[languageSet.size()];
suppressedScriptByLanguage = new int[languageSet.size()];
for (String language : languageSet) {
languages[i] = language;
String suppressed = suppressedScriptByLanguageMap.get(language);
if (suppressed == null) {
suppressedScriptByLanguage[i] = -1;
}
else {
int index = Arrays.binarySearch(scripts, suppressed);
if (index < 0) {
throw new IOException(
"Malformed registry: reference to non-existent script.");
}
suppressedScriptByLanguage[i] = index;
}
i++;
}
i = 0;
extlangs = new String[extlangSet.size()];
prefixByExtlang = new int[extlangSet.size()];
for (String extlang : extlangSet) {
extlangs[i] = extlang;
String prefix = prefixByExtlangMap.get(extlang);
if (prefix == null) {
prefixByExtlang[i] = -1;
}
else {
int index = Arrays.binarySearch(languages, prefix);
if (index < 0) {
throw new IOException(
"Malformed registry: reference to non-existent prefix for extlang.");
}
prefixByExtlang[i] = index;
}
i++;
}
i = 0;
variants = new String[variantSet.size()];
prefixesByVariant = new String[variantSet.size()][][];
for (String variant : variantSet) {
variants[i] = variant;
Set prefixes = prefixesByVariantMap.get(variant);
if ( prefixes != null ) {
prefixesByVariant[i] = prefixes.toArray(EMPTY_DOUBLE_STRING_ARRAY);
}
else {
prefixesByVariant[i] = EMPTY_DOUBLE_STRING_ARRAY;
}
i++;
}
}
private boolean consumeRecord (BufferedReader in) throws IOException {
boolean hasMore = true;
String type = null;
String subtag = null;
String suppressScript = null;
String preferredValue = null;
Set prefixes = new HashSet<>();
String singlePrefix = null;
boolean depr = false;
String line = null;
for (;;) {
line = in.readLine();
if (line == null) {
hasMore = false;
break;
}
line = line.toLowerCase();
if ("%%".equals(line)) {
break;
} else if (line.startsWith(TYPE)) {
type = line.substring(TYPE.length()).trim().intern();
} else if (line.startsWith(SUBTAG)) {
subtag = line.substring(SUBTAG.length()).trim().intern();
} else if (line.startsWith(TAG)) {
subtag = line.substring(TAG.length()).trim().intern();
} else if (line.startsWith(SUPPRESS_SCRIPT)) {
suppressScript = line.substring(SUPPRESS_SCRIPT.length()).trim().intern();
} else if (line.startsWith(PREFIX)) {
String[] prefixSubtags = HYPHEN.split(line.substring(
PREFIX.length()).trim());
for (int i = 0; i < prefixSubtags.length; i++) {
prefixSubtags[i] = prefixSubtags[i].intern();
}
prefixes.add(prefixSubtags);
singlePrefix = prefixSubtags[0];
} else if (line.startsWith(DEPRECATED)) {
depr = true;
} else if (line.startsWith(PREFERRED_VALUE)) {
preferredValue = line.substring(PREFERRED_VALUE.length()).trim().intern();
preferredValueByLanguageMap.put(subtag, preferredValue);
}
}
if (subtag == null) {
return hasMore;
}
if (depr) {
if ("language".equals(type)) {
deprecatedLangSet.add(subtag);
} else {
deprecatedSet.add(subtag);
}
}
if ("language".equals(type)) {
languageSet.add(subtag);
suppressedScriptByLanguageMap.put(subtag, suppressScript);
}
if ("extlang".equals(type)) {
extlangSet.add(subtag);
prefixByExtlangMap.put(subtag, singlePrefix);
} else if ("region".equals(type)) {
regionSet.add(subtag);
} else if ("script".equals(type)) {
scriptSet.add(subtag);
} else if ("variant".equals(type)) {
variantSet.add(subtag);
prefixesByVariantMap.put(subtag, prefixes);
} else if ("grandfathered".equals(type)) {
grandfatheredSet.add(subtag);
} else if ("redundant".equals(type)) {
redundantSet.add(subtag);
}
return hasMore;
}
/**
* Returns the languages.
*
* @return the languages
*/
public String[] getLanguages() {
return languages;
}
public String[] getExtlangs() {
return extlangs;
}
/**
* Returns the prefixesByVariant.
*
* @return the prefixesByVariant
*/
public String[][][] getPrefixesByVariant() {
return prefixesByVariant;
}
public int[] getPrefixByExtlang() {
return prefixByExtlang;
}
/**
* Returns the regions.
*
* @return the regions
*/
public String[] getRegions() {
return regions;
}
/**
* Returns the scripts.
*
* @return the scripts
*/
public String[] getScripts() {
return scripts;
}
/**
* Returns the suppressedScriptByLanguage.
*
* @return the suppressedScriptByLanguage
*/
public int[] getSuppressedScriptByLanguage() {
return suppressedScriptByLanguage;
}
/**
* Returns the variants.
*
* @return the variants
*/
public String[] getVariants() {
return variants;
}
/**
* Returns the deprecated.
*
* @return the deprecated
*/
public String[] getDeprecated() {
return deprecated;
}
/**
* Returns the preferredValueByLanguageMap.
*
* @return the preferredValueByLanguageMap
*/
public Map getPreferredValueByLanguageMap() {
return preferredValueByLanguageMap;
}
/**
* Returns the grandfathered.
*
* @return the grandfathered
*/
public String[] getGrandfathered() {
return grandfathered;
}
/**
* Returns the redundant.
*
* @return the redundant
*/
public String[] getRedundant() {
return redundant;
}
/**
* Returns the deprecatedLang.
*
* @return the deprecatedLang
*/
public String[] getDeprecatedLang() {
return deprecatedLang;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy