All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.okapi.lib.xliff2.lang.LanguageData Maven / Gradle / Ivy

There is a newer version: 1.47.0
Show newest version
/*===========================================================================
 * Adapted from Henri Sivonen's code
 * --------------------------------------------------------------------------
 * Copyright (c) 2006 Henri Sivonen
 * Copyright (c) 2007-2010 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a 
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 * and/or sell copies of the Software, and to permit persons to whom the 
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 * DEALINGS IN THE SOFTWARE.
 ===========================================================================*/

package net.sf.okapi.lib.xliff2.lang;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;

/**
 * Lookup tables built from a language subtag registry file.
 *
 * <p>The registry is read as a sequence of records separated by {@code %%}
 * lines. Every line is lower-cased before it is interpreted, so all subtags
 * exposed by this class are lower-case. Each {@code String[]} returned by a
 * getter is sorted in natural order (it is produced from a sorted set), which
 * makes it suitable for {@link Arrays#binarySearch(Object[], Object)}.
 *
 * <p>Getters return the internal arrays and map directly, not copies; callers
 * must treat them as read-only.
 */
class LanguageData {

    /** Classpath location of the bundled registry file. */
    private static final String REGISTRY_RESOURCE =
            "/net/sf/okapi/lib/xliff2/language-subtag-registry.txt";

    private static final Pattern HYPHEN = Pattern.compile("-");
    private static final String[][] EMPTY_DOUBLE_STRING_ARRAY = {};
    private static final String[] EMPTY_STRING_ARRAY = {};
    private static final String PREFIX = "prefix: ";
    private static final String SUPPRESS_SCRIPT = "suppress-script: ";
    private static final String SUBTAG = "subtag: ";
    private static final String TAG = "tag: ";
    private static final String TYPE = "type: ";
    private static final String DEPRECATED = "deprecated: ";
    private static final String PREFERRED_VALUE = "preferred-value: ";

    // Accumulators filled while parsing; sorted so the derived arrays are sorted too.
    private final SortedSet<String> languageSet = new TreeSet<>();
    private final SortedSet<String> extlangSet = new TreeSet<>();
    private final SortedSet<String> scriptSet = new TreeSet<>();
    private final SortedSet<String> regionSet = new TreeSet<>();
    private final SortedSet<String> variantSet = new TreeSet<>();
    private final SortedSet<String> grandfatheredSet = new TreeSet<>();
    private final SortedSet<String> redundantSet = new TreeSet<>();
    private final SortedSet<String> deprecatedLangSet = new TreeSet<>();
    private final SortedSet<String> deprecatedSet = new TreeSet<>();
    private final Map<String, String> suppressedScriptByLanguageMap = new HashMap<>();
    private final Map<String, String> prefixByExtlangMap = new HashMap<>();
    private final Map<String, String> preferredValueByLanguageMap = new HashMap<>();
    private final Map<String, Set<String[]>> prefixesByVariantMap = new HashMap<>();

    // Array views produced by prepareArrays() once parsing is complete.
    private String[] languages = null;
    private String[] extlangs = null;
    private String[] scripts = null;
    private String[] regions = null;
    private String[] variants = null;
    private String[] grandfathered = null;
    private String[] redundant = null;
    private String[] deprecatedLang = null;
    private String[] deprecated = null;
    private int[] suppressedScriptByLanguage = null;
    private int[] prefixByExtlang = null;
    private String[][][] prefixesByVariant = null;

    /**
     * Loads the registry bundled on the classpath.
     *
     * @throws IOException if the registry resource is missing, cannot be read,
     *         or references a script or extlang prefix that it does not define
     */
    LanguageData() throws IOException {
        try (InputStream is = getClass().getResourceAsStream(REGISTRY_RESOURCE)) {
            if (is == null) {
                // getResourceAsStream signals "not found" with null; report it
                // as an I/O problem instead of failing later with a bare NPE.
                throw new IOException(
                        "Language subtag registry resource not found: " + REGISTRY_RESOURCE);
            }
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(is, StandardCharsets.UTF_8))) {
                load(in);
            }
        }
    }

    /**
     * Loads a registry from the given reader. The reader is consumed until
     * end of stream but is not closed; closing it is the caller's job.
     *
     * @param in source of the registry text; must not be {@code null}
     * @throws IOException if reading fails, or the registry references a
     *         script or extlang prefix that it does not define
     */
    LanguageData(BufferedReader in) throws IOException {
        load(in);
    }

    /** Parses every record from {@code in}, then builds the array views. */
    private void load(BufferedReader in) throws IOException {
        while (consumeRecord(in)) {
            // consumeRecord does all the work; keep going until end of stream.
        }
        prepareArrays();
    }

    /**
     * Converts the accumulated sets and maps into the arrays exposed by the
     * getters, resolving cross references to array indexes.
     *
     * @throws IOException if a language names a suppressed script, or an
     *         extlang names a prefix, that is not present in the registry
     */
    private void prepareArrays() throws IOException {
        scripts = scriptSet.toArray(EMPTY_STRING_ARRAY);
        regions = regionSet.toArray(EMPTY_STRING_ARRAY);
        grandfathered = grandfatheredSet.toArray(EMPTY_STRING_ARRAY);
        redundant = redundantSet.toArray(EMPTY_STRING_ARRAY);
        deprecated = deprecatedSet.toArray(EMPTY_STRING_ARRAY);
        deprecatedLang = deprecatedLangSet.toArray(EMPTY_STRING_ARRAY);

        int i = 0;
        languages = new String[languageSet.size()];
        suppressedScriptByLanguage = new int[languageSet.size()];
        for (String language : languageSet) {
            languages[i] = language;
            String suppressed = suppressedScriptByLanguageMap.get(language);
            if (suppressed == null) {
                suppressedScriptByLanguage[i] = -1;
            } else {
                // scripts is sorted (built from a TreeSet), so binary search is valid.
                int index = Arrays.binarySearch(scripts, suppressed);
                if (index < 0) {
                    throw new IOException(
                            "Malformed registry: reference to non-existent script.");
                }
                suppressedScriptByLanguage[i] = index;
            }
            i++;
        }

        i = 0;
        extlangs = new String[extlangSet.size()];
        prefixByExtlang = new int[extlangSet.size()];
        for (String extlang : extlangSet) {
            extlangs[i] = extlang;
            String prefix = prefixByExtlangMap.get(extlang);
            if (prefix == null) {
                prefixByExtlang[i] = -1;
            } else {
                // languages was fully populated by the loop above.
                int index = Arrays.binarySearch(languages, prefix);
                if (index < 0) {
                    throw new IOException(
                            "Malformed registry: reference to non-existent prefix for extlang.");
                }
                prefixByExtlang[i] = index;
            }
            i++;
        }

        i = 0;
        variants = new String[variantSet.size()];
        prefixesByVariant = new String[variantSet.size()][][];
        for (String variant : variantSet) {
            variants[i] = variant;
            Set<String[]> prefixes = prefixesByVariantMap.get(variant);
            prefixesByVariant[i] = (prefixes != null)
                    ? prefixes.toArray(EMPTY_DOUBLE_STRING_ARRAY)
                    : EMPTY_DOUBLE_STRING_ARRAY;
            i++;
        }
    }

    /**
     * Reads one record (all lines up to the next {@code %%} separator or end
     * of stream) and files it into the accumulator sets and maps. Records
     * without a {@code Subtag:} or {@code Tag:} line are skipped.
     *
     * @param in reader positioned at the start of a record
     * @return {@code true} if a separator ended the record (more input may
     *         follow), {@code false} if end of stream was reached
     * @throws IOException if reading from {@code in} fails
     */
    private boolean consumeRecord(BufferedReader in) throws IOException {
        boolean hasMore = true;
        String type = null;
        String subtag = null;
        String suppressScript = null;
        String preferredValue = null;
        // String[] has identity equality, so this set never merges equal prefixes.
        Set<String[]> prefixes = new HashSet<>();
        String singlePrefix = null;
        boolean depr = false;
        for (;;) {
            String line = in.readLine();
            if (line == null) {
                hasMore = false;
                break;
            }
            // Locale.ROOT keeps the mapping locale-independent; the default
            // locale could turn 'I' into dotless 'ı' (e.g. Turkish) and
            // corrupt both the field names and the subtags.
            line = line.toLowerCase(Locale.ROOT);
            if ("%%".equals(line)) {
                break;
            } else if (line.startsWith(TYPE)) {
                type = line.substring(TYPE.length()).trim().intern();
            } else if (line.startsWith(SUBTAG)) {
                subtag = line.substring(SUBTAG.length()).trim().intern();
            } else if (line.startsWith(TAG)) {
                subtag = line.substring(TAG.length()).trim().intern();
            } else if (line.startsWith(SUPPRESS_SCRIPT)) {
                suppressScript = line.substring(SUPPRESS_SCRIPT.length()).trim().intern();
            } else if (line.startsWith(PREFIX)) {
                String[] prefixSubtags = HYPHEN.split(
                        line.substring(PREFIX.length()).trim());
                for (int i = 0; i < prefixSubtags.length; i++) {
                    prefixSubtags[i] = prefixSubtags[i].intern();
                }
                prefixes.add(prefixSubtags);
                // Only the first subtag of the (last) prefix is kept for extlangs.
                singlePrefix = prefixSubtags[0];
            } else if (line.startsWith(DEPRECATED)) {
                depr = true;
            } else if (line.startsWith(PREFERRED_VALUE)) {
                preferredValue = line.substring(PREFERRED_VALUE.length()).trim().intern();
            }
        }
        if (subtag == null) {
            return hasMore;
        }
        if (preferredValue != null) {
            // Stored once the record is complete so the key is never null,
            // whatever order the fields appeared in.
            preferredValueByLanguageMap.put(subtag, preferredValue);
        }
        if (depr) {
            if ("language".equals(type)) {
                deprecatedLangSet.add(subtag);
            } else {
                deprecatedSet.add(subtag);
            }
        }
        if ("language".equals(type)) {
            languageSet.add(subtag);
            suppressedScriptByLanguageMap.put(subtag, suppressScript);
        }
        if ("extlang".equals(type)) {
            extlangSet.add(subtag);
            prefixByExtlangMap.put(subtag, singlePrefix);
        } else if ("region".equals(type)) {
            regionSet.add(subtag);
        } else if ("script".equals(type)) {
            scriptSet.add(subtag);
        } else if ("variant".equals(type)) {
            variantSet.add(subtag);
            prefixesByVariantMap.put(subtag, prefixes);
        } else if ("grandfathered".equals(type)) {
            grandfatheredSet.add(subtag);
        } else if ("redundant".equals(type)) {
            redundantSet.add(subtag);
        }
        return hasMore;
    }

    /**
     * Returns the language subtags, including deprecated ones.
     *
     * @return the sorted, lower-case language subtags (internal array)
     */
    public String[] getLanguages() {
        return languages;
    }

    /**
     * Returns the extlang subtags.
     *
     * @return the sorted, lower-case extlang subtags (internal array)
     */
    public String[] getExtlangs() {
        return extlangs;
    }

    /**
     * Returns the prefixes registered for each variant.
     *
     * @return an array parallel to {@link #getVariants()}; each element holds
     *         that variant's prefixes, every prefix being split into its
     *         hyphen-separated subtags (empty when the variant has none)
     */
    public String[][][] getPrefixesByVariant() {
        return prefixesByVariant;
    }

    /**
     * Returns the prefix language of each extlang.
     *
     * @return an array parallel to {@link #getExtlangs()}; each element is an
     *         index into {@link #getLanguages()}, or {@code -1} when the
     *         extlang has no prefix
     */
    public int[] getPrefixByExtlang() {
        return prefixByExtlang;
    }

    /**
     * Returns the region subtags.
     *
     * @return the sorted, lower-case region subtags (internal array)
     */
    public String[] getRegions() {
        return regions;
    }

    /**
     * Returns the script subtags.
     *
     * @return the sorted, lower-case script subtags (internal array)
     */
    public String[] getScripts() {
        return scripts;
    }

    /**
     * Returns the suppressed script of each language.
     *
     * @return an array parallel to {@link #getLanguages()}; each element is an
     *         index into {@link #getScripts()}, or {@code -1} when the
     *         language has no suppress-script entry
     */
    public int[] getSuppressedScriptByLanguage() {
        return suppressedScriptByLanguage;
    }

    /**
     * Returns the variant subtags.
     *
     * @return the sorted, lower-case variant subtags (internal array)
     */
    public String[] getVariants() {
        return variants;
    }

    /**
     * Returns the deprecated subtags and tags whose type is not "language".
     *
     * @return the sorted, lower-case deprecated non-language entries
     *         (internal array)
     */
    public String[] getDeprecated() {
        return deprecated;
    }

    /**
     * Returns the preferred-value mapping.
     *
     * @return the internal, mutable map from a (lower-case) subtag or tag to
     *         its preferred value; despite the name, entries are recorded for
     *         every record type that carries a preferred value
     */
    public Map<String, String> getPreferredValueByLanguageMap() {
        return preferredValueByLanguageMap;
    }

    /**
     * Returns the grandfathered tags.
     *
     * @return the sorted, lower-case grandfathered tags (internal array)
     */
    public String[] getGrandfathered() {
        return grandfathered;
    }

    /**
     * Returns the redundant tags.
     *
     * @return the sorted, lower-case redundant tags (internal array)
     */
    public String[] getRedundant() {
        return redundant;
    }

    /**
     * Returns the deprecated language subtags.
     *
     * @return the sorted, lower-case deprecated language subtags
     *         (internal array)
     */
    public String[] getDeprecatedLang() {
        return deprecatedLang;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy