All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.impl.locale.InternalLocaleBuilder Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

There is a newer version: 76.1
Show newest version
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2009-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.impl.locale;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public final class InternalLocaleBuilder {

    private static final boolean JDKIMPL = false;

    private String _language = "";
    private String _script = "";
    private String _region = "";
    private String _variant = "";

    private static final CaseInsensitiveChar PRIVUSE_KEY = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE.charAt(0));

    private HashMap _extensions;
    private HashSet _uattributes;
    private HashMap _ukeywords;


    public InternalLocaleBuilder() {
    }

    public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
        if (language == null || language.length() == 0) {
            _language = "";
        } else {
            if (!LanguageTag.isLanguage(language)) {
                throw new LocaleSyntaxException("Ill-formed language: " + language, 0);
            }
            _language = language;
        }
        return this;
    }

    public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
        if (script == null || script.length() == 0) {
            _script = "";
        } else {
            if (!LanguageTag.isScript(script)) {
                throw new LocaleSyntaxException("Ill-formed script: " + script, 0);
            }
            _script = script;
        }
        return this;
    }

    public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException {
        if (region == null || region.length() == 0) {
            _region = "";
        } else {
            if (!LanguageTag.isRegion(region)) {
                throw new LocaleSyntaxException("Ill-formed region: " + region, 0);
            }
            _region = region;
        }
        return this;
    }

    public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
        if (variant == null || variant.length() == 0) {
            _variant = "";
        } else {
            // normalize separators to "_"
            String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP);
            int errIdx = checkVariants(var, BaseLocale.SEP);
            if (errIdx != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
            }
            _variant = var;
        }
        return this;
    }

    public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        // Use case insensitive string to prevent duplication
        if (_uattributes == null) {
            _uattributes = new HashSet(4);
        }
        _uattributes.add(new CaseInsensitiveString(attribute));
        return this;
    }

    public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        if (_uattributes != null) {
            _uattributes.remove(new CaseInsensitiveString(attribute));
        }
        return this;
    }

    public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException {
        if (!UnicodeLocaleExtension.isKey(key)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key);
        }

        CaseInsensitiveString cikey = new CaseInsensitiveString(key);
        if (type == null) {
            if (_ukeywords != null) {
                // null type is used for remove the key
                _ukeywords.remove(cikey);
            }
        } else {
            if (type.length() != 0) {
                // normalize separator to "-"
                String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
                // validate
                StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP);
                while (!itr.isDone()) {
                    String s = itr.current();
                    if (!UnicodeLocaleExtension.isTypeSubtag(s)) {
                        throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: " + type, itr.currentStart());
                    }
                    itr.next();
                }
            }
            if (_ukeywords == null) {
                _ukeywords = new HashMap(4);
            }
            _ukeywords.put(cikey, type);
        }
        return this;
    }

    public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
        // validate key
        boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton);
        if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) {
            throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
        }

        boolean remove = (value == null || value.length() == 0);
        CaseInsensitiveChar key = new CaseInsensitiveChar(singleton);

        if (remove) {
            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                // clear entire Unicode locale extension
                if (_uattributes != null) {
                    _uattributes.clear();
                }
                if (_ukeywords != null) {
                    _ukeywords.clear();
                }
            } else {
                if (_extensions != null && _extensions.containsKey(key)) {
                    _extensions.remove(key);
                }
            }
        } else {
            // validate value
            String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
            StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP);
            while (!itr.isDone()) {
                String s = itr.current();
                boolean validSubtag;
                if (isBcpPrivateuse) {
                    validSubtag = LanguageTag.isPrivateuseSubtag(s);
                } else {
                    validSubtag = LanguageTag.isExtensionSubtag(s);
                }
                if (!validSubtag) {
                    throw new LocaleSyntaxException("Ill-formed extension value: " + s, itr.currentStart());
                }
                itr.next();
            }

            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                setUnicodeLocaleExtension(val);
            } else {
                if (_extensions == null) {
                    _extensions = new HashMap(4);
                }
                _extensions.put(key, val);
            }
        }
        return this;
    }

    /*
     * Set extension/private subtags in a single string representation
     */
    public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException {
        if (subtags == null || subtags.length() == 0) {
            clearExtensions();
            return this;
        }
        subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        List extensions = null;
        String privateuse = null;

        int parsed = 0;
        int start;

        // Make a list of extension subtags
        while (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isExtensionSingleton(s)) {
                start = itr.currentStart();
                String singleton = s;
                StringBuilder sb = new StringBuilder(singleton);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (LanguageTag.isExtensionSubtag(s)) {
                        sb.append(LanguageTag.SEP).append(s);
                        parsed = itr.currentEnd();
                    } else {
                        break;
                    }
                    itr.next();
                }

                if (parsed < start) {
                    throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'", start);
                }

                if (extensions == null) {
                    extensions = new ArrayList(4);
                }
                extensions.add(sb.toString());
            } else {
                break;
            }
        }
        if (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isPrivateusePrefix(s)) {
                start = itr.currentStart();
                StringBuilder sb = new StringBuilder(s);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (!LanguageTag.isPrivateuseSubtag(s)) {
                        break;
                    }
                    sb.append(LanguageTag.SEP).append(s);
                    parsed = itr.currentEnd();

                    itr.next();
                }
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete privateuse:" + subtags.substring(start), start);
                } else {
                    privateuse = sb.toString();
                }
            }
        }

        if (!itr.isDone()) {
            throw new LocaleSyntaxException("Ill-formed extension subtags:" + subtags.substring(itr.currentStart()), itr.currentStart());
        }

        return setExtensions(extensions, privateuse);
    }

    /*
     * Set a list of BCP47 extensions and private use subtags
     * BCP47 extensions are already validated and well-formed, but may contain duplicates
     */
    private InternalLocaleBuilder setExtensions(List bcpExtensions, String privateuse) {
        clearExtensions();

        if (bcpExtensions != null && bcpExtensions.size() > 0) {
            HashSet processedExtensions = new HashSet(bcpExtensions.size());
            for (String bcpExt : bcpExtensions) {
                CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt.charAt(0));
                // ignore duplicates
                if (!processedExtensions.contains(key)) {
                    // each extension string contains singleton, e.g. "a-abc-def"
                    if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                        setUnicodeLocaleExtension(bcpExt.substring(2));
                    } else {
                        if (_extensions == null) {
                            _extensions = new HashMap(4);
                        }
                        _extensions.put(key, bcpExt.substring(2));
                    }
                }
            }
        }
        if (privateuse != null && privateuse.length() > 0) {
            // privateuse string contains prefix, e.g. "x-abc-def"
            if (_extensions == null) {
                _extensions = new HashMap(1);
            }
            _extensions.put(new CaseInsensitiveChar(privateuse.charAt(0)), privateuse.substring(2));
        }

        return this;
    }

    /*
     * Reset Builder's internal state with the given language tag
     */
    public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) {
        clear();
        if (langtag.getExtlangs().size() > 0) {
            _language = langtag.getExtlangs().get(0);
        } else {
            String language = langtag.getLanguage();
            if (!language.equals(LanguageTag.UNDETERMINED)) {
                _language = language;
            }
        }
        _script = langtag.getScript();
        _region = langtag.getRegion();

        ArrayList bcpVariants = new ArrayList(langtag.getVariants());
        Collections.sort(bcpVariants);
        if (bcpVariants.size() > 0) {
            StringBuilder var = new StringBuilder(bcpVariants.get(0));
            for (int i = 1; i < bcpVariants.size(); i++) {
                var.append(BaseLocale.SEP).append(bcpVariants.get(i));
            }
            _variant = var.toString();
        }

        setExtensions(langtag.getExtensions(), langtag.getPrivateuse());

        return this;
    }

    public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions extensions) throws LocaleSyntaxException {
        String language = base.getLanguage();
        String script = base.getScript();
        String region = base.getRegion();
        String variant = base.getVariant();

        if (JDKIMPL) {
            // Special backward compatibility support

            // Exception 1 - ja_JP_JP
            if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) {
                // When locale ja_JP_JP is created, ca-japanese is always there.
                // The builder ignores the variant "JP"
                assert("japanese".equals(extensions.getUnicodeLocaleType("ca")));
                variant = "";
            }
            // Exception 2 - th_TH_TH
            else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) {
                // When locale th_TH_TH is created, nu-thai is always there.
                // The builder ignores the variant "TH"
                assert("thai".equals(extensions.getUnicodeLocaleType("nu")));
                variant = "";
            }
            // Exception 3 - no_NO_NY
            else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) {
                // no_NO_NY is a valid locale and used by Java 6 or older versions.
                // The build ignores the variant "NY" and change the language to "nn".
                language = "nn";
                variant = "";
            }
        }

        // Validate base locale fields before updating internal state.
        // LocaleExtensions always store validated/canonicalized values,
        // so no checks are necessary.
        if (language.length() > 0 && !LanguageTag.isLanguage(language)) {
            throw new LocaleSyntaxException("Ill-formed language: " + language);
        }

        if (script.length() > 0 && !LanguageTag.isScript(script)) {
            throw new LocaleSyntaxException("Ill-formed script: " + script);
        }

        if (region.length() > 0 && !LanguageTag.isRegion(region)) {
            throw new LocaleSyntaxException("Ill-formed region: " + region);
        }

        if (variant.length() > 0) {
            int errIdx = checkVariants(variant, BaseLocale.SEP);
            if (errIdx != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
            }
        }

        // The input locale is validated at this point.
        // Now, updating builder's internal fields.
        _language = language;
        _script = script;
        _region = region;
        _variant = variant;
        clearExtensions();

        Set extKeys = (extensions == null) ? null : extensions.getKeys();
        if (extKeys != null) {
            // map extensions back to builder's internal format
            for (Character key : extKeys) {
                Extension e = extensions.getExtension(key);
                if (e instanceof UnicodeLocaleExtension) {
                    UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e;
                    for (String uatr : ue.getUnicodeLocaleAttributes()) {
                        if (_uattributes == null) {
                            _uattributes = new HashSet(4);
                        }
                        _uattributes.add(new CaseInsensitiveString(uatr));
                    }
                    for (String ukey : ue.getUnicodeLocaleKeys()) {
                        if (_ukeywords == null) {
                            _ukeywords = new HashMap(4);
                        }
                        _ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey));
                    }
                } else {
                    if (_extensions == null) {
                        _extensions = new HashMap(4);
                    }
                    _extensions.put(new CaseInsensitiveChar(key.charValue()), e.getValue());
                }
            }
        }
        return this;
    }

    public InternalLocaleBuilder clear() {
        _language = "";
        _script = "";
        _region = "";
        _variant = "";
        clearExtensions();
        return this;
    }

    public InternalLocaleBuilder clearExtensions() {
        if (_extensions != null) {
            _extensions.clear();
        }
        if (_uattributes != null) {
            _uattributes.clear();
        }
        if (_ukeywords != null) {
            _ukeywords.clear();
        }
        return this;
    }

    public BaseLocale getBaseLocale() {
        String language = _language;
        String script = _script;
        String region = _region;
        String variant = _variant;

        // Special private use subtag sequence identified by "lvariant" will be
        // interpreted as Java variant.
        if (_extensions != null) {
            String privuse = _extensions.get(PRIVUSE_KEY);
            if (privuse != null) {
                StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP);
                boolean sawPrefix = false;
                int privVarStart = -1;
                while (!itr.isDone()) {
                    if (sawPrefix) {
                        privVarStart = itr.currentStart();
                        break;
                    }
                    if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                        sawPrefix = true;
                    }
                    itr.next();
                }
                if (privVarStart != -1) {
                    StringBuilder sb = new StringBuilder(variant);
                    if (sb.length() != 0) {
                        sb.append(BaseLocale.SEP);
                    }
                    sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP, BaseLocale.SEP));
                    variant = sb.toString();
                }
            }
        }

        return BaseLocale.getInstance(language, script, region, variant);
    }

    public LocaleExtensions getLocaleExtensions() {
        if ((_extensions == null || _extensions.size() == 0)
                && (_uattributes == null || _uattributes.size() == 0)
                && (_ukeywords == null || _ukeywords.size() == 0)) {
            return LocaleExtensions.EMPTY_EXTENSIONS;
        }

        return new LocaleExtensions(_extensions, _uattributes, _ukeywords);
    }

    /*
     * Remove special private use subtag sequence identified by "lvariant"
     * and return the rest. Only used by LocaleExtensions
     */
    static String removePrivateuseVariant(String privuseVal) {
        StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP);

        // Note: privateuse value "abc-lvariant" is unchanged
        // because no subtags after "lvariant".

        int prefixStart = -1;
        boolean sawPrivuseVar = false;
        while (!itr.isDone()) {
            if (prefixStart != -1) {
                // Note: privateuse value "abc-lvariant" is unchanged
                // because no subtags after "lvariant".
                sawPrivuseVar = true;
                break;
            }
            if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                prefixStart = itr.currentStart();
            }
            itr.next();
        }
        if (!sawPrivuseVar) {
            return privuseVal;
        }

        assert(prefixStart == 0 || prefixStart > 1);
        return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart -1);
    }

    /*
     * Check if the given variant subtags separated by the given
     * separator(s) are valid
     */
    private int checkVariants(String variants, String sep) {
        StringTokenIterator itr = new StringTokenIterator(variants, sep);
        while (!itr.isDone()) {
            String s = itr.current();
            if (!LanguageTag.isVariant(s)) {
                return itr.currentStart();
            }
            itr.next();
        }
        return -1;
    }

    /*
     * Private methods parsing Unicode Locale Extension subtags.
     * Duplicated attributes/keywords will be ignored.
     * The input must be a valid extension subtags (excluding singleton).
     */
    private void setUnicodeLocaleExtension(String subtags) {
        // wipe out existing attributes/keywords
        if (_uattributes != null) {
            _uattributes.clear();
        }
        if (_ukeywords != null) {
            _ukeywords.clear();
        }

        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        // parse attributes
        while (!itr.isDone()) {
            if (!UnicodeLocaleExtension.isAttribute(itr.current())) {
                break;
            }
            if (_uattributes == null) {
                _uattributes = new HashSet(4);
            }
            _uattributes.add(new CaseInsensitiveString(itr.current()));
            itr.next();
        }

        // parse keywords
        CaseInsensitiveString key = null;
        String type;
        int typeStart = -1;
        int typeEnd = -1;
        while (!itr.isDone()) {
            if (key != null) {
                if (UnicodeLocaleExtension.isKey(itr.current())) {
                    // next keyword - emit previous one
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (_ukeywords == null) {
                        _ukeywords = new HashMap(4);
                    }
                    _ukeywords.put(key, type);

                    // reset keyword info
                    CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current());
                    key = _ukeywords.containsKey(tmpKey) ? null : tmpKey;
                    typeStart = typeEnd = -1;
                } else {
                    if (typeStart == -1) {
                        typeStart = itr.currentStart();
                    }
                    typeEnd = itr.currentEnd();
                }
            } else if (UnicodeLocaleExtension.isKey(itr.current())) {
                // 1. first keyword or
                // 2. next keyword, but previous one was duplicate
                key = new CaseInsensitiveString(itr.current());
                if (_ukeywords != null && _ukeywords.containsKey(key)) {
                    // duplicate
                    key = null;
                }
            }

            if (!itr.hasNext()) {
                if (key != null) {
                    // last keyword
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (_ukeywords == null) {
                        _ukeywords = new HashMap(4);
                    }
                    _ukeywords.put(key, type);
                }
                break;
            }

            itr.next();
        }
    }

    static class CaseInsensitiveString {
        private String _s;

        CaseInsensitiveString(String s) {
            _s = s;
        }

        public String value() {
            return _s;
        }

        @Override
        public int hashCode() {
            return AsciiUtil.toLowerString(_s).hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CaseInsensitiveString)) {
                return false;
            }
            return AsciiUtil.caseIgnoreMatch(_s, ((CaseInsensitiveString)obj).value());
        }
    }

    static class CaseInsensitiveChar {
        private char _c;

        CaseInsensitiveChar(char c) {
            _c = c;
        }

        public char value() {
            return _c;
        }

        @Override
        public int hashCode() {
            return AsciiUtil.toLower(_c);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CaseInsensitiveChar)) {
                return false;
            }
            return _c ==  AsciiUtil.toLower(((CaseInsensitiveChar)obj).value());
        }

    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy