All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.abdera.i18n.rfc4646.Lang Maven / Gradle / Ivy

There is a newer version: 1.1.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  The ASF licenses this file to You
 * under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.  For additional information regarding
 * copyright in this work, please see the NOTICE file in the top level
 * directory of this distribution.
 */
package org.apache.abdera.i18n.rfc4646;

import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.abdera.i18n.rfc4646.Subtag.Type;

/**
 * Implementation of RFC 4646 Language Tags
 */
public final class Lang extends SubtagSet {

    private static final long serialVersionUID = -7095560018906537331L;
    private final Locale locale;

    /**
     * Create a Lang object using the default locale
     */
    public Lang() {
        this(init(Locale.getDefault()));
    }

    /**
     * Create a Lang object using the specified locale
     */
    public Lang(Locale locale) {
        this(init(locale));
    }

    private static Subtag init(Locale locale) {
        try {
            return parse(locale.toString()).primary;
        } catch (Exception e) {
            Subtag c = null, primary = new Subtag(Type.PRIMARY, locale.getLanguage());
            String country = locale.getCountry();
            String variant = locale.getVariant();
            if (country != null)
                c = new Subtag(Type.REGION, country, primary);
            if (variant != null)
                new Subtag(Type.VARIANT, variant, c);
            return primary;
        }
    }

    /**
     * Create a lang object
     */
    public Lang(String lang) {
        this(parse(lang).primary);
    }

    Lang(Subtag primary) {
        super(primary);
        this.locale = initLocale();
    }

    private Locale initLocale() {
        Subtag primary = getLanguage();
        Subtag region = getRegion();
        Subtag variant = getVariant();
        if (variant != null && region != null)
            return new Locale(primary.toString(), region.toString(), variant.toString());
        else if (region != null)
            return new Locale(primary.toString(), region.toString());
        else
            return new Locale(primary.toString());
    }

    /**
     * Get the Language subtag
     */
    public Subtag getLanguage() {
        return primary;
    }

    /**
     * Get a Locale object derived from this language tag
     */
    public Locale getLocale() {
        return locale;
    }

    /**
     * Get the Extlang tag. If there are multiple extlang tags, this will return the first one. The rest can be
     * retrieved by following Subtag.getNext()
     */
    public Subtag getExtLang() {
        for (Subtag subtag : this) {
            switch (subtag.getType()) {
                case PRIMARY:
                    break;
                case EXTLANG:
                    return subtag;
                default:
                    return null;
            }
        }
        return null;
    }

    /**
     * Get the Script subtag
     */
    public Subtag getScript() {
        for (Subtag subtag : this) {
            switch (subtag.getType()) {
                case PRIMARY:
                case EXTLANG:
                    break;
                case SCRIPT:
                    return subtag;
                default:
                    return null;
            }
        }
        return null;
    }

    /**
     * Get the Region subtag
     */
    public Subtag getRegion() {
        for (Subtag subtag : this) {
            switch (subtag.getType()) {
                case PRIMARY:
                case EXTLANG:
                case SCRIPT:
                    break;
                case REGION:
                    return subtag;
                default:
                    return null;
            }
        }
        return null;
    }

    /**
     * Get the Variant subtag
     */
    public Subtag getVariant() {
        for (Subtag subtag : this) {
            switch (subtag.getType()) {
                case PRIMARY:
                case EXTLANG:
                case SCRIPT:
                case REGION:
                    break;
                case VARIANT:
                    return subtag;
                default:
                    return null;
            }
        }
        return null;
    }

    /**
     * Get the beginning of the extension section. This will return the first prefix subtag of the first set of
     * extension subtags.
     */
    public Subtag getExtension() {
        for (Subtag subtag : this) {
            switch (subtag.getType()) {
                case PRIMARY:
                case EXTLANG:
                case SCRIPT:
                case REGION:
                case VARIANT:
                    break;
                case EXTENSION:
                    return subtag.getPrevious();
                default:
                    return null;
            }
        }
        return null;
    }

    /**
     * Get the beginning of the private-use section. This will return the x prefix subtag
     */
    public Subtag getPrivateUse() {
        for (Subtag subtag : this) {
            switch (subtag.getType()) {
                case PRIMARY:
                case EXTLANG:
                case SCRIPT:
                case VARIANT:
                case REGION:
                case EXTENSION:
                    break;
                case PRIVATEUSE:
                    return subtag.getPrevious();
                default:
                    return null;
            }
        }
        return null;
    }

    /**
     * Get this Lang as a Language-Range for use with matching
     */
    public Range asRange() {
        return new Range(toString());
    }

    /**
     * Clone this Language tag
     */
    public Lang clone() {
        return new Lang(primary.clone());
    }

    /**
     * Produce a canonicalized copy of this lang tag
     */
    public Lang canonicalize() {
        Subtag primary = null, current = null;
        int p = -1, t = -1;
        List tags = new LinkedList();
        for (Subtag tag : this) {
            tags.add(tag);
        }
        List ext = new LinkedList();
        for (Subtag tag : tags) {
            if (tag.getType() == Subtag.Type.SINGLETON) {
                if (!tag.getName().equalsIgnoreCase("x")) {
                    ext.add(tag);
                }
            }
        }
        if (ext.size() > 0) {
            p = tags.indexOf(ext.get(0));
            t = tags.indexOf(ext.get(ext.size() - 1));
        }
        Collections.sort(ext, new Comparator() {
            public int compare(Subtag o1, Subtag o2) {
                return o1.getName().compareTo(o2.getName());
            }
        });

        List extchain = new LinkedList();
        for (Subtag tag : ext) {
            extchain.add(tag);
            current = tag.getNext();
            while (current != null && current.getType() == Subtag.Type.EXTENSION) {
                extchain.add(current);
                current = current.getNext();
            }
        }
        List result = new LinkedList();
        result.addAll(tags.subList(0, p));
        result.addAll(extchain);
        result.addAll(tags.subList(t + 2, tags.size()));

        current = null;
        for (Subtag tag : result) {
            tag = tag.canonicalize();
            if (primary == null) {
                primary = tag;
                current = primary;
            } else {
                current.setNext(tag);
                current = tag;
            }
        }
        return new Lang(primary);
    }

    /**
     * Return true if this lang tag contains any deprecated subtags
     */
    public boolean isDeprecated() {
        for (Subtag tag : this)
            if (tag.isDeprecated())
                return true;
        return false;
    }

    /**
     * Get a Lang tag that drops the last subtag
     */
    public Lang getParent() {
        Lang lang = clone();
        Subtag last = null;
        for (Subtag tag : lang)
            last = tag;
        if (last.getPrevious() == null)
            return null;
        last.getPrevious().setNext(null);
        return lang;
    }

    /**
     * Return true if the specified lang tag is the parent of this one
     */
    public boolean isChildOf(Lang lang) {
        Range range = new Range(lang).appendWildcard();
        return range.matches(this);
    }

    /**
     * Return true if the specified lang tag is the child of this one
     */
    public boolean isParentOf(Lang lang) {
        return lang.isChildOf(this);
    }

    // Parsing Logic

    private static final String language = "((?:[a-zA-Z]{2,3}(?:[-_][a-zA-Z]{3}){0,3})|[a-zA-Z]{4}|[a-zA-Z]{5,8})";
    private static final String script = "((?:[-_][a-zA-Z]{4})?)";
    private static final String region = "((?:[-_](?:(?:[a-zA-Z]{2})|(?:[0-9]{3})))?)";
    private static final String variant = "((?:[-_](?:(?:[a-zA-Z0-9]{5,8})|(?:[0-9][a-zA-Z0-9]{3})))*)";
    private static final String extension = "((?:[-_][a-wy-zA-WY-Z0-9](?:[-_][a-zA-Z0-9]{2,8})+)*)";
    private static final String privateuse = "[xX](?:[-_][a-zA-Z0-9]{2,8})+";
    private static final String _privateuse = "((?:[-_]" + privateuse + ")?)";
    private static final String grandfathered =
        "^(?:art[-_]lojban|cel[-_]gaulish|en[-_]GB[-_]oed|i[-_]ami|i[-_]bnn|i[-_]default|i[-_]enochian|i[-_]hak|i[-_]klingon|i[-_]lux|i[-_]mingo|i[-_]navajo|i[-_]pwn|i[-_]tao||i[-_]tay|i[-_]tsu|no[-_]bok|no[-_]nyn|sgn[-_]BE[-_]fr|sgn[-_]BE[-_]nl|sgn[-_]CH[-_]de|zh[-_]cmn|zh[-_]cmn[-_]Hans|zh[-_]cmn[-_]Hant|zh[-_]gan|zh[-_]guoyu|zh[-_]hakka|zh[-_]min|zh[-_]min[-_]nan|zh[-_]wuu|zh[-_]xiang|zh[-_]yue)$";
    private static final String langtag = "^" + language + script + region + variant + extension + _privateuse + "$";

    private static final Pattern p_langtag = Pattern.compile(langtag);
    private static final Pattern p_privateuse = Pattern.compile("^" + privateuse + "$");
    private static final Pattern p_grandfathered = Pattern.compile(grandfathered);

    /**
     * Parse a Lang tag
     */
    public static Lang parse(String lang) {
        Subtag primary = null;
        Matcher m = p_grandfathered.matcher(lang);
        if (m.find()) {
            String[] tags = lang.split("[-_]");
            Subtag current = null;
            for (String tag : tags) {
                if (current == null) {
                    primary = new Subtag(Type.GRANDFATHERED, tag, null);
                    current = primary;
                } else {
                    current = new Subtag(Type.GRANDFATHERED, tag, current);
                }
            }
            return new Lang(primary);
        }
        m = p_privateuse.matcher(lang);
        if (m.find()) {
            String[] tags = lang.split("[-_]");
            Subtag current = null;
            for (String tag : tags) {
                if (current == null) {
                    primary = new Subtag(Type.SINGLETON, tag, null);
                    current = primary;
                } else {
                    current = new Subtag(Type.PRIVATEUSE, tag, current);
                }
            }
            return new Lang(primary);
        }
        m = p_langtag.matcher(lang);
        if (m.find()) {
            String langtag = m.group(1);
            String script = m.group(2);
            String region = m.group(3);
            String variant = m.group(4);
            String extension = m.group(5);
            String privateuse = m.group(6);
            Subtag current = null;
            String[] tags = langtag.split("[-_]");
            for (String tag : tags) {
                if (current == null) {
                    primary = new Subtag(Type.PRIMARY, tag);
                    current = primary;
                } else {
                    current = new Subtag(Type.EXTLANG, tag, current);
                }
            }
            if (script != null && script.length() > 0)
                current = new Subtag(Type.SCRIPT, script.substring(1), current);
            if (region != null && region.length() > 0)
                current = new Subtag(Type.REGION, region.substring(1), current);
            if (variant != null && variant.length() > 0) {
                variant = variant.substring(1);
                tags = variant.split("-");
                for (String tag : tags)
                    current = new Subtag(Type.VARIANT, tag, current);
            }
            if (extension != null && extension.length() > 0) {
                extension = extension.substring(1);
                tags = extension.split("-");
                current = new Subtag(Type.SINGLETON, tags[0], current);
                for (int i = 1; i < tags.length; i++) {
                    String tag = tags[i];
                    current = new Subtag(tag.length() == 1 ? Type.SINGLETON : Type.EXTENSION, tag, current);
                }
            }
            if (privateuse != null && privateuse.length() > 0) {
                privateuse = privateuse.substring(1);
                tags = privateuse.split("-");
                current = new Subtag(Type.SINGLETON, tags[0], current);
                for (int i = 1; i < tags.length; i++) {
                    current = new Subtag(Type.PRIVATEUSE, tags[i], current);
                }
            }
            return new Lang(primary);
        }
        throw new IllegalArgumentException();
    }

    public static String fromLocale(Locale locale) {
        return new Lang(locale).toString();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy