All downloads are free. Search and download functionality uses the official Maven repository.

com.ibm.icu.util.Region Maven / Gradle / Ivy

Go to download

International Components for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support.

The newest version!
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2011-2016, International Business Machines Corporation
 * All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;

/**
 * Region is the class representing a Unicode Region Code, also known as a 
 * Unicode Region Subtag, which is defined based upon the BCP 47 standard. We often think of
 * "regions" as "countries" when defining the characteristics of a locale.  Region codes There are different
 * types of region codes that are important to distinguish.
 * 

* Macroregion - A code for a "macro geographical (continental) region, geographical sub-region, or * selected economic and other grouping" as defined in * UN M.49 (http://unstats.un.org/unsd/methods/m49/m49regin.htm). * These are typically 3-digit codes, but contain some 2-letter codes, such as the LDML code QO * added for Outlying Oceania. Not all UNM.49 codes are defined in LDML, but most of them are. * Macroregions are represented in ICU by one of three region types: WORLD ( region code 001 ), * CONTINENTS ( regions contained directly by WORLD ), and SUBCONTINENTS ( things contained directly * by a continent ). *

* TERRITORY - A Region that is not a Macroregion. These are typically codes for countries, but also * include areas that are not separate countries, such as the code "AQ" for Antarctica or the code * "HK" for Hong Kong (SAR China). Overseas dependencies of countries may or may not have separate * codes. The codes are typically 2-letter codes aligned with the ISO 3166 standard, but BCP47 allows * for the use of 3-digit codes in the future. *

* UNKNOWN - The code ZZ is defined by Unicode LDML for use to indicate that the Region is unknown, * or that the value supplied as a region was invalid. *

* DEPRECATED - Region codes that have been defined in the past but are no longer in modern usage, * usually due to a country splitting into multiple territories or changing its name. *

* GROUPING - A widely understood grouping of territories that has a well defined membership such * that a region code has been assigned for it. Some of these are UNM.49 codes that do't fall into * the world/continent/sub-continent hierarchy, while others are just well known groupings that have * their own region code. Region "EU" (European Union) is one such region code that is a grouping. * Groupings will never be returned by the getContainingRegion() API, since a different type of region * ( WORLD, CONTINENT, or SUBCONTINENT ) will always be the containing region instead. * * @author John Emmons * @stable ICU 50 */ public class Region implements Comparable { /** * RegionType is an enumeration defining the different types of regions. Current possible * values are WORLD, CONTINENT, SUBCONTINENT, TERRITORY, GROUPING, DEPRECATED, and UNKNOWN. * * @stable ICU 50 */ public enum RegionType { /** * Type representing the unknown region. * @stable ICU 50 */ UNKNOWN, /** * Type representing a territory. * @stable ICU 50 */ TERRITORY, /** * Type representing the whole world. * @stable ICU 50 */ WORLD, /** * Type representing a continent. * @stable ICU 50 */ CONTINENT, /** * Type representing a sub-continent. * @stable ICU 50 */ SUBCONTINENT, /** * Type representing a grouping of territories that is not to be used in * the normal WORLD/CONTINENT/SUBCONTINENT/TERRITORY containment tree. * @stable ICU 50 */ GROUPING, /** * Type representing a region whose code has been deprecated, usually * due to a country splitting into multiple territories or changing its name. 
* @stable ICU 50 */ DEPRECATED, } private String id; private int code; private RegionType type; private Region containingRegion = null; private Set containedRegions = new TreeSet(); private List preferredValues = null; private static boolean regionDataIsLoaded = false; private static Map regionIDMap = null; // Map from ID the regions private static Map numericCodeMap = null; // Map from numeric code to the regions private static Map regionAliases = null; // Aliases private static ArrayList regions = null; // This is the main data structure where the Regions are stored. private static ArrayList> availableRegions = null; private static final String UNKNOWN_REGION_ID = "ZZ"; private static final String OUTLYING_OCEANIA_REGION_ID = "QO"; private static final String WORLD_ID = "001"; /* * Private default constructor. Use factory methods only. */ private Region () {} /* * Initializes the region data from the ICU resource bundles. The region data * contains the basic relationships such as which regions are known, what the numeric * codes are, any known aliases, and the territory containment data. * * If the region data has already loaded, then this method simply returns without doing * anything meaningful. 
* */ private static synchronized void loadRegionData() { if ( regionDataIsLoaded ) { return; } regionAliases = new HashMap(); regionIDMap = new HashMap(); numericCodeMap = new HashMap(); availableRegions = new ArrayList>(RegionType.values().length); UResourceBundle metadataAlias = null; UResourceBundle territoryAlias = null; UResourceBundle codeMappings = null; UResourceBundle idValidity = null; UResourceBundle regionList = null; UResourceBundle regionRegular = null; UResourceBundle regionMacro = null; UResourceBundle regionUnknown = null; UResourceBundle worldContainment = null; UResourceBundle territoryContainment = null; UResourceBundle groupingContainment = null; UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER); metadataAlias = metadata.get("alias"); territoryAlias = metadataAlias.get("territory"); UResourceBundle supplementalData = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"supplementalData", ICUResourceBundle.ICU_DATA_CLASS_LOADER); codeMappings = supplementalData.get("codeMappings"); idValidity = supplementalData.get("idValidity"); regionList = idValidity.get("region"); regionRegular = regionList.get("regular"); regionMacro = regionList.get("macroregion"); regionUnknown = regionList.get("unknown"); territoryContainment = supplementalData.get("territoryContainment"); worldContainment = territoryContainment.get("001"); groupingContainment = territoryContainment.get("grouping"); String[] continentsArr = worldContainment.getStringArray(); List continents = Arrays.asList(continentsArr); Enumeration groupings = groupingContainment.getKeys(); List regionCodes = new ArrayList(); List allRegions = new ArrayList(); allRegions.addAll(Arrays.asList(regionRegular.getStringArray())); allRegions.addAll(Arrays.asList(regionMacro.getStringArray())); allRegions.add(regionUnknown.getString()); for ( String r : allRegions ) { int rangeMarkerLocation = r.indexOf("~"); if ( 
rangeMarkerLocation > 0 ) { StringBuilder regionName = new StringBuilder(r); char endRange = regionName.charAt(rangeMarkerLocation+1); regionName.setLength(rangeMarkerLocation); char lastChar = regionName.charAt(rangeMarkerLocation-1); while ( lastChar <= endRange ) { String newRegion = regionName.toString(); regionCodes.add(newRegion); lastChar++; regionName.setCharAt(rangeMarkerLocation-1,lastChar); } } else { regionCodes.add(r); } } regions = new ArrayList(regionCodes.size()); // First process the region codes and create the primary array of regions. for ( String id : regionCodes) { Region r = new Region(); r.id = id; r.type = RegionType.TERRITORY; // Only temporary - figure out the real type later once the aliases are known. regionIDMap.put(id, r); if ( id.matches("[0-9]{3}")) { r.code = Integer.valueOf(id).intValue(); numericCodeMap.put(r.code, r); r.type = RegionType.SUBCONTINENT; } else { r.code = -1; } regions.add(r); } // Process the territory aliases for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) { UResourceBundle res = territoryAlias.get(i); String aliasFrom = res.getKey(); String aliasTo = res.get("replacement").getString(); if ( regionIDMap.containsKey(aliasTo) && !regionIDMap.containsKey(aliasFrom) ) { // This is just an alias from some string to a region regionAliases.put(aliasFrom, regionIDMap.get(aliasTo)); } else { Region r; if ( regionIDMap.containsKey(aliasFrom) ) { // This is a deprecated region r = regionIDMap.get(aliasFrom); } else { // Deprecated region code not in the primary codes list - so need to create a deprecated region for it. 
r = new Region(); r.id = aliasFrom; regionIDMap.put(aliasFrom, r); if ( aliasFrom.matches("[0-9]{3}")) { r.code = Integer.valueOf(aliasFrom).intValue(); numericCodeMap.put(r.code, r); } else { r.code = -1; } regions.add(r); } r.type = RegionType.DEPRECATED; List aliasToRegionStrings = Arrays.asList(aliasTo.split(" ")); r.preferredValues = new ArrayList(); for ( String s : aliasToRegionStrings ) { if (regionIDMap.containsKey(s)) { r.preferredValues.add(regionIDMap.get(s)); } } } } // Process the code mappings - This will allow us to assign numeric codes to most of the territories. for ( int i = 0 ; i < codeMappings.getSize(); i++ ) { UResourceBundle mapping = codeMappings.get(i); if ( mapping.getType() == UResourceBundle.ARRAY ) { String [] codeMappingStrings = mapping.getStringArray(); String codeMappingID = codeMappingStrings[0]; Integer codeMappingNumber = Integer.valueOf(codeMappingStrings[1]); String codeMapping3Letter = codeMappingStrings[2]; if ( regionIDMap.containsKey(codeMappingID)) { Region r = regionIDMap.get(codeMappingID); r.code = codeMappingNumber.intValue(); numericCodeMap.put(r.code, r); regionAliases.put(codeMapping3Letter, r); } } } // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS Region r; if ( regionIDMap.containsKey(WORLD_ID)) { r = regionIDMap.get(WORLD_ID); r.type = RegionType.WORLD; } if ( regionIDMap.containsKey(UNKNOWN_REGION_ID)) { r = regionIDMap.get(UNKNOWN_REGION_ID); r.type = RegionType.UNKNOWN; } for ( String continent : continents ) { if (regionIDMap.containsKey(continent)) { r = regionIDMap.get(continent); r.type = RegionType.CONTINENT; } } while ( groupings.hasMoreElements() ) { String grouping = groupings.nextElement(); if (regionIDMap.containsKey(grouping)) { r = regionIDMap.get(grouping); r.type = RegionType.GROUPING; } } // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR // even though it looks like a territory code. Need to handle it here. 
if ( regionIDMap.containsKey(OUTLYING_OCEANIA_REGION_ID)) { r = regionIDMap.get(OUTLYING_OCEANIA_REGION_ID); r.type = RegionType.SUBCONTINENT; } // Load territory containment info from the supplemental data. for ( int i = 0 ; i < territoryContainment.getSize(); i++ ) { UResourceBundle mapping = territoryContainment.get(i); String parent = mapping.getKey(); if (parent.equals("containedGroupings") || parent.equals("deprecated") || parent.equals("grouping")) { continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip. // #11232 is to do something useful with these. // Also skip "grouping" which has multi-level structure below from CLDR 34. } Region parentRegion = regionIDMap.get(parent); for ( int j = 0 ; j < mapping.getSize(); j++ ) { String child = mapping.getString(j); Region childRegion = regionIDMap.get(child); if ( parentRegion != null && childRegion != null ) { // Add the child region to the set of regions contained by the parent parentRegion.containedRegions.add(childRegion); // Set the parent region to be the containing region of the child. // Regions of type GROUPING can't be set as the parent, since another region // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent. 
if ( parentRegion.getType() != RegionType.GROUPING) { childRegion.containingRegion = parentRegion; } } } } // Fill in the grouping containment resource as well for ( int i = 0 ; i < groupingContainment.getSize(); i++ ) { UResourceBundle mapping = groupingContainment.get(i); String parent = mapping.getKey(); Region parentRegion = regionIDMap.get(parent); for ( int j = 0 ; j < mapping.getSize(); j++ ) { String child = mapping.getString(j); Region childRegion = regionIDMap.get(child); if ( parentRegion != null && childRegion != null ) { // Add the child region to the set of regions contained by the parent parentRegion.containedRegions.add(childRegion); // Do NOT change the parent of the child region, since groupings are // never the primary parent of a region. } } } // Create the availableRegions lists for (int i = 0 ; i < RegionType.values().length ; i++) { availableRegions.add(new TreeSet()); } for ( Region ar : regions ) { Set currentSet = availableRegions.get(ar.type.ordinal()); currentSet.add(ar); availableRegions.set(ar.type.ordinal(),currentSet); } regionDataIsLoaded = true; } /** Returns a Region using the given region ID. The region ID can be either a 2-letter ISO code, * 3-letter ISO code, UNM.49 numeric code, or other valid Unicode Region Code as defined by the CLDR. * @param id The id of the region to be retrieved. * @return The corresponding region. * @throws NullPointerException if the supplied id is null. * @throws IllegalArgumentException if the supplied ID cannot be canonicalized to a Region ID that is known by ICU. 
* @stable ICU 50 */ public static Region getInstance(String id) { if ( id == null ) { throw new NullPointerException(); } loadRegionData(); Region r = regionIDMap.get(id); if ( r == null ) { r = regionAliases.get(id); } if ( r == null ) { throw new IllegalArgumentException("Unknown region id: " + id); } if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1) { r = r.preferredValues.get(0); } return r; } /** Returns a Region using the given numeric code as defined by UNM.49 * @param code The numeric code of the region to be retrieved. * @return The corresponding region. * @throws IllegalArgumentException if the supplied numeric code is not recognized. * @stable ICU 50 */ public static Region getInstance(int code) { loadRegionData(); Region r = numericCodeMap.get(code); if ( r == null ) { // Just in case there's an alias that's numeric, try to find it. String pad = ""; if ( code < 10 ) { pad = "00"; } else if ( code < 100 ) { pad = "0"; } String id = pad + Integer.toString(code); r = regionAliases.get(id); } if ( r == null ) { throw new IllegalArgumentException("Unknown region code: " + code); } if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1) { r = r.preferredValues.get(0); } return r; } /** Used to retrieve all available regions of a specific type. * * @param type The type of regions to be returned ( TERRITORY, MACROREGION, etc. ) * @return An unmodifiable set of all known regions that match the given type. * @stable ICU 50 */ public static Set getAvailable(RegionType type) { loadRegionData(); return Collections.unmodifiableSet(availableRegions.get(type.ordinal())); } /** Used to determine the macroregion that geographically contains this region. * * @return The region that geographically contains this region. Returns NULL if this region is * code "001" (World) or "ZZ" (Unknown region). For example, calling this method with region "IT" (Italy) * returns the region "039" (Southern Europe). 
* @stable ICU 50 */ public Region getContainingRegion() { loadRegionData(); return containingRegion; } /** Used to determine the macroregion that geographically contains this region and that matches the given type. * * @return The region that geographically contains this region and matches the given type. May return NULL if * no containing region can be found that matches the given type. For example, calling this method with region "IT" (Italy) * and type CONTINENT returns the region "150" (Europe). * @stable ICU 50 */ public Region getContainingRegion(RegionType type) { loadRegionData(); if ( containingRegion == null ) { return null; } if ( containingRegion.type.equals(type)) { return containingRegion; } else { return containingRegion.getContainingRegion(type); } } /** Used to determine the sub-regions that are contained within this region. * * @return An unmodifiable set containing all the regions that are immediate children * of this region in the region hierarchy. These returned regions could be either macro * regions, territories, or a mixture of the two, depending on the containment data as defined * in CLDR. This API may return an empty set if this region doesn't have any sub-regions. * For example, calling this method with region "150" (Europe) returns a set containing * the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe) * - "154" (Northern Europe) and "155" (Western Europe). * * @stable ICU 50 */ public Set getContainedRegions() { loadRegionData(); return Collections.unmodifiableSet(containedRegions); } /** Used to determine all the regions that are contained within this region and that match the given type * * @return An unmodifiable set containing all the regions that are children of this region * anywhere in the region hierarchy and match the given type. This API may return an empty set * if this region doesn't have any sub-regions that match the given type. 
* For example, calling this method with region "150" (Europe) and type "TERRITORY" returns a set * containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. ) * @stable ICU 50 */ public Set getContainedRegions(RegionType type) { loadRegionData(); Set result = new TreeSet(); Set cr = getContainedRegions(); for ( Region r : cr ) { if ( r.getType() == type ) { result.add(r); } else { result.addAll(r.getContainedRegions(type)); } } return Collections.unmodifiableSet(result); } /** * @return For deprecated regions, return an unmodifiable list of the regions that are the preferred replacement regions for this region. * Returns null for a non-deprecated region. For example, calling this method with region "SU" (Soviet Union) would * return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc... * * @stable ICU 50 */ public List getPreferredValues() { loadRegionData(); if ( type == RegionType.DEPRECATED) { return Collections.unmodifiableList(preferredValues); } else { return null; } } /** * @return Returns true if this region contains the supplied other region anywhere in the region hierarchy. * * @stable ICU 50 */ public boolean contains(Region other) { loadRegionData(); if (containedRegions.contains(other)) { return true; } else { for (Region cr : containedRegions) { if (cr.contains(other)) { return true; } } } return false; } /** Returns the string representation of this region * * @return The string representation of this region, which is its ID. * * @stable ICU 50 */ public String toString() { return id; } /** * Returns the numeric code for this region * * @return The numeric code for this region. Returns a negative value if the given region does not have a numeric * code assigned to it. This is a very rare case and only occurs for a few very small territories. * * @stable ICU 50 */ public int getNumericCode() { return code; } /** Returns this region's type. 
* * @return This region's type classification, such as MACROREGION or TERRITORY. * * @stable ICU 50 */ public RegionType getType() { return type; } /** * {@inheritDoc} * @stable ICU 50 */ public int compareTo(Region other) { return id.compareTo(other.id); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy