All downloads are free. The search and download functionality uses the official Maven repository.

com.ibm.icu.util.Region Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

There is a newer version: 77.1
Show newest version
/*
 *******************************************************************************
 * Copyright (C) 2011, International Business Machines Corporation             *
 * All Rights Reserved.                                                        *
 *******************************************************************************
 */
package com.ibm.icu.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import com.ibm.icu.impl.ICUResourceBundle;

/**
 * Region is the class representing a Unicode Region Code, also known as a 
 * Unicode Region Subtag, which is defined based upon the BCP 47 standard. We often think of
 * "regions" as "countries" when defining the characteristics of a locale.  Region codes There are different
 * types of region codes that are important to distinguish.
 * 

* Macroregion - A code for a "macro geographical (continental) region, geographical sub-region, or * selected economic and other grouping" as defined in * UN M.49 (http://unstats.un.org/unsd/methods/m49/m49regin.htm). * These are typically 3-digit codes, but contain some 2-letter codes, such as the LDML code QO * added for Outlying Oceania. Not all UNM.49 codes are defined in LDML, but most of them are. * Macroregions are represented in ICU by one of three region types: WORLD ( region code 001 ), * CONTINENTS ( regions contained directly by WORLD ), and SUBCONTINENTS ( things contained directly * by a continent ). *

* TERRITORY - A Region that is not a Macroregion. These are typically codes for countries, but also * include areas that are not separate countries, such as the code "AQ" for Antarctica or the code * "HK" for Hong Kong (SAR China). Overseas dependencies of countries may or may not have separate * codes. The codes are typically 2-letter codes aligned with the ISO 3166 standard, but BCP47 allows * for the use of 3-digit codes in the future. *

* UNKNOWN - The code ZZ is defined by Unicode LDML for use to indicate that the Region is unknown, * or that the value supplied as a region was invalid. *

* DEPRECATED - Region codes that have been defined in the past but are no longer in modern usage, * usually due to a country splitting into multiple territories or changing its name. *

* GROUPING - A widely understood grouping of territories that has a well defined membership such * that a region code has been assigned for it. Some of these are UNM.49 codes that do't fall into * the world/continent/sub-continent hierarchy, while others are just well known groupings that have * their own region code. Region "EU" (European Union) is one such region code that is a grouping. * Groupings will never be returned by the getContainingRegion() API, since a different type of region * ( WORLD, CONTINENT, or SUBCONTINENT ) will always be the containing region instead. * * @author John Emmons * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public class Region implements Comparable { /** * RegionType is an enumeration defining the different types of regions. Current possible * values are WORLD, CONTINENT, SUBCONTINENT, TERRITORY, GROUPING, DEPRECATED, and UNKNOWN. * * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public enum RegionType { /** * Type representing the unknown region. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ UNKNOWN, /** * Type representing a territory. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ TERRITORY, /** * Type representing the whole world. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ WORLD, /** * Type representing a continent. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ CONTINENT, /** * Type representing a sub-continent. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. 
*/ SUBCONTINENT, /** * Type representing a grouping of territories that is not to be used in * the normal WORLD/CONTINENT/SUBCONTINENT/TERRITORY containment tree. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ GROUPING, /** * Type representing a region whose code has been deprecated, usually * due to a country splitting into multiple territories or changing its name. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ DEPRECATED, } /** * A constant used for unknown numeric region code. * @see #getNumericCode() * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public static final int UNDEFINED_NUMERIC_CODE = -1; private String id; private int code; private RegionType type; private static boolean hasData = false; private static boolean hasContainmentData = false; private static Map regionIndexMap = null; // Map from ID to position in the table private static Map numericIndexMap = null; // Map from numeric code to position in the table private static Map territoryAliasMap = null; // Aliases private static Map numericCodeMap = null; // Map of all possible IDs to numeric codes private static Region[] regions = null; private static BitSet[] subRegionData = null; private static Integer[] containingRegionData = null; private static ArrayList> availableRegions = null; private static final String UNKNOWN_REGION_ID = "ZZ"; private static final String WORLD_ID = "001"; /* * Private default constructor. Use factory methods only. */ private Region () {} /* * Initializes the region data from the ICU resource bundles. The region data * contains the basic relationships such as which regions are known, what the numeric * codes are, and any known aliases. It does not contain the territory containment data. 
* Territory containment data only gets loaded if someone calls an API that is actually * going to use that data. * * If the region data has already loaded, then this method simply returns without doing * anything meaningful. * */ private static synchronized void initRegionData() { if ( hasData ) { return; } territoryAliasMap = new HashMap(); numericCodeMap = new HashMap(); regionIndexMap = new HashMap(); numericIndexMap = new HashMap(); availableRegions = new ArrayList>(RegionType.values().length); for (int i = 0 ; i < RegionType.values().length ; i++) { availableRegions.add(null); } UResourceBundle regionCodes = null; UResourceBundle territoryAlias = null; UResourceBundle codeMappings = null; UResourceBundle worldContainment = null; UResourceBundle territoryContainment = null; UResourceBundle groupingContainment = null; UResourceBundle rb = UResourceBundle.getBundleInstance( ICUResourceBundle.ICU_BASE_NAME, "metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER); regionCodes = rb.get("regionCodes"); territoryAlias = rb.get("territoryAlias"); UResourceBundle rb2 = UResourceBundle.getBundleInstance( ICUResourceBundle.ICU_BASE_NAME, "supplementalData", ICUResourceBundle.ICU_DATA_CLASS_LOADER); codeMappings = rb2.get("codeMappings"); // Right now only fetch as much territory containment as we need in order to determine // types. Only fetch the rest if we have to. 
// territoryContainment = rb2.get("territoryContainment"); worldContainment = territoryContainment.get("001"); groupingContainment = territoryContainment.get("grouping"); String[] continentsArr = worldContainment.getStringArray(); List continents = Arrays.asList(continentsArr); String[] groupingArr = groupingContainment.getStringArray(); List groupings = Arrays.asList(groupingArr); // First put alias mappings for iso3 and numeric code mappings for ( int i = 0 ; i < codeMappings.getSize(); i++ ) { UResourceBundle mapping = codeMappings.get(i); if ( mapping.getType() == UResourceBundle.ARRAY ) { String [] codeStrings = mapping.getStringArray(); if ( !territoryAliasMap.containsKey(codeStrings[1])) { territoryAliasMap.put(codeStrings[1],codeStrings[0]); // Put alias from the numeric to the iso2 code } territoryAliasMap.put(codeStrings[2],codeStrings[0]); // Put alias from the iso3 to the iso2 code. numericCodeMap.put(codeStrings[0], Integer.valueOf(codeStrings[1])); // Create the mapping from the iso2 code to its numeric value } } for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) { UResourceBundle res = territoryAlias.get(i); String key = res.getKey(); String value = res.getString(); if ( !territoryAliasMap.containsKey(key)) { territoryAliasMap.put(key, value); } } regions = new Region[regionCodes.getSize()]; for ( int i = 0 ; i < regions.length ; i++ ) { regions[i] = new Region(); String id = regionCodes.getString(i); regions[i].id = id; regionIndexMap.put(id, Integer.valueOf(i)); if ( id.matches("[0-9]{3}")) { regions[i].code = Integer.valueOf(id).intValue(); numericIndexMap.put(regions[i].code, Integer.valueOf(i)); } else if (numericCodeMap.containsKey(id)) { regions[i].code = numericCodeMap.get(id).intValue(); if ( !numericIndexMap.containsKey(regions[i].code)) { numericIndexMap.put(regions[i].code, Integer.valueOf(i)); } } else { regions[i].code = UNDEFINED_NUMERIC_CODE; } if ( territoryAliasMap.containsKey(id)){ regions[i].type = RegionType.DEPRECATED; } else 
if ( id.equals(WORLD_ID) ) { regions[i].type = RegionType.WORLD; } else if ( id.equals(UNKNOWN_REGION_ID) ) { regions[i].type = RegionType.UNKNOWN; } else if ( continents.contains(id) ) { regions[i].type = RegionType.CONTINENT; } else if ( groupings.contains(id) ) { regions[i].type = RegionType.GROUPING; } else if ( id.matches("[0-9]{3}|QO") ) { regions[i].type = RegionType.SUBCONTINENT; } else { regions[i].type = RegionType.TERRITORY; } } hasData = true; } /* * Initializes the containment data from the ICU resource bundles. The containment data * defines the relationships between different regions, such as which regions are contained * within other regions. * * Territory containment data only gets loaded if someone calls an API that is actually * going to use that data. Since you have to have the basic region data as well, this * method will attempt to load the basic region data if it hasn't been loaded already. * * If the containment data has already loaded, then this method simply returns without doing * anything meaningful. * */ private static synchronized void initContainmentData() { if ( hasContainmentData ) { return; } initRegionData(); subRegionData = new BitSet[regions.length]; containingRegionData = new Integer[regions.length]; for ( int i = 0 ; i < regions.length ; i++ ) { subRegionData[i] = new BitSet(regions.length); containingRegionData[i] = null; } UResourceBundle territoryContainment = null; UResourceBundle rb = UResourceBundle.getBundleInstance( ICUResourceBundle.ICU_BASE_NAME, "supplementalData", ICUResourceBundle.ICU_DATA_CLASS_LOADER); territoryContainment = rb.get("territoryContainment"); // Get territory containment info from the supplemental data. 
for ( int i = 0 ; i < territoryContainment.getSize(); i++ ) { UResourceBundle mapping = territoryContainment.get(i); String parent = mapping.getKey(); Integer parentRegionIndex = regionIndexMap.get(parent); for ( int j = 0 ; j < mapping.getSize(); j++ ) { String child = mapping.getString(j); Integer childRegionIndex = regionIndexMap.get(child); if ( parentRegionIndex != null && childRegionIndex != null ) { subRegionData[parentRegionIndex.intValue()].set(childRegionIndex.intValue()); // Set the containment bit for this pair // Regions of type GROUPING can't be set as the parent, since another region // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent. if ( !regions[parentRegionIndex].isOfType(RegionType.GROUPING)) { containingRegionData[childRegionIndex] = parentRegionIndex; } } } } hasContainmentData = true; } /** Returns a Region using the given region ID. The region ID can be either a 2-letter ISO code, * 3-letter ISO code, UNM.49 numeric code, or other valid Unicode Region Code as defined by the CLDR. * @param id The id of the region to be retrieved. * @return The corresponding region. * @throws NullPointerException if the supplied id is null. * @throws IllegalArgumentException if the supplied ID cannot be canonicalized to a Region ID that is known by ICU. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public static Region get(String id) { if ( id == null ) { throw new NullPointerException(); } String canonicalID = canonicalize(id); if (canonicalID.equals(UNKNOWN_REGION_ID) && !id.equals(UNKNOWN_REGION_ID)) { throw new IllegalArgumentException("Unknown region id: " + id); } return regions[regionIndexMap.get(canonicalID)]; } /** Returns a Region using the given numeric code as defined by UNM.49 * @param code The numeric code of the region to be retrieved. * @return The corresponding region. * @throws IllegalArgumentException if the supplied numeric code is not recognized. 
* @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public static Region get(int code) { Integer index = numericIndexMap.get(Integer.valueOf(code)); if ( index != null ) { Region r = regions[index]; // Since a deprecated region will have the same numeric code as its new region code // we get by id which will make sure we get the canonicalized one. return Region.get(r.id); } else { throw new IllegalArgumentException("Unknown region code: " + code); } } /** Returns the canonicalized (preferred) form of the Region code. For territories, it will * convert the string to the 2-letter ISO 3166 code if at all possible, and will convert any * known aliases to their modern counterparts. * * @param id The string representing the region code to be canonicalized. * @return The canonicalized (preferred) form of the region code. If the supplied region * code is not recognized, the unknown region ( code "ZZ" ) is returned. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public static String canonicalize(String id) { initRegionData(); String result = territoryAliasMap.get(id); if ( result != null && regionIndexMap.containsKey(result)) { return result; } if ( regionIndexMap.containsKey(id)) { return id; } return UNKNOWN_REGION_ID; } /** Returns true if the supplied region code is already in its canonical ( preferred ) form. * * @param id The string representing the region code to be checked. * @return TRUE if the supplied region code is canonical, FALSE otherwise. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public static boolean isCanonical(String id) { return ( canonicalize(id).equals(id)); } /** Used to retrieve all available regions of a specific type. * * @param type The type of regions to be returned ( TERRITORY, MACROREGION, etc. 
) * @return An unmodifiable set of all known regions that match the given type. * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public static Set getAvailable(RegionType type) { initRegionData(); if ( availableRegions.get(type.ordinal()) == null) { Set result = new TreeSet(); for ( Region r : regions ) { if ( r.type == type ) { result.add(r); } } availableRegions.set(type.ordinal(), Collections.unmodifiableSet(result)); } return availableRegions.get(type.ordinal()); } /** Used to determine the macroregion that geographically contains this region. * * @return The region that geographically contains this region. Returns NULL if this region is * code "001" (World) or "ZZ" (Unknown region). For example, calling this method with region "IT" (Italy) * returns the region "039" (Southern Europe). * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public Region getContainingRegion() { initContainmentData(); Integer index = regionIndexMap.get(id); assert(index!=null); if ( containingRegionData[index] == null ) { return null; } else { return regions[containingRegionData[index]]; } } /** Used to determine the sub-regions that are contained within this region. * * @return An unmodifiable set containing all the regions that are immediate children * of this region in the region hierarchy. These returned regions could be either macro * regions, territories, or a mixture of the two, depending on the containment data as defined * in CLDR. This API may return an empty set if this region doesn't have any sub-regions. * For example, calling this method with region "150" (Europe) returns a set containing * the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe) * - "154" (Northern Europe) and "155" (Western Europe). * * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. 
*/ public Set getSubRegions() { initContainmentData(); Set result = new TreeSet(); Integer index = regionIndexMap.get(id); BitSet contains = subRegionData[index]; for( int i = contains.nextSetBit(0); i>=0; i=contains.nextSetBit(i+1)) { result.add(regions[i]); } return Collections.unmodifiableSet(result); } /** Used to determine all the territories that are contained within this region. * * @return An unmodifiable set containing all the territories that are children of this * region anywhere in the region hierarchy. If this region is already a territory, * the empty set is returned, since territories by definition do not contain other regions. * For example, calling this method with region "150" (Europe) returns a set containing all * the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. ) * * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public Set getContainedTerritories() { initContainmentData(); Set result = new TreeSet(); Set subRegions = getSubRegions(); Iterator it = subRegions.iterator(); while ( it.hasNext() ) { Region r = it.next(); if ( r.isOfType(RegionType.TERRITORY) ) { result.add(r); } else if ( r.isOfType(RegionType.CONTINENT) || r.isOfType(RegionType.SUBCONTINENT)) { result.addAll(r.getContainedTerritories()); // Recursion!!! } } return Collections.unmodifiableSet(result); } /** Returns the string representation of this region * * @return The string representation of this region, which is its canonical ID. * * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public String toString() { return id; } /** Returns the numeric code for this region * * @return The numeric code for this region. Returns UNDEFINED_NUMERIC_CODE (-1) if the * given region does not have a numeric code assigned to it. This is a very rare case and * only occurs for a few very small territories. 
* * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public int getNumericCode() { return code; } /** Returns this region's type. * * @return This region's type classification, such as MACROREGION or TERRITORY. * * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public RegionType getType() { return type; } /** Checks to see if this region is of a specific type. * * @return Returns TRUE if this region matches the supplied type. * * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public boolean isOfType(RegionType type) { return this.type.equals(type); } /** * {@inheritDoc} * @internal ICU 4.8 technology preview * @deprecated This API might change or be removed in a future release. */ public int compareTo(Region other) { return id.compareTo(other.id); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy