// commons.validator.routines.DomainValidator
// (source listing from the android-saripaar artifact)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package commons.validator.routines;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.Locale;
/**
 * <p><b>Domain name</b> validation routines.</p>
 *
 * <p>
 * This validator provides methods for validating Internet domain names
 * and top-level domains.
 * </p>
 *
 * <p>
 * Domain names are evaluated according
 * to the standards RFC1034, section 3, and RFC1123, section 2.1.
 * No accommodation is provided for the specialized needs of
 * other applications; if the domain name has been URL-encoded, for example,
 * validation will fail even though the equivalent plaintext version of the
 * same name would have passed.
 * </p>
 *
 * <p>
 * Validation is also provided for top-level domains (TLDs) as defined and
 * maintained by the Internet Assigned Numbers Authority (IANA):
 * </p>
 *
 * <ul>
 *   <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
 *       (<code>.arpa</code>, etc.)</li>
 *   <li>{@link #isValidGenericTld} - validates generic TLDs
 *       (<code>.com, .org</code>, etc.)</li>
 *   <li>{@link #isValidCountryCodeTld} - validates country code TLDs
 *       (<code>.us, .uk, .cn</code>, etc.)</li>
 * </ul>
 *
 * <p>
 * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
 * methods to ensure that a given domain name matches a specific IP; see
 * {@link java.net.InetAddress} for that functionality.)
 * </p>
 *
 * @version $Revision$
 * @since Validator 1.4
 */
public class DomainValidator implements Serializable {
// Fixed serialization version identifier for this Serializable class.
private static final long serialVersionUID = -4407125112880174009L;
// Regular expression strings for hostnames (derived from RFC2396 and RFC 1123)
// RFC2396: domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
// Max 63 characters: one leading alphanumeric, optionally followed by up to 61
// alphanumerics/hyphens and one trailing alphanumeric (1 + 61 + 1).
// The optional tail is written as an independent (atomic) non-capturing group "(?>...)".
private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]{0,61}\\p{Alnum})?";
// RFC2396 toplabel = alpha | alpha *( alphanum | "-" ) alphanum
// Max 63 characters; same shape as a domain label, except the first character must be a letter.
private static final String TOP_LABEL_REGEX = "\\p{Alpha}(?>[\\p{Alnum}-]{0,61}\\p{Alnum})?";
// RFC2396 hostname = *( domainlabel "." ) toplabel [ "." ]
// Note that the regex currently requires both a domain label and a top level label, whereas
// the RFC does not. This is because the regex is used to detect if a TLD is present.
// If the match fails, input is checked against DOMAIN_LABEL_REGEX (hostnameRegex)
// RFC1123 sec 2.1 allows hostnames to start with a digit
// The only capturing group wraps the top label, so a successful match exposes the TLD
// as the first group; a single trailing dot after the TLD is tolerated.
private static final String DOMAIN_NAME_REGEX =
"^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + ")\\.?$";
// When true, this instance additionally accepts local TLDs and bare (single-label) hostnames.
private final boolean allowLocal;
/**
* Singleton instance of this validator, which
* doesn't consider local addresses as valid.
*/
private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false);
/**
* Singleton instance of this validator, which does
* consider local addresses valid.
*/
private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true);
/**
* RegexValidator for matching domains.
* Built from DOMAIN_NAME_REGEX, i.e. it requires at least one label plus a top-level label.
*/
private final RegexValidator domainRegex =
new RegexValidator(DOMAIN_NAME_REGEX);
/**
* RegexValidator for matching a local hostname.
* Built from DOMAIN_LABEL_REGEX, i.e. it matches a single label without any dots.
*/
// RFC1123 sec 2.1 allows hostnames to start with a digit
private final RegexValidator hostnameRegex =
new RegexValidator(DOMAIN_LABEL_REGEX);
/**
 * Obtains the shared validator that treats local addresses as invalid.
 *
 * @return the singleton instance that does not accept local addresses
 */
public static DomainValidator getInstance() {
    // Same singleton the boolean overload hands out for "no local addresses".
    return getInstance(false);
}
/**
 * Obtains one of the two shared validators, chosen by whether local
 * addresses are to be accepted.
 *
 * @param allowLocal Should local addresses be considered valid?
 * @return the singleton instance matching the requested local-address policy
 */
public static DomainValidator getInstance(boolean allowLocal) {
    return allowLocal ? DOMAIN_VALIDATOR_WITH_LOCAL : DOMAIN_VALIDATOR;
}
/**
 * Private constructor; instances are only created for the static singleton fields of this class.
 *
 * @param allowLocal whether the new instance should consider local addresses valid
 */
private DomainValidator(boolean allowLocal) {
this.allowLocal = allowLocal;
}
/**
 * Returns true if the specified <code>String</code> parses
 * as a valid domain name with a recognized top-level domain.
 * The parsing is case-insensitive.
 * <p>
 * When this instance allows local addresses, a name that does not have the
 * "label(s) + TLD" shape is still accepted if it is a single valid hostname label.
 *
 * @param domain the parameter to check for domain name syntax
 * @return true if the parameter is a valid domain name
 */
public boolean isValid(String domain) {
    if (domain == null) {
        return false;
    }
    // Hosts must be equally reachable via punycode and Unicode, and Unicode is
    // never shorter than punycode, so the length limit is applied to the punycode
    // form. Input that could not be converted is returned unchanged and will be
    // rejected by the ASCII-only regexes below.
    final String ascii = unicodeToASCII(domain);
    if (ascii.length() > 253) {
        return false;
    }
    final String[] groups = domainRegex.match(ascii);
    if (groups == null || groups.length == 0) {
        // No TLD present: only acceptable as a bare hostname on a "local" validator.
        return allowLocal && hostnameRegex.isValid(ascii);
    }
    // The regex's first captured group is the top-level label.
    return isValidTld(groups[0]);
}
// Package-private so unit tests can reach it.
// Performs the same syntax checks as isValid() but does not look the TLD up in
// the TLD tables and accepts a bare hostname regardless of allowLocal;
// it must be kept in step with isValid().
final boolean isValidDomainSyntax(String domain) {
    if (domain == null) {
        return false;
    }
    // The length limit is checked on the punycode form (Unicode is never shorter
    // than punycode); unconvertible input falls through to the ASCII regexes.
    final String ascii = unicodeToASCII(domain);
    if (ascii.length() > 253) {
        return false;
    }
    final String[] groups = domainRegex.match(ascii);
    if (groups != null && groups.length > 0) {
        return true;
    }
    return hostnameRegex.isValid(ascii);
}
/**
 * Returns true if the specified <code>String</code> matches any
 * IANA-defined top-level domain. Leading dots are ignored if present.
 * The search is case-insensitive.
 * <p>
 * If this instance allows local addresses, the local TLDs are accepted as well.
 *
 * @param tld the parameter to check for TLD status; <code>null</code> is
 *        never a TLD and yields <code>false</code>
 * @return true if the parameter is a TLD
 */
public boolean isValidTld(String tld) {
    // Consistent with isValid(null): a missing value is simply "not valid"
    // rather than a NullPointerException from the lookups below.
    if (tld == null) {
        return false;
    }
    final String ascii = unicodeToASCII(tld);
    if (allowLocal && isValidLocalTld(ascii)) {
        return true;
    }
    return isValidInfrastructureTld(ascii)
            || isValidGenericTld(ascii)
            || isValidCountryCodeTld(ascii);
}
/**
 * Returns true if the specified <code>String</code> matches any
 * IANA-defined infrastructure top-level domain. Leading dots are
 * ignored if present. The search is case-insensitive.
 *
 * @param iTld the parameter to check for infrastructure TLD status;
 *        <code>null</code> yields <code>false</code>
 * @return true if the parameter is an infrastructure TLD
 */
public boolean isValidInfrastructureTld(String iTld) {
    // A null argument cannot name a TLD; answer false instead of throwing.
    if (iTld == null) {
        return false;
    }
    // Normalize: punycode, lower case (the table is lower case), no leading dot.
    final String key = chompLeadingDot(unicodeToASCII(iTld).toLowerCase(Locale.ENGLISH));
    return Arrays.binarySearch(INFRASTRUCTURE_TLDS, key) >= 0;
}
/**
 * Returns true if the specified <code>String</code> matches any
 * IANA-defined generic top-level domain. Leading dots are ignored
 * if present. The search is case-insensitive.
 *
 * @param gTld the parameter to check for generic TLD status;
 *        <code>null</code> yields <code>false</code>
 * @return true if the parameter is a generic TLD
 */
public boolean isValidGenericTld(String gTld) {
    // A null argument cannot name a TLD; answer false instead of throwing.
    if (gTld == null) {
        return false;
    }
    // Normalize: punycode, lower case (the table is lower case), no leading dot.
    final String key = chompLeadingDot(unicodeToASCII(gTld).toLowerCase(Locale.ENGLISH));
    return Arrays.binarySearch(GENERIC_TLDS, key) >= 0;
}
/**
 * Returns true if the specified <code>String</code> matches any
 * IANA-defined country code top-level domain. Leading dots are
 * ignored if present. The search is case-insensitive.
 *
 * @param ccTld the parameter to check for country code TLD status;
 *        <code>null</code> yields <code>false</code>
 * @return true if the parameter is a country code TLD
 */
public boolean isValidCountryCodeTld(String ccTld) {
    // A null argument cannot name a TLD; answer false instead of throwing.
    if (ccTld == null) {
        return false;
    }
    // Normalize: punycode, lower case (the table is lower case), no leading dot.
    final String key = chompLeadingDot(unicodeToASCII(ccTld).toLowerCase(Locale.ENGLISH));
    return Arrays.binarySearch(COUNTRY_CODE_TLDS, key) >= 0;
}
/**
 * Returns true if the specified <code>String</code> matches any
 * widely used "local" domains (localhost or localdomain). Leading dots are
 * ignored if present. The search is case-insensitive.
 *
 * @param lTld the parameter to check for local TLD status;
 *        <code>null</code> yields <code>false</code>
 * @return true if the parameter is a local TLD
 */
public boolean isValidLocalTld(String lTld) {
    // A null argument cannot name a TLD; answer false instead of throwing.
    if (lTld == null) {
        return false;
    }
    // Normalize: punycode, lower case (the table is lower case), no leading dot.
    final String key = chompLeadingDot(unicodeToASCII(lTld).toLowerCase(Locale.ENGLISH));
    return Arrays.binarySearch(LOCAL_TLDS, key) >= 0;
}
// Strips a single leading "." from str, if there is one; otherwise returns str unchanged.
private String chompLeadingDot(String str) {
    return str.startsWith(".") ? str.substring(1) : str;
}
// ---------------------------------------------
// ----- TLDs defined by IANA
// ----- Authoritative and comprehensive list at:
// ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt
// Note that the above list is in UPPER case.
// The code currently converts strings to lower case (as per the tables below)
// IANA also provide an HTML list at http://www.iana.org/domains/root/db
// Note that this contains several country code entries which are NOT in
// the text file. These all have the "Not assigned" in the "Sponsoring Organisation" column
// For example (as of 2015-01-02):
// .bl country-code Not assigned
// .um country-code Not assigned
// Infrastructure TLDs, in lower case.
// WARNING: this array MUST be sorted, otherwise it cannot be searched reliably using binary search
private static final String[] INFRASTRUCTURE_TLDS = new String[] {
"arpa", // internet infrastructure
};
// Generic TLDs, in lower case (internationalized entries are stored in punycode form).
// "arpa" is deliberately NOT listed here: it is the infrastructure TLD and lives in
// INFRASTRUCTURE_TLDS, so isValidGenericTld must not report it as generic.
// WARNING: this array MUST be sorted, otherwise it cannot be searched reliably using binary search
private static final String[] GENERIC_TLDS = new String[] {
    "abogado",
    "academy",
    "accountants",
    "active",
    "actor",
    "adult",
    "aero",
    "agency",
    "airforce",
    "allfinanz",
    "alsace",
    "amsterdam",
    "android",
    "aquarelle",
    "archi",
    "army",
    "asia",
    "associates",
    "attorney",
    "auction",
    "audio",
    "autos",
    "axa",
    "band",
    "bar",
    "bargains",
    "bayern",
    "beer",
    "berlin",
    "best",
    "bid",
    "bike",
    "bio",
    "biz",
    "black",
    "blackfriday",
    "bloomberg",
    "blue",
    "bmw",
    "bnpparibas",
    "boo",
    "boutique",
    "brussels",
    "budapest",
    "build",
    "builders",
    "business",
    "buzz",
    "bzh",
    "cab",
    "cal",
    "camera",
    "camp",
    "cancerresearch",
    "capetown",
    "capital",
    "caravan",
    "cards",
    "care",
    "career",
    "careers",
    "cartier",
    "casa",
    "cash",
    "cat",
    "catering",
    "center",
    "ceo",
    "cern",
    "channel",
    "cheap",
    "christmas",
    "chrome",
    "church",
    "citic",
    "city",
    "claims",
    "cleaning",
    "click",
    "clinic",
    "clothing",
    "club",
    "coach",
    "codes",
    "coffee",
    "college",
    "cologne",
    "com",
    "community",
    "company",
    "computer",
    "condos",
    "construction",
    "consulting",
    "contractors",
    "cooking",
    "cool",
    "coop",
    "country",
    "credit",
    "creditcard",
    "cricket",
    "crs",
    "cruises",
    "cuisinella",
    "cymru",
    "dad",
    "dance",
    "dating",
    "day",
    "deals",
    "degree",
    "delivery",
    "democrat",
    "dental",
    "dentist",
    "desi",
    "dev",
    "diamonds",
    "diet",
    "digital",
    "direct",
    "directory",
    "discount",
    "dnp",
    "docs",
    "domains",
    "doosan",
    "durban",
    "dvag",
    "eat",
    "edu",
    "education",
    "email",
    "emerck",
    "energy",
    "engineer",
    "engineering",
    "enterprises",
    "equipment",
    "esq",
    "estate",
    "eurovision",
    "eus",
    "events",
    "everbank",
    "exchange",
    "expert",
    "exposed",
    "fail",
    "farm",
    "fashion",
    "feedback",
    "finance",
    "financial",
    "firmdale",
    "fish",
    "fishing",
    "fitness",
    "flights",
    "florist",
    "flowers",
    "flsmidth",
    "fly",
    "foo",
    "forsale",
    "foundation",
    "frl",
    "frogans",
    "fund",
    "furniture",
    "futbol",
    "gal",
    "gallery",
    "garden",
    "gbiz",
    "gent",
    "ggee",
    "gift",
    "gifts",
    "gives",
    "glass",
    "gle",
    "global",
    "globo",
    "gmail",
    "gmo",
    "gmx",
    "google",
    "gop",
    "gov",
    "graphics",
    "gratis",
    "green",
    "gripe",
    "guide",
    "guitars",
    "guru",
    "hamburg",
    "haus",
    "healthcare",
    "help",
    "here",
    "hiphop",
    "hiv",
    "holdings",
    "holiday",
    "homes",
    "horse",
    "host",
    "hosting",
    "house",
    "how",
    "ibm",
    "immo",
    "immobilien",
    "industries",
    "info",
    "ing",
    "ink",
    "institute",
    "insure",
    "int",
    "international",
    "investments",
    "irish",
    "iwc",
    "jetzt",
    "jobs",
    "joburg",
    "juegos",
    "kaufen",
    "kim",
    "kitchen",
    "kiwi",
    "koeln",
    "krd",
    "kred",
    "lacaixa",
    "land",
    "latrobe",
    "lawyer",
    "lds",
    "lease",
    "legal",
    "lgbt",
    "lidl",
    "life",
    "lighting",
    "limited",
    "limo",
    "link",
    "loans",
    "london",
    "lotto",
    "ltda",
    "luxe",
    "luxury",
    "madrid",
    "maison",
    "management",
    "mango",
    "market",
    "marketing",
    "media",
    "meet",
    "melbourne",
    "meme",
    "memorial",
    "menu",
    "miami",
    "mil",
    "mini",
    "mobi",
    "moda",
    "moe",
    "monash",
    "money",
    "mormon",
    "mortgage",
    "moscow",
    "motorcycles",
    "mov",
    "museum",
    "nagoya",
    "name",
    "navy",
    "net",
    "network",
    "neustar",
    "new",
    "nexus",
    "ngo",
    "nhk",
    "ninja",
    "nra",
    "nrw",
    "nyc",
    "okinawa",
    "ong",
    "onl",
    "ooo",
    "org",
    "organic",
    "osaka",
    "otsuka",
    "ovh",
    "paris",
    "partners",
    "parts",
    "party",
    "pharmacy",
    "photo",
    "photography",
    "photos",
    "physio",
    "pics",
    "pictures",
    "pink",
    "pizza",
    "place",
    "plumbing",
    "pohl",
    "poker",
    "porn",
    "post",
    "praxi",
    "press",
    "pro",
    "prod",
    "productions",
    "prof",
    "properties",
    "property",
    "pub",
    "qpon",
    "quebec",
    "realtor",
    "recipes",
    "red",
    "rehab",
    "reise",
    "reisen",
    "reit",
    "ren",
    "rentals",
    "repair",
    "report",
    "republican",
    "rest",
    "restaurant",
    "reviews",
    "rich",
    "rio",
    "rip",
    "rocks",
    "rodeo",
    "rsvp",
    "ruhr",
    "ryukyu",
    "saarland",
    "sale",
    "samsung",
    "sarl",
    "sca",
    "scb",
    "schmidt",
    "schule",
    "schwarz",
    "science",
    "scot",
    "services",
    "sew",
    "sexy",
    "shiksha",
    "shoes",
    "shriram",
    "singles",
    "sky",
    "social",
    "software",
    "sohu",
    "solar",
    "solutions",
    "soy",
    "space",
    "spiegel",
    "supplies",
    "supply",
    "support",
    "surf",
    "surgery",
    "suzuki",
    "sydney",
    "systems",
    "taipei",
    "tatar",
    "tattoo",
    "tax",
    "technology",
    "tel",
    "tienda",
    "tips",
    "tires",
    "tirol",
    "today",
    "tokyo",
    "tools",
    "top",
    "town",
    "toys",
    "trade",
    "training",
    "travel",
    "trust",
    "tui",
    "university",
    "uno",
    "uol",
    "vacations",
    "vegas",
    "ventures",
    "versicherung",
    "vet",
    "viajes",
    "video",
    "villas",
    "vision",
    "vlaanderen",
    "vodka",
    "vote",
    "voting",
    "voto",
    "voyage",
    "wales",
    "wang",
    "watch",
    "webcam",
    "website",
    "wed",
    "wedding",
    "whoswho",
    "wien",
    "wiki",
    "williamhill",
    "wme",
    "work",
    "works",
    "world",
    "wtc",
    "wtf",
    "xn--1qqw23a", // 佛山 Guangzhou YU Wei Information Technology Co., Ltd.
    "xn--3bst00m", // 集团 Eagle Horizon Limited
    "xn--3ds443g", // 在线 TLD REGISTRY LIMITED
    "xn--45q11c", // 八卦 Zodiac Scorpio Limited
    "xn--4gbrim", // موقع Suhub Electronic Establishment
    "xn--55qw42g", // 公益 China Organizational Name Administration Center
    "xn--55qx5d", // 公司 Computer Network Information Center of Chinese Academy of Sciences (China Internet Network Information Center)
    "xn--6frz82g", // 移动 Afilias Limited
    "xn--6qq986b3xl", // 我爱你 Tycoon Treasure Limited
    "xn--80adxhks", // москва Foundation for Assistance for Internet Technologies and Infrastructure Development (FAITID)
    "xn--80asehdb", // онлайн CORE Association
    "xn--80aswg", // сайт CORE Association
    "xn--c1avg", // орг Public Interest Registry
    "xn--cg4bki", // 삼성 SAMSUNG SDS CO., LTD
    "xn--czr694b", // 商标 HU YI GLOBAL INFORMATION RESOURCES(HOLDING) COMPANY.HONGKONG LIMITED
    "xn--czrs0t", // 商店 Wild Island, LLC
    "xn--czru2d", // 商城 Zodiac Aquarius Limited
    "xn--d1acj3b", // дети The Foundation for Network Initiatives “The Smart Internet”
    "xn--fiq228c5hs", // 中文网 TLD REGISTRY LIMITED
    "xn--fiq64b", // 中信 CITIC Group Corporation
    "xn--flw351e", // 谷歌 Charleston Road Registry Inc.
    "xn--hxt814e", // 网店 Zodiac Libra Limited
    "xn--i1b6b1a6a2e", // संगठन Public Interest Registry
    "xn--io0a7i", // 网络 Computer Network Information Center of Chinese Academy of Sciences (China Internet Network Information Center)
    "xn--kput3i", // 手机 Beijing RITT-Net Technology Development Co., Ltd
    "xn--mgbab2bd", // بازار CORE Association
    "xn--ngbc5azd", // شبكة International Domain Registry Pty. Ltd.
    "xn--nqv7f", // 机构 Public Interest Registry
    "xn--nqv7fs00ema", // 组织机构 Public Interest Registry
    "xn--p1acf", // рус Rusnames Limited
    "xn--q9jyb4c", // みんな Charleston Road Registry Inc.
    "xn--qcka1pmc", // グーグル Charleston Road Registry Inc.
    "xn--rhqv96g", // 世界 Stable Tone Limited
    "xn--ses554g", // 网址 HU YI GLOBAL INFORMATION RESOURCES (HOLDING) COMPANY. HONGKONG LIMITED
    "xn--unup4y", // 游戏 Spring Fields, LLC
    "xn--vermgensberater-ctb", // vermögensberater Deutsche Vermögensberatung Aktiengesellschaft DVAG
    "xn--vermgensberatung-pwb", // vermögensberatung Deutsche Vermögensberatung Aktiengesellschaft DVAG
    "xn--vhquv", // 企业 Dash McCook, LLC
    "xn--xhq521b", // 广东 Guangzhou YU Wei Information Technology Co., Ltd.
    "xn--zfr164b", // 政务 China Organizational Name Administration Center
    "xxx",
    "xyz",
    "yachts",
    "yandex",
    "yoga",
    "yokohama",
    "youtube",
    "zip",
    "zone",
    "zuerich",
};
// Country code TLDs, in lower case (internationalized entries are stored in punycode form).
// WARNING: this array MUST be sorted, otherwise it cannot be searched reliably using binary search
private static final String[] COUNTRY_CODE_TLDS = new String[] {
"ac", // Ascension Island
"ad", // Andorra
"ae", // United Arab Emirates
"af", // Afghanistan
"ag", // Antigua and Barbuda
"ai", // Anguilla
"al", // Albania
"am", // Armenia
"an", // Netherlands Antilles
"ao", // Angola
"aq", // Antarctica
"ar", // Argentina
"as", // American Samoa
"at", // Austria
"au", // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands)
"aw", // Aruba
"ax", // Åland
"az", // Azerbaijan
"ba", // Bosnia and Herzegovina
"bb", // Barbados
"bd", // Bangladesh
"be", // Belgium
"bf", // Burkina Faso
"bg", // Bulgaria
"bh", // Bahrain
"bi", // Burundi
"bj", // Benin
"bm", // Bermuda
"bn", // Brunei Darussalam
"bo", // Bolivia
"br", // Brazil
"bs", // Bahamas
"bt", // Bhutan
"bv", // Bouvet Island
"bw", // Botswana
"by", // Belarus
"bz", // Belize
"ca", // Canada
"cc", // Cocos (Keeling) Islands
"cd", // Democratic Republic of the Congo (formerly Zaire)
"cf", // Central African Republic
"cg", // Republic of the Congo
"ch", // Switzerland
"ci", // Côte d'Ivoire
"ck", // Cook Islands
"cl", // Chile
"cm", // Cameroon
"cn", // China, mainland
"co", // Colombia
"cr", // Costa Rica
"cu", // Cuba
"cv", // Cape Verde
"cw", // Curaçao
"cx", // Christmas Island
"cy", // Cyprus
"cz", // Czech Republic
"de", // Germany
"dj", // Djibouti
"dk", // Denmark
"dm", // Dominica
"do", // Dominican Republic
"dz", // Algeria
"ec", // Ecuador
"ee", // Estonia
"eg", // Egypt
"er", // Eritrea
"es", // Spain
"et", // Ethiopia
"eu", // European Union
"fi", // Finland
"fj", // Fiji
"fk", // Falkland Islands
"fm", // Federated States of Micronesia
"fo", // Faroe Islands
"fr", // France
"ga", // Gabon
"gb", // Great Britain (United Kingdom)
"gd", // Grenada
"ge", // Georgia
"gf", // French Guiana
"gg", // Guernsey
"gh", // Ghana
"gi", // Gibraltar
"gl", // Greenland
"gm", // The Gambia
"gn", // Guinea
"gp", // Guadeloupe
"gq", // Equatorial Guinea
"gr", // Greece
"gs", // South Georgia and the South Sandwich Islands
"gt", // Guatemala
"gu", // Guam
"gw", // Guinea-Bissau
"gy", // Guyana
"hk", // Hong Kong
"hm", // Heard Island and McDonald Islands
"hn", // Honduras
"hr", // Croatia (Hrvatska)
"ht", // Haiti
"hu", // Hungary
"id", // Indonesia
"ie", // Ireland (Éire)
"il", // Israel
"im", // Isle of Man
"in", // India
"io", // British Indian Ocean Territory
"iq", // Iraq
"ir", // Iran
"is", // Iceland
"it", // Italy
"je", // Jersey
"jm", // Jamaica
"jo", // Jordan
"jp", // Japan
"ke", // Kenya
"kg", // Kyrgyzstan
"kh", // Cambodia (Khmer)
"ki", // Kiribati
"km", // Comoros
"kn", // Saint Kitts and Nevis
"kp", // North Korea
"kr", // South Korea
"kw", // Kuwait
"ky", // Cayman Islands
"kz", // Kazakhstan
"la", // Laos (currently being marketed as the official domain for Los Angeles)
"lb", // Lebanon
"lc", // Saint Lucia
"li", // Liechtenstein
"lk", // Sri Lanka
"lr", // Liberia
"ls", // Lesotho
"lt", // Lithuania
"lu", // Luxembourg
"lv", // Latvia
"ly", // Libya
"ma", // Morocco
"mc", // Monaco
"md", // Moldova
"me", // Montenegro
"mg", // Madagascar
"mh", // Marshall Islands
"mk", // Republic of Macedonia
"ml", // Mali
"mm", // Myanmar
"mn", // Mongolia
"mo", // Macau
"mp", // Northern Mariana Islands
"mq", // Martinique
"mr", // Mauritania
"ms", // Montserrat
"mt", // Malta
"mu", // Mauritius
"mv", // Maldives
"mw", // Malawi
"mx", // Mexico
"my", // Malaysia
"mz", // Mozambique
"na", // Namibia
"nc", // New Caledonia
"ne", // Niger
"nf", // Norfolk Island
"ng", // Nigeria
"ni", // Nicaragua
"nl", // Netherlands
"no", // Norway
"np", // Nepal
"nr", // Nauru
"nu", // Niue
"nz", // New Zealand
"om", // Oman
"pa", // Panama
"pe", // Peru
"pf", // French Polynesia With Clipperton Island
"pg", // Papua New Guinea
"ph", // Philippines
"pk", // Pakistan
"pl", // Poland
"pm", // Saint-Pierre and Miquelon
"pn", // Pitcairn Islands
"pr", // Puerto Rico
"ps", // Palestinian territories (PA-controlled West Bank and Gaza Strip)
"pt", // Portugal
"pw", // Palau
"py", // Paraguay
"qa", // Qatar
"re", // Réunion
"ro", // Romania
"rs", // Serbia
"ru", // Russia
"rw", // Rwanda
"sa", // Saudi Arabia
"sb", // Solomon Islands
"sc", // Seychelles
"sd", // Sudan
"se", // Sweden
"sg", // Singapore
"sh", // Saint Helena
"si", // Slovenia
"sj", // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no)
"sk", // Slovakia
"sl", // Sierra Leone
"sm", // San Marino
"sn", // Senegal
"so", // Somalia
"sr", // Suriname
"st", // São Tomé and Príncipe
"su", // Soviet Union (deprecated)
"sv", // El Salvador
"sx", // Sint Maarten
"sy", // Syria
"sz", // Swaziland
"tc", // Turks and Caicos Islands
"td", // Chad
"tf", // French Southern and Antarctic Lands
"tg", // Togo
"th", // Thailand
"tj", // Tajikistan
"tk", // Tokelau
"tl", // East Timor -- NOTE(review): this entry was labelled "(deprecated old code)", which looks swapped with "tp" below; confirm against IANA
"tm", // Turkmenistan
"tn", // Tunisia
"to", // Tonga
"tp", // East Timor -- NOTE(review): presumably this is the deprecated old code rather than "tl"; confirm against IANA
"tr", // Turkey
"tt", // Trinidad and Tobago
"tv", // Tuvalu
"tw", // Taiwan, Republic of China
"tz", // Tanzania
"ua", // Ukraine
"ug", // Uganda
"uk", // United Kingdom
"us", // United States of America
"uy", // Uruguay
"uz", // Uzbekistan
"va", // Vatican City State
"vc", // Saint Vincent and the Grenadines
"ve", // Venezuela
"vg", // British Virgin Islands
"vi", // U.S. Virgin Islands
"vn", // Vietnam
"vu", // Vanuatu
"wf", // Wallis and Futuna
"ws", // Samoa (formerly Western Samoa)
"xn--3e0b707e", // 한국 KISA (Korea Internet & Security Agency)
"xn--45brj9c", // ভারত National Internet Exchange of India
"xn--80ao21a", // қаз Association of IT Companies of Kazakhstan
"xn--90a3ac", // срб Serbian National Internet Domain Registry (RNIDS)
"xn--clchc0ea0b2g2a9gcd", // சிங்கப்பூர் Singapore Network Information Centre (SGNIC) Pte Ltd
"xn--d1alf", // мкд Macedonian Academic Research Network Skopje
"xn--fiqs8s", // 中国 China Internet Network Information Center
"xn--fiqz9s", // 中國 China Internet Network Information Center
"xn--fpcrj9c3d", // భారత్ National Internet Exchange of India
"xn--fzc2c9e2c", // ලංකා LK Domain Registry
"xn--gecrj9c", // ભારત National Internet Exchange of India
"xn--h2brj9c", // भारत National Internet Exchange of India
"xn--j1amh", // укр Ukrainian Network Information Centre (UANIC), Inc.
"xn--j6w193g", // 香港 Hong Kong Internet Registration Corporation Ltd.
"xn--kprw13d", // 台湾 Taiwan Network Information Center (TWNIC)
"xn--kpry57d", // 台灣 Taiwan Network Information Center (TWNIC)
"xn--l1acc", // мон Datacom Co.,Ltd
"xn--lgbbat1ad8j", // الجزائر CERIST
"xn--mgb9awbf", // عمان Telecommunications Regulatory Authority (TRA)
"xn--mgba3a4f16a", // ایران Institute for Research in Fundamental Sciences (IPM)
"xn--mgbaam7a8h", // امارات Telecommunications Regulatory Authority (TRA)
"xn--mgbayh7gpa", // الاردن National Information Technology Center (NITC)
"xn--mgbbh1a71e", // بھارت National Internet Exchange of India
"xn--mgbc0a9azcg", // المغرب Agence Nationale de Réglementation des Télécommunications (ANRT)
"xn--mgberp4a5d4ar", // السعودية Communications and Information Technology Commission
"xn--mgbx4cd0ab", // مليسيا MYNIC Berhad
"xn--node", // გე Information Technologies Development Center (ITDC)
"xn--o3cw4h", // ไทย Thai Network Information Center Foundation
"xn--ogbpf8fl", // سورية National Agency for Network Services (NANS)
"xn--p1ai", // рф Coordination Center for TLD RU
"xn--pgbs0dh", // تونس Agence Tunisienne d'Internet
"xn--s9brj9c", // ਭਾਰਤ National Internet Exchange of India
"xn--wgbh1c", // مصر National Telecommunication Regulatory Authority - NTRA
"xn--wgbl6a", // قطر Communications Regulatory Authority
"xn--xkc2al3hye2a", // இலங்கை LK Domain Registry
"xn--xkc2dl3a5ee0h", // இந்தியா National Internet Exchange of India
"xn--yfro4i67o", // 新加坡 Singapore Network Information Centre (SGNIC) Pte Ltd
"xn--ygbi2ammx", // فلسطين Ministry of Telecom & Information Technology (MTIT)
"ye", // Yemen
"yt", // Mayotte
"za", // South Africa
"zm", // Zambia
"zw", // Zimbabwe
};
// "Local" TLDs, in lower case; consulted by isValidLocalTld.
// WARNING: this array MUST be sorted, otherwise it cannot be searched reliably using binary search
private static final String[] LOCAL_TLDS = new String[] {
"localdomain", // Also widely used as localhost.localdomain
"localhost", // RFC2606 defined
};
/**
 * Converts potentially Unicode input to punycode.
 * If the conversion is rejected as invalid, the original input is handed
 * back unchanged so that later ASCII-only checks can reject it.
 *
 * @param input the string to convert, not null
 * @return converted input, or original input if conversion fails
 */
// Needed by UrlValidator
static String unicodeToASCII(String input) {
    String result;
    try {
        result = /* java.net.IDN. */ toASCII(input);
    } catch (IllegalArgumentException e) { // input is not valid
        result = input;
    }
    return result;
}
// ================= Code needed for Java 1.4 and 1.5 compatibility ===============
// Holder for the reflectively resolved java.net.IDN.toASCII(String) method.
// The lookup runs once, when this nested class is initialized; the field is
// null when the class or method cannot be found.
private static class IDNHolder {
    private static final Method JAVA_NET_IDN_TO_ASCII = lookupToAscii();

    // Resolves java.net.IDN.toASCII(String) without initializing java.net.IDN;
    // any failure is deliberately mapped to null ("not available").
    private static Method lookupToAscii() {
        Method found;
        try {
            ClassLoader loader = DomainValidator.class.getClassLoader();
            Class idnClass = Class.forName("java.net.IDN", false, loader);
            found = idnClass.getDeclaredMethod("toASCII", new Class[]{String.class});
        } catch (Exception e) {
            found = null;
        }
        return found;
    }
}
/*
 * Helper method to invoke java.net.IDN.toASCII(String) via reflection.
 * Allows code to be compiled with Java 1.4 and 1.5.
 * Input that is null or pure ASCII is returned untouched, as is any input when
 * the IDN method could not be resolved; otherwise the lower-cased input is converted.
 * @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
 */
private static final String toASCII(String line) throws IllegalArgumentException {
    // java.net.IDN.toASCII(line); // Java 1.6+
    // implementation for Java 1.4 and 1.5
    // IDN.toASCII would effectively do this itself, but we want to skip the entire call
    if (isOnlyASCII(line)) {
        return line;
    }
    final Method idnToAscii = IDNHolder.JAVA_NET_IDN_TO_ASCII;
    if (idnToAscii == null) { // avoid NPE
        return line;
    }
    final Object[] arguments = new String[]{line.toLowerCase(Locale.ENGLISH)};
    try {
        return (String) idnToAscii.invoke(null, arguments);
    } catch (IllegalAccessException e) {
        throw new RuntimeException(e); // Should not happen
    } catch (InvocationTargetException e) {
        final Throwable cause = e.getCause();
        if (!(cause instanceof IllegalArgumentException)) {
            throw new RuntimeException(e); // Should not happen
        }
        // this is the failure the toASCII method is expected to report
        throw (IllegalArgumentException) cause;
    }
}
/*
 * Tells whether every character of the input is in the 7-bit ASCII range
 * (code unit <= 0x7F). A null input is treated as all ASCII.
 */
private static boolean isOnlyASCII(String input) {
    if (input == null) {
        return true;
    }
    final int length = input.length();
    int index = 0;
    while (index < length) {
        if (input.charAt(index) > 0x7F) {
            return false;
        }
        index++;
    }
    return true;
}
}