All downloads are free. Search and download functionality uses the official Maven repository.

pl.edu.icm.unity.stdext.utils.GenerateAdditionalValidTLDsList Maven / Gradle / Ivy

Go to download

Standard plugins which are distributed with the system: attribute syntaxes, identity types, credentials

There is a newer version: 4.0.2
Show newest version
/*
 * Copyright (c) 2020 Bixbit - Krzysztof Benedyczak. All rights reserved.
 * See LICENCE.txt file for licensing information.
 */
package pl.edu.icm.unity.stdext.utils;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.commons.validator.routines.DomainValidator;
import org.apache.commons.validator.routines.DomainValidator.ArrayType;

/**
 * Generates a list of valid TLDs which are not yet known to the Apache Commons
 * {@link DomainValidator} (and therefore are rejected by its EmailValidator).
 * See {@link EmailUtils}. Can be run from time to time and the results put there,
 * so that we properly validate all real TLDs.
 * <p>
 * The result is printed to standard output as a Java array initializer
 * (<code>{"aaa", "bbb"};</code>, one entry per line) ready to be pasted into source code.
 */
public class GenerateAdditionalValidTLDsList
{
	private static final String IANA_REGISTRY = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt";
	
	/**
	 * Downloads the TLD registry, removes the entries already known to {@link DomainValidator}
	 * and prints the sorted remainder.
	 * 
	 * @param args ignored
	 * @throws Exception if the registry can not be downloaded or read
	 */
	public static void main(String[] args) throws Exception
	{
		List<String> allTlds = readDomains();
		Set<String> known = getKnown();
		List<String> missing = allTlds.stream()
				// Locale.ROOT: the default locale must not influence case mapping
				// (e.g. Turkish would map 'I' to a dotless 'i' and break the lookup below)
				.map(tld -> tld.toLowerCase(Locale.ROOT))
				.filter(tld -> !known.contains(tld))
				.sorted()
				.collect(Collectors.toList());
		System.out.println(missing.stream().collect(Collectors.joining("\",\n\"", "{\"", "\"};")));
	}

	/**
	 * Reads all TLDs from the registry at {@link #IANA_REGISTRY}.
	 * 
	 * @return registry entries in the order and letter case used by the registry,
	 * 		without comment and blank lines
	 * @throws IOException if the registry can not be opened or read
	 */
	private static List<String> readDomains() throws IOException
	{
		URL url = new URL(IANA_REGISTRY);
		// try-with-resources: the connection stream must be closed also when reading fails
		try (InputStream is = url.openStream())
		{
			return IOUtils.readLines(is, StandardCharsets.US_ASCII).stream()
					.map(String::trim)
					// the registry starts with a '#' comment line; skipping every comment/blank line
					// (instead of blindly dropping line 0) also copes with an empty response
					.filter(line -> !line.isEmpty() && !line.startsWith("#"))
					.collect(Collectors.toList());
		}
	}
	
	/**
	 * Collects TLDs which are built into {@link DomainValidator}.
	 * 
	 * @return union of the validator's generic, country code and infrastructure TLD tables
	 */
	private static Set<String> getKnown()
	{
		Set<String> allKnown = new HashSet<>();
		Collections.addAll(allKnown, DomainValidator.getTLDEntries(ArrayType.GENERIC_RO));
		Collections.addAll(allKnown, DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_RO));
		Collections.addAll(allKnown, DomainValidator.getTLDEntries(ArrayType.INFRASTRUCTURE_RO));
		return allKnown;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy