All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.kiwiproject.net.KiwiUrls Maven / Gradle / Ivy

Go to download

Kiwi is a utility library. We really like Google's Guava, and also use Apache Commons. But if they don't have something we need, and we think it is useful, this is where we put it.

There is a newer version: 4.5.2
Show newest version
package org.kiwiproject.net;

import static java.util.Objects.isNull;
import static java.util.Objects.nonNull;
import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.kiwiproject.base.KiwiStrings.f;
import static org.kiwiproject.collect.KiwiLists.first;
import static org.kiwiproject.collect.KiwiMaps.isNullOrEmpty;

import com.google.common.base.Splitter;
import lombok.Builder;
import lombok.Getter;
import lombok.experimental.UtilityClass;
import org.apache.commons.lang3.StringUtils;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.regex.Pattern;

/**
 * Static utilities for creating URLs
 */
@UtilityClass
public class KiwiUrls {

    public static final int UNKNOWN_PORT = -1;

    public static final String FTP_PROTOCOL = "ftp";
    public static final String HTTP_PROTOCOL = "http";
    public static final String HTTPS_PROTOCOL = "https";
    public static final String SFTP_PROTOCOL = "sftp";

    /**
     * FYI, this "?" section in the regular expression below allows us to create "named groups" for pattern
     * matching. This means instead of having to determine what the index of a given group is (can you identify what
     * the index of port is quickly?), we can just reference the name.
     * 

* Look at the (lengthy) preamble in {@link java.util.regex.Pattern} for more information. */ private static final Pattern URL_PATTERN = Pattern.compile( "((?[a-z]+)://)?(?(?[a-z0-9-]+)(\\.(?[a-z0-9.-]+))?)(:(?[0-9]+))?(?/.*)?", Pattern.CASE_INSENSITIVE ); public static final String SCHEME_GROUP = "scheme"; public static final String CANONICAL_GROUP = "canonical"; public static final String SUB_DOMAIN_GROUP = "subdomain"; public static final String DOMAIN_GROUP = "domain"; public static final String PORT_GROUP = "port"; public static final String PATH_GROUP = "path"; public static final Splitter COMMA_SPLITTER = Splitter.on(',').omitEmptyStrings().trimResults(); /** * Create a well-formed URL (String) from the given protocol, hostname, and port. * * @param protocol the protocol * @param hostname the host name * @param port the port * @return the URL as a {@link String} object */ public static String createUrl(String protocol, String hostname, int port) { return createUrlObject(protocol, hostname, port).toString(); } /** * Create a well-formed URL from the given protocol, hostname, and port. * * @param protocol the protocol * @param hostname the host name * @param port the port * @return the URL as a {@link URL} object */ public static URL createUrlObject(String protocol, String hostname, int port) { return createUrlObject(protocol, hostname, port, ""); } /** * Create a well-formed URL (String) from the given protocol, hostname, port and path. * * @param protocol the protocol * @param hostname the host name * @param port the port * @param path the path * @return the URL as a {@link String} object */ public static String createUrl(String protocol, String hostname, int port, String path) { return createUrlObject(protocol, hostname, port, path).toString(); } /** * Create a well-formed URL from the given protocol, hostname, port, and path. * * @param protocol the protocol * @param hostname the host name * @param port the port * @param path the path * @return the URL as a {@link URL} object */ public static URL createUrlObject(String protocol, String hostname, int port, String path) { try { var pathWithLeadingSlash = StringUtils.isBlank(path) ? "" : prependLeadingSlash(path); return new URL(protocol, hostname, port, pathWithLeadingSlash); } catch (MalformedURLException e) { var message = f("Error constructing URL from protocol [%s], hostname [%s], port [%s], path [%s]", protocol, hostname, port, path); throw new UncheckedMalformedURLException(message, e); } } /** * Wrapper around URL's constructor which throws a checked {@link MalformedURLException}. This instead assumes * the given {@code urlSpec} is valid and throws {@link UncheckedMalformedURLException} in case it is actually * not valid. * * @param urlSpec the String to parse as a URL * @return a new {@link URL} instance * @throws UncheckedMalformedURLException that wraps a {@link MalformedURLException} if any error occurs */ public static URL createUrlObject(String urlSpec) { try { return new URL(urlSpec); } catch (MalformedURLException e) { throw new UncheckedMalformedURLException(e); } } /** * Create a well-formed URL string from the given {@code schemeHostPort} and zero or more path components. * * @param schemeHostPort a string containing the scheme, host, and port parts, e.g. http://acme.com:8080 * @param pathComponents zero or more path components to append * @return the constructed URL as a {@link String} */ public static String createUrl(String schemeHostPort, String... pathComponents) { return createUrlObject(schemeHostPort, pathComponents).toString(); } /** * Create a well-formed URL from the given {@code schemeHostPort} and zero or more path components. * * @param schemeHostPort a string containing the scheme, host, and port parts, e.g. http://acme.com:8080 * @param pathComponents zero or more path components to append * @return the constructed URL as a {@link URL} */ public static URL createUrlObject(String schemeHostPort, String... pathComponents) { var rawBaseUri = URI.create(schemeHostPort); if (pathComponents.length == 0) { return toURL(rawBaseUri); } var baseUri = stripTrailingSlash(rawBaseUri.toString()); var path = Paths.get("/", pathComponents).toString(); var fullUrlString = baseUri + path; return toURL(URI.create(fullUrlString)); } /** * Tries to convert the given {@code uri} into a {@link URL}, throwing an unchecked exception if the conversion * fails. The thrown unchecked exception wraps the original checked {@link MalformedURLException}. * * @param uri the URI to convert * @return a {@link URL} instance * @throws UncheckedMalformedURLException if conversion from {@link URI} to {@link URL} fails * @see URI#toURL() */ public static URL toURL(URI uri) { try { return uri.toURL(); } catch (MalformedURLException e) { var message = "Error creating URL from: " + uri.toString(); throw new UncheckedMalformedURLException(message, e); } } /** * Trims {@code path} and, if a leading slash is not present, adds it. * * @param path a path * @return a new String with a leading slash */ public static String prependLeadingSlash(String path) { var trimmedPath = path.trim(); if (trimmedPath.charAt(0) == '/') { return trimmedPath; } return '/' + trimmedPath; } /** * Create a well-formed HTTP URL (String) from the given hostname and port. * * @param hostname the host name * @param port the port * @return a URL as a {@link String} */ public static String createHttpUrl(String hostname, int port) { return createHttpUrlObject(hostname, port).toString(); } /** * Create a well-formed HTTP URL from the given hostname and port. * * @param hostname the host name * @param port the port * @return a URL as a {@link URL} */ public static URL createHttpUrlObject(String hostname, int port) { return createUrlObject(HTTP_PROTOCOL, hostname, port); } /** * Create a well-formed HTTP URL (String) from the given hostname, port and path. * * @param hostname the host name * @param port the port * @param path the path * @return a URL as a {@link String} */ public static String createHttpUrl(String hostname, int port, String path) { return createHttpUrlObject(hostname, port, path).toString(); } /** * Create a well-formed HTTP URL from the given hostname, port and path. * * @param hostname the host name * @param port the port * @param path the path * @return a URL as a {@link URL} */ public static URL createHttpUrlObject(String hostname, int port, String path) { return createUrlObject(HTTP_PROTOCOL, hostname, port, path); } /** * Create a well-formed HTTPS URL (String) from the given hostname and port. * * @param hostname the host name * @param port the port * @return a URL as a {@link String} */ public static String createHttpsUrl(String hostname, int port) { return createHttpsUrlObject(hostname, port).toString(); } /** * Create a well-formed HTTPS URL from the given hostname and port. * * @param hostname the host name * @param port the port * @return a URL as a {@link URL} */ public static URL createHttpsUrlObject(String hostname, int port) { return createUrlObject(HTTPS_PROTOCOL, hostname, port); } /** * Create a well-formed HTTPS URL (String) from the given hostname, port and path. * * @param hostname the host name * @param port the port * @param path the path * @return a URL as a {@link String} */ public static String createHttpsUrl(String hostname, int port, String path) { return createHttpsUrlObject(hostname, port, path).toString(); } /** * Create a well-formed HTTPS URL from the given hostname, port and path. * * @param hostname the host name * @param port the port * @param path the path * @return a URL as a {@link URL} */ public static URL createHttpsUrlObject(String hostname, int port, String path) { return createUrlObject(HTTPS_PROTOCOL, hostname, port, path); } /** * Extract all of the relevant sections from the given {@code uri}. *

* As an example, if given https://news.bbc.co.uk:8080/a-news-article" this would return the following: *

    *
  • scheme = "https"
  • *
  • subDomainName = "news"
  • *
  • domainName = "bbc.co.uk"
  • *
  • canonicalName = "news.bbc.co.uk"
  • *
  • port = 8080
  • *
  • path = "a-news-article"
  • *
* * @param url the URL to analyze * @return the {@link Components} found or an empty {@link Components} object if the URL was invalid * @implNote This method does not check if the URL is valid or not. */ public static Components extractAllFrom(String url) { var matcher = URL_PATTERN.matcher(url); if (matcher.matches()) { var portString = matcher.group(PORT_GROUP); var scheme = matcher.group(SCHEME_GROUP); int port; if (isBlank(portString)) { port = defaultPortForScheme(scheme); } else { port = Integer.parseInt(portString); } var components = Components.builder() .scheme(scheme) .subDomainName(matcher.group(SUB_DOMAIN_GROUP)) .domainName(matcher.group(DOMAIN_GROUP)) .canonicalName(matcher.group(CANONICAL_GROUP)) .path(matcher.group(PATH_GROUP)) .build(); if (port > 0) { components = components.toBuilder() .port(port) .build(); } return components; } return Components.builder().build(); } /** * Trims {@code url} and, if present, strips the trailing slash * * @param url the URL * @return the URL minus any trailing slash */ public static String stripTrailingSlash(String url) { var trimmedUrl = url.trim(); if (trimmedUrl.endsWith("/")) { return trimmedUrl.substring(0, trimmedUrl.length() - 1); } return trimmedUrl; } /** * Trims each URL in {@code urls} and strips any trailing slashes * * @param urls a list of URLs * @return a list of URLs matching the input URLs minus any trailing slash */ public static List stripTrailingSlashes(List urls) { return urls.stream().map(KiwiUrls::stripTrailingSlash).collect(toList()); } /** * Extracts the canonical server name from a given URL. *

* As an example, if given "https://news.bbc.co.uk:8080/a-news-article" this method would return * "news.bbc.co.uk" *

* * @param url the URL to evaluate * @return an {@link Optional} containing the canonical server name or {@link Optional#empty()} if it could not * be found. * @implNote This method does not check if the URL is valid or not. Also, if you will need to extract more than * one section of the URL, you should instead use {@link KiwiUrls#extractAllFrom(String)}. */ public static Optional extractCanonicalNameFrom(String url) { return findGroupInUrl(url, CANONICAL_GROUP); } /** * Extracts the server's domain name from a given URL. *

* As an example, if given "https://news.bbc.co.uk:8080/a-news-article" this method would return * "bbc.co.uk" *

* * @param url the URL to evaluate * @return an {@link Optional} containing the server's domain name or {@link Optional#empty()} if it could not * be found. * @implNote This method does not check if the URL is valid or not. Also, if you will need to extract more than * one section of the URL, you should instead use {@link KiwiUrls#extractAllFrom(String)}. */ public static Optional extractDomainNameFrom(String url) { return findGroupInUrl(url, DOMAIN_GROUP); } /** * Extracts the path from a given URL. *

* As an example, if given "https://news.bbc.co.uk:8080/a-news-article" this method would return * "a-news-article" *

* * @param url the URL to evaluate * @return an {@link Optional} containing the path or {@link Optional#empty()} if it could not * be found. * @implNote This method does not check if the URL is valid or not. Also, if you will need to extract more than * one section of the URL, you should instead use {@link KiwiUrls#extractAllFrom(String)}. */ public static Optional extractPathFrom(String url) { var result = findGroupInUrl(url, PATH_GROUP); if (result.isPresent()) { var value = result.get(); if ("/".equals(value)) { return Optional.empty(); } return result; } return Optional.empty(); } /** * Extracts the port from a given URL. *

* As an example, if given "https://news.bbc.co.uk:8080/a-news-article" this method would return * "8080" (represented by an int). *

* * @param url the URL to evaluate * @return an {@link Optional} containing the port or {@link Optional#empty()} if it could not * be found. * @implNote This method does not check if the URL is valid or not. Also, if you will need to extract more than * one section of the URL, you should instead use {@link KiwiUrls#extractAllFrom(String)}. */ public static OptionalInt extractPortFrom(String url) { var optionalPort = findGroupInUrl(url, PORT_GROUP); var port = optionalPort.map(Integer::parseInt) .orElseGet(() -> extractSchemeFrom(url).map(KiwiUrls::defaultPortForScheme).orElse(UNKNOWN_PORT)); if (port > 0) { return OptionalInt.of(port); } return OptionalInt.empty(); } /** * Extracts the scheme from a given URL. *

* As an example, if given "https://news.bbc.co.uk:8080/a-news-article" this method would return * "https" *

* * @param url the URL to evaluate * @return an {@link Optional} containing the canonical server name or {@link Optional#empty()} if it could not * be found. * @implNote This method does not check if the URL is valid or not. Also, if you will need to extract more than * one section of the URL, you should instead use {@link KiwiUrls#extractAllFrom(String)}. */ public static Optional extractSchemeFrom(String url) { return findGroupInUrl(url, SCHEME_GROUP); } /** * Extracts the simple server name from a given URL. *

* As an example, if given "https://news.bbc.co.uk:8080/a-news-article" this method would return * "news" *

* * @param url the URL to evaluate * @return an {@link Optional} containing the simple server name or {@link Optional#empty()} if it could not * be found. * @implNote This method does not check if the URL is valid or not. Also, if you will need to extract more than * one section of the URL, you should instead use {@link KiwiUrls#extractAllFrom(String)}. */ public static Optional extractSubDomainNameFrom(String url) { return findGroupInUrl(url, SUB_DOMAIN_GROUP); } /** * Searches the {@code commaDelimitedUrls} for its domains, and if found, replaces all entries with * {@code replacmentDomain}. The {@code commaDelimitedUrls} can be a standalone URL. * * @param commaDelimitedUrls the comma delimited URLs to search * @param replacementDomain the domain to replace if found * @return the updated comma-delimited URLs if a domain is found, otherwise {@code commaDelimitedUrls} unchanged * @implNote This method assumes that the domains are the same for all URLs in the {@code commaDelimitedUrls}; it only * checks the first URL to obtain the domain. */ public static String replaceDomainsIn(String commaDelimitedUrls, String replacementDomain) { var urls = COMMA_SPLITTER.splitToList(commaDelimitedUrls); var originalDomain = domainOrNull(first(urls)); if (isNull(originalDomain)) { return commaDelimitedUrls; } return urls.stream() .map(KiwiUrls::createUrlObject) .map(url -> replaceDomain(url, originalDomain, replacementDomain)) .collect(joining(",")); } private static String domainOrNull(String url) { var matcher = URL_PATTERN.matcher(url); if (matcher.matches()) { var oldDomain = matcher.group(DOMAIN_GROUP); if (nonNull(oldDomain)) { return oldDomain; } } return null; } /** * Replace {@code expectedDomain} in {@code url} with {@code replacementDomain} if and only if {@code url}'s domain * is equal to {@code expectedDomain}. If the given {@code url} has no domain, or is not the expected domain, b * lenient and just return the original URL. */ private static String replaceDomain(URL url, String expectedDomain, String replacementDomain) { var host = url.getHost(); var index = host.indexOf('.'); if (index < 0) { return url.toString(); } var actualDomain = host.substring(index + 1); if (!actualDomain.equals(expectedDomain)) { return url.toString(); } var hostMinusDomain = host.substring(0, index); var newHost = hostMinusDomain + '.' + replacementDomain; var port = url.getPort() == -1 ? "" : (":" + url.getPort()); var path = isBlank(url.getPath()) ? "" : url.getPath(); return url.getProtocol() + "://" + newHost + port + path; } private static int defaultPortForScheme(String scheme) { switch (scheme) { case HTTPS_PROTOCOL: return 443; case HTTP_PROTOCOL: return 80; case SFTP_PROTOCOL: return 22; case FTP_PROTOCOL: return 21; default: return UNKNOWN_PORT; } } /** * Converts a query string (comprised of key/value pairs separated by '&' characters) into a Map of String of * Strings * * @param queryString the query string to create the map from * @return a map of the query params * @see #toQueryString(Map) for inverse */ public static Map queryStringToMap(String queryString) { if (isBlank(queryString)) { return new HashMap<>(); } return Arrays.stream(queryString.split("&")) .map(keyValue -> keyValue.split("=")) .collect(toMap(splat -> splat[0], splat -> splat[1])); } /** * Converts a Map of String of Strings into one (potentially long) string of key/value parameters (each key/value * parameter is separated by an '=' character), with each parameter pair separated by an '&' character. * * @param parameters the map of the parameters to create the query string from * @return a concatenated query string * @see #queryStringToMap(String) for inverse */ public static String toQueryString(Map parameters) { if (isNullOrEmpty(parameters)) { return ""; } return parameters.entrySet().stream() .map(entry -> entry.getKey() + "=" + entry.getValue()) .collect(joining("&")); } private static Optional findGroupInUrl(String url, String matchGroup) { var matcher = URL_PATTERN.matcher(url); if (matcher.matches()) { return Optional.ofNullable(matcher.group(matchGroup)); } return Optional.empty(); } /** * A simple value class to hold the various parts of the URL. */ @Getter @Builder(toBuilder = true) public static final class Components { private final String scheme; private final String subDomainName; private final String domainName; private final String canonicalName; private final Integer port; private final String path; public Optional getPort() { return Optional.ofNullable(port); } public Optional getPath() { if ("/".equals(path)) { return Optional.empty(); } return Optional.ofNullable(path); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy