All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.common.net.InternetDomainName Maven / Gradle / Ivy

Go to download

Guava is a suite of core and expanded libraries that include utility classes, google's collections, io classes, and much much more. This project is a complete packaging of all the Guava libraries into a single jar. Individual portions of Guava can be used by downloading the appropriate module and its dependencies. Guava (complete) has only one code dependency - javax.annotation, per the JSR-305 spec.

There is a newer version: 33.2.0-jre
Show newest version
/*
 * Copyright (C) 2009 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.net;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;

import java.util.List;

import javax.annotation.Nullable;

/**
 * An immutable well-formed internet domain name, as defined by
 * RFC 1035, with the
 * exception that names ending in {@code "."} are not supported (as they are not
 * generally used in browsers, email, and other end-user applications. Examples
 * include {@code com} and {@code foo.co.uk}. Only syntactic analysis is
 * performed; no DNS lookups or other network interactions take place. Thus
 * there is no guarantee that the domain actually exists on the internet.
 * Invalid domain names throw {@link IllegalArgumentException} on construction.
 *
 * 

It is often the case that domains of interest are those under a * {@linkplain #isPublicSuffix() public suffix} but not themselves a public * suffix; {@link #hasPublicSuffix()} and {@link #isTopPrivateDomain()} test for * this. Similarly, one often needs to obtain the domain consisting of the * public suffix plus one subdomain level, typically to obtain the highest-level * domain for which cookies may be set. Use {@link #topPrivateDomain()} for this * purpose. * *

{@linkplain #equals(Object) Equality} of domain names is case-insensitive, * so for convenience, the {@link #name()} and {@link #parts()} methods return * the lowercase form of the name. * *

* internationalized domain names (IDN) such as {@code 网络.cn} are * supported. * * @author Craig Berry * @since 5 */ @Beta @GwtCompatible public final class InternetDomainName { private static final Splitter DOT_SPLITTER = Splitter.on('.'); private static final Joiner DOT_JOINER = Joiner.on('.'); /** * Value of {@link #publicSuffixIndex} which indicates that no public suffix * was found. */ private static final int NO_PUBLIC_SUFFIX_FOUND = -1; private static final String DOT_REGEX = "\\."; /** * The full domain name, converted to lower case. */ private final String name; /** * The parts of the domain name, converted to lower case. */ private final ImmutableList parts; /** * The index in the {@link #parts()} list at which the public suffix begins. * For example, for the domain name {@code www.google.co.uk}, the value would * be 2 (the index of the {@code co} part). The value is negative * (specifically, {@link #NO_PUBLIC_SUFFIX_FOUND}) if no public suffix was * found. */ private final int publicSuffixIndex; /** * Private constructor used to implement {@link #from(String)}. */ private InternetDomainName(String name) { this.name = name; this.parts = ImmutableList.copyOf(DOT_SPLITTER.split(name)); checkArgument(validateSyntax(parts), "Not a valid domain name: '%s'", name); this.publicSuffixIndex = findPublicSuffix(); } /** * Private constructor used to implement {@link #ancestor(int)}. Argument * parts are assumed to be valid, as they always come from an existing domain. */ private InternetDomainName(List parts) { checkArgument(!parts.isEmpty()); this.parts = ImmutableList.copyOf(parts); this.name = DOT_JOINER.join(parts); this.publicSuffixIndex = findPublicSuffix(); } /** * Returns the index of the leftmost part of the public suffix, or -1 if not * found. */ private int findPublicSuffix() { final int partsSize = parts.size(); for (int i = 0; i < partsSize; i++) { String ancestorName = DOT_JOINER.join(parts.subList(i, partsSize)); if (isPublicSuffixInternal(ancestorName)) { return i; } } return NO_PUBLIC_SUFFIX_FOUND; } /** * A factory method for creating {@code InternetDomainName} objects. * * @param domain A domain name (not IP address) * @throws IllegalArgumentException If name is not syntactically valid */ public static InternetDomainName from(String domain) { // RFC 1035 defines domain names to be case-insensitive; normalizing // to lower case allows us to simplify matching. return new InternetDomainName(domain.toLowerCase()); } // TODO: For the moment, we validate that all parts of a domain // * Start and end with an alphanumeric character // * Have alphanumeric, dash, or underscore characters internally // An additional constraint is that the first character of the last part // may not be numeric. // All of this is a compromise to allow relatively accurate and efficient // checking. We may soon move to using java.net.IDN for this purpose in // non-GWT code. /** * Validation method used by {@from} to ensure that the domain name is * syntactically valid according to RFC 1035. * * @return Is the domain name syntactically valid? */ private static boolean validateSyntax(List parts) { final int lastIndex = parts.size() - 1; // Validate the last part specially, as it has different syntax rules. if (!validatePart(parts.get(lastIndex), true)) { return false; } for (int i = 0; i < lastIndex; i++) { String part = parts.get(i); if (!validatePart(part, false)) { return false; } } return true; } /** * The maximum size of a single part of a domain name. */ private static final int MAX_DOMAIN_PART_LENGTH = 63; private static final CharMatcher DASH_MATCHER = CharMatcher.anyOf("-_"); private static final CharMatcher PART_CHAR_MATCHER = CharMatcher.JAVA_LETTER_OR_DIGIT.or(DASH_MATCHER); /** * Helper method for {@link #validateSyntax(List)}. Validates that one part of * a domain name is valid. * * @param part The domain name part to be validated * @param isFinalPart Is this the final (rightmost) domain part? * @return Whether the part is valid */ private static boolean validatePart(String part, boolean isFinalPart) { // These tests could be collapsed into one big boolean expression, but // they have been left as independent tests for clarity. if (part.length() < 1 || part.length() > MAX_DOMAIN_PART_LENGTH) { return false; } // GWT claims to support java.lang.Character's char-classification // methods, but it actually only works for ASCII. So for now, // assume anything with non-ASCII characters is valid. // The only place this seems to be documented is here: // http://osdir.com/ml/GoogleWebToolkitContributors/2010-03/msg00178.html if (!CharMatcher.ASCII.matchesAllOf(part)) { return true; } if (!PART_CHAR_MATCHER.matchesAllOf(part)) { return false; } if (DASH_MATCHER.matches(part.charAt(0)) || DASH_MATCHER.matches(part.charAt(part.length() - 1))) { return false; } if (isFinalPart && CharMatcher.DIGIT.matches(part.charAt(0))) { return false; } return true; } /** * Returns the domain name, normalized to all lower case. */ public String name() { return name; } /** * Returns the individual components of this domain name, normalized to all * lower case. For example, for the domain name {@code mail.google.com}, this * method returns the list {@code ["mail", "google", "com"]}. */ public ImmutableList parts() { return parts; } /** * Old location of {@link #isPublicSuffix()}. * * @deprecated use {@link #isPublicSuffix()} */ @Deprecated public boolean isRecognizedTld() { return isPublicSuffix(); } /** * Old location of {@link #isUnderPublicSuffix()}. * * @deprecated use {@link #isUnderPublicSuffix()} */ @Deprecated public boolean isUnderRecognizedTld() { return isUnderPublicSuffix(); } /** * Old location of {@link #hasPublicSuffix()}. * * @deprecated use {@link #hasPublicSuffix()} */ @Deprecated public boolean hasRecognizedTld() { return hasPublicSuffix(); } /** * Old location of {@link #publicSuffix()}. * * @deprecated use {@link #publicSuffix()} */ @Deprecated public InternetDomainName recognizedTld() { return publicSuffix(); } /** * Old location of {@link #isTopPrivateDomain()}. * * @deprecated use {@link #isTopPrivateDomain()} */ @Deprecated public boolean isImmediatelyUnderTld() { return isTopPrivateDomain(); } /** * Old location of {@link #topPrivateDomain()}. * * @deprecated use {@link #topPrivateDomain()} */ @Deprecated public InternetDomainName topCookieDomain() { return topPrivateDomain(); } /** * Returns the rightmost non-{@linkplain #isRecognizedTld() TLD} domain name * part. For example * {@code new InternetDomainName("www.google.com").rightmostNonTldPart()} * returns {@code "google"}. Returns null if either no * {@linkplain #isRecognizedTld() TLD} is found, or the whole domain name is * itself a {@linkplain #isRecognizedTld() TLD}. * * @deprecated use the first {@linkplain #parts part} of the {@link * #topPrivateDomain()} */ @Deprecated public String rightmostNonTldPart() { return publicSuffixIndex >= 1 ? parts.get(publicSuffixIndex - 1) : null; } /** * Indicates whether this domain name represents a public suffix, as * defined by the Mozilla Foundation's * Public Suffix List (PSL). A public * suffix is one under which Internet users can directly register names, such * as {@code com}, {@code co.uk} or {@code pvt.k12.wy.us}. Examples of domain * names that are not public suffixes include {@code google}, {@code * google.com} and {@code foo.co.uk}. * * @return {@code true} if this domain name appears exactly on the public * suffix list * @since 6 */ public boolean isPublicSuffix() { return publicSuffixIndex == 0; } /** * Indicates whether this domain name ends in a {@linkplain #isPublicSuffix() * public suffix}, including if it is a public suffix itself. For example, * returns {@code true} for {@code www.google.com}, {@code foo.co.uk} and * {@code com}, but not for {@code google} or {@code google.foo}. * * @since 6 */ public boolean hasPublicSuffix() { return publicSuffixIndex != NO_PUBLIC_SUFFIX_FOUND; } /** * Returns the {@linkplain #isPublicSuffix() public suffix} portion of the * domain name, or {@code null} if no public suffix is present. * * @since 6 */ public InternetDomainName publicSuffix() { return hasPublicSuffix() ? ancestor(publicSuffixIndex) : null; } /** * Indicates whether this domain name ends in a {@linkplain #isPublicSuffix() * public suffix}, while not being a public suffix itself. For example, * returns {@code true} for {@code www.google.com}, {@code foo.co.uk} and * {@code bar.ca.us}, but not for {@code google}, {@code com}, or {@code * google.foo}. * * @since 6 */ public boolean isUnderPublicSuffix() { return publicSuffixIndex > 0; } /** * Indicates whether this domain name is composed of exactly one subdomain * component followed by a {@linkplain #isPublicSuffix() public suffix}. For * example, returns {@code true} for {@code google.com} and {@code foo.co.uk}, * but not for {@code www.google.com} or {@code co.uk}. * * @since 6 */ public boolean isTopPrivateDomain() { return publicSuffixIndex == 1; } /** * Returns the portion of this domain name that is one level beneath the * public suffix. For example, for {@code x.adwords.google.co.uk} it returns * {@code google.co.uk}, since {@code co.uk} is a public suffix. This is the * highest-level parent of this domain for which cookies may be set, as * cookies cannot be set on a public suffix itself. * *

If {@link #isTopPrivateDomain()} is true, the current domain name * instance is returned. * * @throws IllegalStateException if this domain does not end with a * public suffix * @since 6 */ public InternetDomainName topPrivateDomain() { if (isTopPrivateDomain()) { return this; } checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name); return ancestor(publicSuffixIndex - 1); } /** * Indicates whether this domain is composed of two or more parts. */ public boolean hasParent() { return parts.size() > 1; } /** * Returns an {@code InternetDomainName} that is the immediate ancestor of * this one; that is, the current domain with the leftmost part removed. For * example, the parent of {@code www.google.com} is {@code google.com}. * * @throws IllegalStateException if the domain has no parent, as determined * by {@link #hasParent} */ public InternetDomainName parent() { checkState(hasParent(), "Domain '%s' has no parent", name); return ancestor(1); } /** * Returns the ancestor of the current domain at the given number of levels * "higher" (rightward) in the subdomain list. The number of levels must be * non-negative, and less than {@code N-1}, where {@code N} is the number of * parts in the domain. * *

TODO: Reasonable candidate for addition to public API. */ private InternetDomainName ancestor(int levels) { return new InternetDomainName(parts.subList(levels, parts.size())); } /** * Creates and returns a new {@code InternetDomainName} by prepending the * argument and a dot to the current name. For example, {@code * InternetDomainName.from("foo.com").child("www.bar")} returns a new {@code * InternetDomainName} with the value {@code www.bar.foo.com}. * * @throws NullPointerException if leftParts is null * @throws IllegalArgumentException if the resulting name is not valid */ public InternetDomainName child(String leftParts) { return InternetDomainName.from(checkNotNull(leftParts) + "." + name); } /** * Indicates whether the argument is a syntactically valid domain name. This * method is intended for the case where a {@link String} must be validated as * a valid domain name, but no further work with that {@link String} as an * {@link InternetDomainName} will be required. Code like the following will * unnecessarily repeat the work of validation:

   {@code
   *
   *   if (InternetDomainName.isValid(name)) {
   *     domainName = InternetDomainName.from(name);
   *   } else {
   *     domainName = DEFAULT_DOMAIN;
   *   }}
* * Such code could instead be written as follows:
   {@code
   *
   *   try {
   *     domainName = InternetDomainName.from(name);
   *   } catch (IllegalArgumentException e) {
   *     domainName = DEFAULT_DOMAIN;
   *   }}
*/ public static boolean isValid(String name) { try { from(name); return true; } catch (IllegalArgumentException e) { return false; } } /** * Does the domain name satisfy the Mozilla criteria for a {@linkplain * #isPublicSuffix() public suffix}? */ private static boolean isPublicSuffixInternal(String domain) { return TldPatterns.EXACT.contains(domain) || (!TldPatterns.EXCLUDED.contains(domain) && matchesWildcardPublicSuffix(domain)); } /** * Does the domain name match one of the "wildcard" patterns (e.g. "*.ar")? */ private static boolean matchesWildcardPublicSuffix(String domain) { final String[] pieces = domain.split(DOT_REGEX, 2); return pieces.length == 2 && TldPatterns.UNDER.contains(pieces[1]); } // TODO: specify this to return the same as name(); remove name() @Override public String toString() { return Objects.toStringHelper(this).add("name", name).toString(); } @Override public boolean equals(@Nullable Object object) { if (object == this) { return true; } if (object instanceof InternetDomainName) { InternetDomainName that = (InternetDomainName) object; return this.name.equals(that.name); } return false; } @Override public int hashCode() { return name.hashCode(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy