All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.norconex.commons.lang.url.HttpURL Maven / Gradle / Ivy

Go to download

Norconex Commons Lang is a Java library containing utility classes that complement the Java API and are not found in commonly available libraries (such as the great Apache Commons Lang, which it relies on).

There is a newer version: 2.0.2
Show newest version
/* Copyright 2010-2016 Norconex Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.norconex.commons.lang.url;

import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Locale;

import org.apache.commons.lang3.CharEncoding;
import org.apache.commons.lang3.CharUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;

/**
 * This class act as a mutable URL, which could be a replacement
 * or "wrapper" to the {@link URL} class. It can also be used as a safer way 
 * to build a {@link URL} or a {@link URI} instance as it will properly escape 
 * appropriate characters before creating those.
 * 
 * @author Pascal Essiembre
 */
@SuppressWarnings("nls")
public class HttpURL implements Serializable {

    private static final long serialVersionUID = -8886393027925815099L;
    
    /** Default URL HTTP Port. */
    public static final int DEFAULT_HTTP_PORT = 80;
    /** Default Secure URL HTTP Port. */
    public static final int DEFAULT_HTTPS_PORT = 443;
    
    /** Constant for "http" protocol. */
    public static final String PROTOCOL_HTTP = "http";
    /** Constant for "https" protocol. */
    public static final String PROTOCOL_HTTPS = "https";
    
    private QueryString queryString;
    private String host;
    private int port = DEFAULT_HTTP_PORT;
    private String path;
    private String protocol;
    private final String encoding;
    private String fragment;
    
    /**
     * Creates a blank HttpURL using UTF-8 for URL encoding.
     */
    public HttpURL() {
        this("", null);
    }

    /**
     * Creates a new HttpURL from the URL object using UTF-8 for URL encoding.
     * @param url a URL
     */
    public HttpURL(URL url) {
        this(url.toString());
    }
    /**
     * Creates a new HttpURL from the URL string using UTF-8 for URL encoding.
     * @param url a URL
     */
    public HttpURL(String url) {
        this(url, null);
    }

    /**
     * Creates a new HttpURL from the URL object using the provided encoding
     * for URL encoding.
     * @param url a URL
     * @param encoding character encoding
     * @since 1.7.0
     */
    public HttpURL(URL url, String encoding) {
        this(url.toString(), encoding);
    }
    /**
     * Creates a new HttpURL from the URL string using the provided encoding
     * for URL encoding.
     * @param url a URL string
     * @param encoding character encoding
     * @since 1.7.0
     */
    public HttpURL(String url, String encoding) {
        if (StringUtils.isBlank(encoding)) {
            this.encoding = CharEncoding.UTF_8;
        } else {
            this.encoding = encoding;
        }
        if (StringUtils.startsWithIgnoreCase(url, PROTOCOL_HTTP)) {
            URL urlwrap;
            try {
                urlwrap = new URL(url);
            } catch (MalformedURLException e) {
                throw new URLException("Could not interpret URL: " + url, e);
            }
            protocol = StringUtils.substringBefore(url, ":");
            host = urlwrap.getHost();
            port = urlwrap.getPort();
            if (port < 0) {
                if (StringUtils.startsWithIgnoreCase(url, PROTOCOL_HTTPS)) {
                    port = DEFAULT_HTTPS_PORT;
                } else {
                    port = DEFAULT_HTTP_PORT;
                }
            }
            path = urlwrap.getPath();
            fragment = urlwrap.getRef();
        }
        
        // Parameters
        if (StringUtils.contains(url, "?")) {
            queryString = new QueryString(url, encoding);
        }
    }

    
    /**
     * Gets the character encoding. Default is UTF-8.
     * @return character encoding
     * @since 1.7.0
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Gets the URL path.
     * @return URL path
     */
    public String getPath() {
        return path;
    }
    /**
     * Sets the URL path.
     * @param path url path
     */
    public void setPath(String path) {
        this.path = path;
    }

    /**
     * Gets the URL query string.
     * @return URL query string, or null if none
     */
    public QueryString getQueryString() {
        return queryString;
    }
    /**
     * Sets the URL query string.
     * @param queryString the query string
     */
    public void setQueryString(QueryString queryString) {
        this.queryString = queryString;
    }
    
    /**
     * Gets the host portion of the URL.
     * @return the host portion of the URL
     */
    public String getHost() {
        return host;
    }
    /**
     * Sets the host portion of the URL.
     * @param host the host portion of the URL
     */
    public void setHost(String host) {
        this.host = host;
    }

    /**
     * Gets the protocol portion of the URL (e.g. http, https);
     * @return the protocol portion of the URL
     */
    public String getProtocol() {
        return protocol;
    }
    /**
     * Sets the protocol portion of the URL.
     * @param protocol the protocol portion of the URL
     */
    public void setProtocol(String protocol) {
        this.protocol = protocol;
    }
    /**
     * Whether this URL is secure (e.g. https).
     * @return true if protocol is secure
     */
    public boolean isSecure() {
        return getProtocol().equalsIgnoreCase(PROTOCOL_HTTPS);
    }

    /**
     * Gets the URL port.
     * @return the URL port
     */
    public int getPort() {
        return port;
    }
    /**
     * Sets the URL port.
     * @param port the URL port
     */
    public void setPort(int port) {
        this.port = port;
    }
    
    /**
     * Gets the URL fragment.
     * @return the fragment
     * @since 1.8.0
     */
    public String getFragment() {
        return fragment;
    }
    /**
     * Sets the URL fragment.
     * @param fragment the fragment to set
     * @since 1.8.0
     */
    public void setFragment(String fragment) {
        this.fragment = fragment;
    }

    /**
     * Gets the last URL path segment without the query string.
     * If there are segment to return, 
     * an empty string will be returned instead.
     * @return the last URL path segment
     */
    public String getLastPathSegment() {
        if (StringUtils.isBlank(path)) {
            return StringUtils.EMPTY;
        }
        String segment = path;
        segment = StringUtils.substringAfterLast(segment, "/");
        return segment;
    }
    /**
     * Converts this HttpURL to a regular {@link URL}, making sure 
     * appropriate characters are escaped properly.
     * @return a URL
     * @throws URLException when URL is malformed
     */
    public URL toURL() {
        String url = toString();
        try {
            return new URL(url);
        } catch (MalformedURLException e) {
            throw new URLException("Cannot convert to URL: " + url, e);
        }
    }
    
    /**
     * Gets the root of this HttpUrl. That is the left part of a URL up to 
     * and including the host name. A null or empty string returns
     * a null document root.
     * @return left part of a URL up to (and including the host name
     * @throws URLException when URL is malformed
     * @since 1.8.0
     */
    public String getRoot() {
        return getRoot(toString());
    }
    
    /**
     * Converts this HttpURL to a {@link URI}, making sure 
     * appropriate characters are escaped properly.
     * @return a URI
     * @since 1.7.0
     * @throws URLException when URL is malformed
     */
    public URI toURI() {
        String url = toString();
        try {
            return new URI(url);
        } catch (URISyntaxException e) {
            throw new URLException("Cannot convert to URI: " + url, e);
        }
    }
    /**
     * 

* Converts the supplied URL to a {@link URL}, making sure * appropriate characters are encoded properly using UTF-8. This method * is a short form of:
* new HttpURL("http://example.com").toURL(); *

* @param url a URL string * @return a URL object * @since 1.7.0 * @throws URLException when URL is malformed */ public static URL toURL(String url) { return new HttpURL(url).toURL(); } /** *

Converts the supplied URL to a {@link URI}, making sure * appropriate characters are encoded properly using UTF-8. This method * is a short form of:
* new HttpURL("http://example.com").toURI(); *

* @param url a URL string * @return a URI object * @since 1.7.0 * @throws URLException when URL is malformed */ public static URI toURI(String url) { return new HttpURL(url).toURI(); } /** *

Gets the root of a URL. That is the left part of a URL up to and * including the host name. A null or empty string returns * a null document root. * This method is a short form of:
* new HttpURL("http://example.com/path").getRoot(); *

* @param url a URL string * @return left part of a URL up to (and including the host name * @since 1.8.0 */ public static String getRoot(String url) { if (StringUtils.isBlank(url)) { return null; } return StringUtils.replacePattern(url, "(.*?://.*?)([/?#].*)", "$1"); } /** * Returns a string representation of this URL, properly encoded. * @return URL as a string */ @Override public String toString() { StringBuilder b = new StringBuilder(); b.append(protocol); b.append("://"); b.append(host); if (!isPortDefault()) { b.append(':'); b.append(port); } if (StringUtils.isNotBlank(path)) { if (!path.startsWith("/")) { b.append('/'); } b.append(encodePath(path)); } if (queryString != null && !queryString.isEmpty()) { b.append(queryString.toString()); } if (fragment != null) { b.append("#"); b.append(encodePath(fragment)); } return b.toString(); } /** * Whether this URL uses the default port for the protocol. The default * port is 80 for "http" protocol, and 443 for "https". Other protocols * are not supported and this method will always return false * for them. * @return true if the URL is using the default port. * @since 1.8.0 */ public boolean isPortDefault() { return PROTOCOL_HTTPS.equalsIgnoreCase(protocol) && port == DEFAULT_HTTPS_PORT || PROTOCOL_HTTP.equalsIgnoreCase(protocol) && port == DEFAULT_HTTP_PORT; } /** *

URL-Encodes the query string portion of a URL. The entire * string supplied is assumed to be a query string. * @param queryString URL query string * @return encoded path * @since 1.8.0 */ public static String encodeQueryString(String queryString) { if (StringUtils.isBlank(queryString)) { return queryString; } return new QueryString(queryString).toString(); } /** *

URL-Encodes a URL path. The entire string supplied is assumed * to be a URL path. Unsafe characters are percent-encoded using UTF-8 * (as specified by W3C standard). * @param path path portion of a URL * @return encoded path * @since 1.7.0 */ public static String encodePath(String path) { // Any characters that are not one of the following are // percent-encoded (including spaces): // a-z A-Z 0-9 . - _ ~ ! $ & ' ( ) * + , ; = : @ / % if (StringUtils.isBlank(path)) { return path; } StringBuilder sb = new StringBuilder(); for (char ch : path.toCharArray()) { // Space to plus sign if (ch == ' ') { sb.append("%20"); // Valid: keep it as is. } else if (CharUtils.isAsciiAlphanumeric(ch) || ".-_~!$&'()*+,;=:@/%".indexOf(ch) != -1) { sb.append(ch); // Invalid: encode it } else { byte[] bytes; try { bytes = Character.toString(ch).getBytes(CharEncoding.UTF_8); } catch (UnsupportedEncodingException e) { throw new URLException("UTF-8 not supported.", e); } for (byte b : bytes) { sb.append('%'); int upper = (((int) b) >> 4) & 0xf; sb.append(Integer.toHexString( upper).toUpperCase(Locale.US)); int lower = ((int) b) & 0xf; sb.append(Integer.toHexString( lower).toUpperCase(Locale.US)); } } } return sb.toString(); } /** * Converts a relative URL to an absolute one, based on the supplied * base URL. The base URL is assumed to be a valid URL. Behavior * is unexpected when base URL is invalid. * @param baseURL URL to the reference is relative to * @param relativeURL the relative URL portion to transform to absolute * @return absolute URL * @since 1.8.0 */ public static String toAbsolute(String baseURL, String relativeURL) { String relURL = relativeURL; // Relative to protocol if (relURL.startsWith("//")) { return StringUtils.substringBefore(baseURL, "//") + "//" + StringUtils.substringAfter(relURL, "//"); } // Relative to domain name if (relURL.startsWith("/")) { return getRoot(baseURL) + relURL; } // Relative to full full page URL minus ? 
or # if (relURL.startsWith("?") || relURL.startsWith("#")) { // this is a relative url and should have the full page base return baseURL.replaceFirst("(.*?)([\\?\\#])(.*)", "$1") + relURL; } // Relative to last directory/segment if (!relURL.contains("://")) { String base = baseURL.replaceFirst("(.*?)([\\?\\#])(.*)", "$1"); if (StringUtils.countMatches(base, '/') > 2) { base = base.replaceFirst("(.*/)(.*)", "$1"); } if (base.endsWith("/")) { // This is a URL relative to the last URL segment relURL = base + relURL; } else { relURL = base + "/" + relURL; } } // Not detected as relative, so return as is return relURL; } @Override public int hashCode() { return new HashCodeBuilder() .append(host) .append(path) .append(port) .append(protocol) .append(queryString) .append(fragment) .append(encoding) .toHashCode(); } @Override public boolean equals(Object obj) { if (this == obj) { return true; } if (obj == null) { return false; } if (!(obj instanceof HttpURL)) { return false; } HttpURL other = (HttpURL) obj; return new EqualsBuilder() .append(host, other.host) .append(path, other.path) .append(port, other.port) .append(protocol, other.protocol) .append(queryString, other.queryString) .append(fragment, other.fragment) .append(encoding, other.encoding) .isEquals(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy