All Downloads are FREE. Search and download functionalities are using the official Maven repository.

fr.cenotelie.commons.utils.http.URIUtils Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2016 Association Cénotélie (cenotelie.fr)
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General
 * Public License along with this program.
 * If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/

package fr.cenotelie.commons.utils.http;

import fr.cenotelie.commons.utils.IOUtils;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;

/**
 * Utility APIs for URIs.
 * Provides parsing, re-composition and relative resolution of URIs as described by RFC 3986,
 * percent-encoding and decoding of URI components, and sanitation of request parameter values.
 *
 * @author Laurent Wouters
 */
public class URIUtils {
    /**
     * Identifier of the scheme component of an URI
     */
    public static final int COMPONENT_SCHEME = 0;
    /**
     * Identifier of the authority component of an URI
     */
    public static final int COMPONENT_AUTHORITY = 1;
    /**
     * Identifier of the path component of an URI
     */
    public static final int COMPONENT_PATH = 2;
    /**
     * Identifier of the query component of an URI
     */
    public static final int COMPONENT_QUERY = 3;
    /**
     * Identifier of the fragment component of an URI
     */
    public static final int COMPONENT_FRAGMENT = 4;

    /**
     * Determines whether the specified URI is absolute.
     * An URI is considered absolute if it has a scheme component, as well as either an authority or path component
     * (See RFC 3986 - 4.3)
     *
     * @param uri An URI
     * @return true if the URI has a scheme followed by an authority or a non-empty path, false otherwise
     */
    public static boolean isAbsolute(String uri) {
        int colon = findSchemeEnd(uri);
        if (colon == -1) {
            // no scheme
            return false;
        }
        int start = colon + 1;
        if (uri.startsWith("//", start)) {
            // found an authority
            return true;
        }
        if (start == uri.length()) {
            // nothing after the scheme
            return false;
        }
        // we expect a path, if a query or fragment is found instead the path is missing (no authority)
        char c = uri.charAt(start);
        return (c != '?' && c != '#');
    }

    /**
     * Resolves a relative URI against a base
     * Implements the RFC 3986 relative resolution algorithm
     * (See RFC 3986 - 5.2)
     * When the reference is null or empty the base is returned unchanged;
     * when the base is null or empty the reference is returned unchanged.
     *
     * @param base      The base URI to resolve against
     * @param reference The URI to resolve
     * @return The resolved URI
     * @throws IllegalArgumentException When the base or the reference contains illegal characters
     */
    public static String resolveRelative(String base, String reference) {
        if (reference == null || reference.isEmpty()) {
            return base;
        }
        if (base == null || base.isEmpty()) {
            return reference;
        }
        // RFC 3986: 5.2.1 - Pre-parse the Base URI
        String[] uriBase = parse(base);
        // RFC 3986: 5.2.2 - Transform References
        // (R.scheme, R.authority, R.path, R.query, R.fragment) = parse(R);
        String[] uriReference = parse(reference);

        String targetScheme;
        String targetAuthority;
        String targetPath;
        String targetQuery;
        if (uriReference[COMPONENT_SCHEME] != null) {
            targetScheme = uriReference[COMPONENT_SCHEME];
            targetAuthority = uriReference[COMPONENT_AUTHORITY];
            targetPath = removeDotSegments(uriReference[COMPONENT_PATH]);
            targetQuery = uriReference[COMPONENT_QUERY];
        } else {
            if (uriReference[COMPONENT_AUTHORITY] != null) {
                targetAuthority = uriReference[COMPONENT_AUTHORITY];
                targetPath = removeDotSegments(uriReference[COMPONENT_PATH]);
                targetQuery = uriReference[COMPONENT_QUERY];
            } else {
                String referencePath = uriReference[COMPONENT_PATH];
                if (referencePath == null || referencePath.isEmpty()) {
                    targetPath = uriBase[COMPONENT_PATH];
                    targetQuery = uriReference[COMPONENT_QUERY] != null ? uriReference[COMPONENT_QUERY] : uriBase[COMPONENT_QUERY];
                } else {
                    if (referencePath.startsWith("/")) {
                        targetPath = removeDotSegments(referencePath);
                    } else {
                        targetPath = removeDotSegments(mergePaths(uriBase[COMPONENT_AUTHORITY], uriBase[COMPONENT_PATH], referencePath));
                    }
                    targetQuery = uriReference[COMPONENT_QUERY];
                }
                targetAuthority = uriBase[COMPONENT_AUTHORITY];
            }
            targetScheme = uriBase[COMPONENT_SCHEME];
        }
        String targetFragment = uriReference[COMPONENT_FRAGMENT];

        // RFC 3986: 5.3 - Component Recomposition
        return recompose(targetScheme, targetAuthority, targetPath, targetQuery, targetFragment);
    }

    /**
     * Parses an URI to decompose it into the 5 components (scheme, authority, path, query and fragment)
     * An undefined component is represented by the null value.
     * An empty component is represented by the empty string
     * (See RFC 3986 - 3)
     * A scheme is only recognized when a non-empty prefix is followed by ':' before any '/', '?' or '#',
     * so that colons appearing in an authority (port), path, query or fragment are not mistaken for a scheme delimiter.
     * When the URI ends right after the scheme or the authority, the path component is left undefined (null).
     *
     * @param uri The URI to decompose
     * @return The 5 URI components in order in an array
     * @throws IllegalArgumentException When the URI contains illegal characters
     */
    public static String[] parse(String uri) {
        if (!allLegalCharacters(uri)) {
            throw new IllegalArgumentException("URI contains illegal characters");
        }
        String[] components = new String[5];
        int length = uri.length();
        int start = 0;

        // retrieve the scheme
        int colon = findSchemeEnd(uri);
        if (colon != -1) {
            components[COMPONENT_SCHEME] = uri.substring(0, colon);
            start = colon + 1;
        }

        // retrieve the authority, it extends to the next '/', '?', '#' or the end of the URI
        if (uri.startsWith("//", start)) {
            start += 2;
            int end = indexOfAny(uri, start, "/?#");
            components[COMPONENT_AUTHORITY] = uri.substring(start, end);
            start = end;
        }
        if (start == length) {
            return components;
        }

        // retrieve the path, it extends to the next '?', '#' or the end of the URI
        // after an authority the cursor is necessarily on '/', '?' or '#',
        // so the path is either empty or starts with '/' and no further check is required
        int pathEnd = indexOfAny(uri, start, "?#");
        components[COMPONENT_PATH] = uri.substring(start, pathEnd);
        start = pathEnd;
        if (start == length) {
            return components;
        }

        // retrieve the query
        if (uri.charAt(start) == '?') {
            start++;
            int queryEnd = indexOfAny(uri, start, "#");
            components[COMPONENT_QUERY] = uri.substring(start, queryEnd);
            start = queryEnd;
            if (start == length) {
                return components;
            }
        }

        // at this point the cursor is on the '#' that introduces the fragment
        components[COMPONENT_FRAGMENT] = uri.substring(start + 1);
        return components;
    }

    /**
     * Looks for the colon that terminates the scheme component of an URI
     *
     * @param uri An URI
     * @return The index of the colon terminating a non-empty scheme, or -1 if the URI has no scheme
     */
    private static int findSchemeEnd(String uri) {
        int first = indexOfAny(uri, 0, ":/?#");
        if (first > 0 && first < uri.length() && uri.charAt(first) == ':') {
            return first;
        }
        return -1;
    }

    /**
     * Looks for the first occurrence of any of the specified delimiters
     *
     * @param text       The text to look into
     * @param from       The index to start looking from
     * @param delimiters The characters to look for
     * @return The index of the first delimiter at or after from, or the length of the text if there is none
     */
    private static int indexOfAny(String text, int from, String delimiters) {
        for (int i = from; i < text.length(); i++) {
            if (delimiters.indexOf(text.charAt(i)) != -1) {
                return i;
            }
        }
        return text.length();
    }

    /**
     * Recomposes the components of a URI to form the full one
     * Implements the RFC 3986 component re-composition algorithm
     * (See RFC 3986 - 5.3)
     * Undefined (null) components are omitted along with their delimiter.
     *
     * @param scheme    The scheme component
     * @param authority The authority component
     * @param path      The path component
     * @param query     The query component
     * @param fragment  The fragment component
     * @return The recomposed URI
     */
    public static String recompose(String scheme, String authority, String path, String query, String fragment) {
        StringBuilder builder = new StringBuilder();
        if (scheme != null) {
            builder.append(scheme);
            builder.append(":");
        }
        if (authority != null) {
            builder.append("//");
            builder.append(authority);
        }
        if (path != null) {
            builder.append(path);
        }
        if (query != null) {
            builder.append("?");
            builder.append(query);
        }
        if (fragment != null) {
            builder.append("#");
            builder.append(fragment);
        }
        return builder.toString();
    }

    /**
     * Removes the dot segments from the path component of an URI
     * Implements the RFC 3986 remove dot segments algorithm
     * (See RFC 3986 - 5.2.4)
     *
     * @param path The original path component
     * @return The resulting path component (null for a null path)
     */
    private static String removeDotSegments(String path) {
        if (path == null) {
            return null;
        }
        if (path.isEmpty()) {
            return path;
        }
        List<String> segments = getSegments(path);
        StringBuilder result = new StringBuilder();
        for (int i = 0; i != segments.size(); i++) {
            if (i != 0) {
                result.append("/");
            }
            result.append(segments.get(i));
        }
        return result.toString();
    }

    /**
     * Gets the segments in the specified path, with the dot segments resolved.
     * A leading (resp. trailing) empty segment stands for a leading (resp. trailing) / in the path.
     *
     * @param path The path
     * @return The segments (an empty list for a null or empty path)
     */
    public static List<String> getSegments(String path) {
        List<String> output = new ArrayList<>();
        if (path == null || path.isEmpty()) {
            return output;
        }
        boolean isAbsolute = path.startsWith("/");
        List<String> input = splitSegments(path);
        for (int i = 0; i != input.size(); i++) {
            String head = input.get(i);
            boolean isLast = (i == input.size() - 1);
            if ("..".equals(head)) {
                if (output.isEmpty()) {
                    // relative path with nothing to go up from
                    continue;
                }
                // the first empty segment of an absolute path stands for the root and cannot be removed
                boolean atRoot = isAbsolute && output.size() == 1 && output.get(0).isEmpty();
                if (!atRoot) {
                    output.remove(output.size() - 1);
                }
                if (isLast) {
                    // the last one, to not lose the trailing / push an empty segment
                    // (this also applies at the root so that "/.." yields "/" and not "")
                    output.add("");
                }
            } else if (".".equals(head)) {
                if (isLast) {
                    // the last one, to not lose the trailing / push an empty segment
                    output.add("");
                }
            } else {
                output.add(head);
            }
        }
        return output;
    }

    /**
     * Splits the path component of an URI into segments
     * This method leaves empty segments at the beginning (resp. end) when the path begins (resp. ends) with a /, as well as in the middle
     *
     * @param path The path component of an URI
     * @return The list of segments
     */
    private static List<String> splitSegments(String path) {
        List<String> result = new ArrayList<>();
        if (path.isEmpty()) {
            return result;
        }
        int start = 0;
        int index = path.indexOf('/');
        while (index != -1) {
            result.add(path.substring(start, index));
            start = index + 1;
            index = path.indexOf('/', start);
        }
        // the remainder after the last /, empty when the path ends with a /
        result.add(path.substring(start));
        return result;
    }

    /**
     * Merges the paths components of two URIs
     * Implements the RFC 3986 merge paths algorithm
     * (See RFC 3986 - 5.2.3)
     *
     * @param baseAuthority The authority component of the base URI
     * @param basePath      The path component of the base URI
     * @param refPath       The path component of the reference URI
     * @return The merged target path
     */
    private static String mergePaths(String baseAuthority, String basePath, String refPath) {
        boolean noBasePath = (basePath == null || basePath.isEmpty());
        if (noBasePath) {
            return baseAuthority != null ? "/" + refPath : refPath;
        }
        // keep the base path up to and including its last /, if any
        int index = basePath.lastIndexOf('/');
        if (index == -1) {
            return refPath;
        }
        return basePath.substring(0, index + 1) + refPath;
    }

    /**
     * Determines whether the specified URI only contains valid characters.
     * This method does not check whether the URI is well-formed
     *
     * @param uri An URI
     * @return true if the URI only contains valid characters
     */
    public static boolean allLegalCharacters(String uri) {
        for (int i = 0; i != uri.length(); i++) {
            if (!isLegalCharacter(uri.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Determines whether the specified character is a valid character in an URI
     * Whitespace, ISO control characters, and the characters &lt; and &gt; are rejected.
     *
     * @param c A character
     * @return true if the character is valid
     */
    private static boolean isLegalCharacter(char c) {
        return (!Character.isWhitespace(c) && !Character.isISOControl(c) && c != '<' && c != '>');
    }

    /**
     * Encodes an URI component
     * ASCII letters, digits and the characters - _ . ! ~ * ' ( ) are kept as is,
     * any other character is replaced by the percent-encoding (lowercase hexadecimal) of its UTF-8 bytes.
     * A surrogate that is not part of a valid pair is encoded on its own
     * (the UTF-8 encoder substitutes it with its replacement byte).
     *
     * @param original The original value
     * @return The encoded value (null for a null value)
     */
    public static String encodeComponent(String original) {
        if (original == null) {
            return null;
        }
        if (original.isEmpty()) {
            return original;
        }
        StringBuilder builder = new StringBuilder();
        int length = original.length();
        int i = 0;
        while (i < length) {
            char c = original.charAt(i);
            if (isUnescaped(c)) {
                builder.append(c);
                i++;
                continue;
            }
            // a supplementary character spans two chars that must be encoded together;
            // check that the low surrogate is really there to not read past the end of the string
            boolean isPair = Character.isHighSurrogate(c)
                    && i + 1 < length
                    && Character.isLowSurrogate(original.charAt(i + 1));
            int width = isPair ? 2 : 1;
            appendPercentEncoded(builder, original.substring(i, i + width));
            i += width;
        }
        return builder.toString();
    }

    /**
     * Determines whether the specified character is left as is when encoding an URI component
     *
     * @param c A character
     * @return true if the character does not need to be percent-encoded
     */
    private static boolean isUnescaped(char c) {
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
            return true;
        }
        return "-_.!~*'()".indexOf(c) != -1;
    }

    /**
     * Appends the percent-encoding of the UTF-8 bytes of the specified text
     *
     * @param builder The builder to append to
     * @param text    The text to encode
     */
    private static void appendPercentEncoded(StringBuilder builder, String text) {
        byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
        for (int j = 0; j != bytes.length; j++) {
            int n = bytes[j] & 0xff;
            builder.append('%');
            builder.append(Character.forDigit(n >> 4, 16));
            builder.append(Character.forDigit(n & 0x0f, 16));
        }
    }

    /**
     * Decodes an URI component
     * A + is decoded as a space, and each run of consecutive percent-escapes is decoded as UTF-8.
     * A % that is not followed by two hexadecimal digits is not an escape and is kept literally.
     *
     * @param original The original value
     * @return The decoded value (null for a null value)
     */
    public static String decodeComponent(String original) {
        if (original == null) {
            return null;
        }
        if (original.isEmpty()) {
            return original;
        }
        StringBuilder builder = new StringBuilder();
        int length = original.length();
        int i = 0;
        while (i < length) {
            char c = original.charAt(i);
            if (c == '+') {
                builder.append(' ');
                i++;
            } else if (c != '%' || decodeEscape(original, i) == -1) {
                // regular character, or a % that does not start a well-formed escape
                builder.append(c);
                i++;
            } else {
                // gather the whole run of escapes so that multi-byte UTF-8 sequences are decoded together
                byte[] buffer = new byte[(length - i) / 3];
                int count = 0;
                int value = decodeEscape(original, i);
                while (value != -1) {
                    buffer[count++] = (byte) value;
                    i += 3;
                    value = decodeEscape(original, i);
                }
                builder.append(new String(buffer, 0, count, StandardCharsets.UTF_8));
            }
        }
        return builder.toString();
    }

    /**
     * Reads the percent-escape at the specified index
     *
     * @param text  The text to read from
     * @param index The index of the expected % character
     * @return The value of the escaped byte (0 to 255), or -1 if there is no well-formed escape at this index
     */
    private static int decodeEscape(String text, int index) {
        if (index + 2 >= text.length() || text.charAt(index) != '%') {
            return -1;
        }
        int high = hexValue(text.charAt(index + 1));
        int low = hexValue(text.charAt(index + 2));
        if (high == -1 || low == -1) {
            return -1;
        }
        return (high << 4) | low;
    }

    /**
     * Gets the value of an ASCII hexadecimal digit
     *
     * @param c A character
     * @return The value of the digit (0 to 15), or -1 if the character is not an hexadecimal digit
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Gets the sanitized single value of a parameter
     * Only the first value is considered; it is trimmed.
     *
     * @param values The current values
     * @return The single value, or null if there is none (no value, or the first one is null or blank)
     */
    public static String getSingleParameter(String[] values) {
        if (values == null || values.length == 0 || values[0] == null) {
            return null;
        }
        String value = values[0].trim();
        return value.isEmpty() ? null : value;
    }

    /**
     * Gets the sanitized multi values of a parameter
     * The values are trimmed, the null or blank ones are dropped.
     * The specified array is left untouched, the sanitized values are returned in a new array.
     *
     * @param values The current values
     * @return The values
     */
    public static String[] getMultiParameters(String[] values) {
        if (values == null) {
            return new String[0];
        }
        if (values.length == 0) {
            return values;
        }
        List<String> result = new ArrayList<>(values.length);
        for (String value : values) {
            if (value == null) {
                continue;
            }
            String trimmed = value.trim();
            if (!trimmed.isEmpty()) {
                result.add(trimmed);
            }
        }
        return result.toArray(new String[0]);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy