/*
 * org.broadinstitute.http.nio.HttpPath (artifact: http-nio).
 * HTTP/S FileSystem provider for Java NIO.2.
 */
package org.broadinstitute.http.nio;

import org.broadinstitute.http.nio.utils.HttpUtils;
import org.broadinstitute.http.nio.utils.Utils;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOError;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.InvalidPathException;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.WatchEvent;
import java.nio.file.WatchKey;
import java.nio.file.WatchService;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Objects;
import java.util.stream.IntStream;

/**
 * {@link Path} for HTTP/S.
 *
 * <p>An HTTP/S path holds the following information:
 *
 * <ul>
 *   <li>
 *     The {@link HttpFileSystem} originating the path. The protocol is retrieved, if necessary,
 *     from the provider of the File System.
 *   </li>
 *   <li>
 *     The hostname and domain for the URL/URI in a single authority String.
 *   </li>
 *   <li>
 *     If present, the path component of the URL/URI.
 *   </li>
 *   <li>
 *     If present, the query and reference Strings.
 *   </li>
 * </ul>
 *
 * @author Daniel Gomez-Sanchez (magicDGS)
 */
final class HttpPath implements Path {

    // File system that owns this path; its provider supplies the scheme (HTTP or HTTPS).
    private final HttpFileSystem fs;

    // Path component as bytes, similar to other implementations of Path.
    // Stored in encoded form; never null (checked in the private constructor).
    private final byte[] normalizedPath;

    // Positions of the name separators inside normalizedPath.
    // Stays null until initOffsets() computes and publishes it.
    private volatile int[] offsets;

    // Query for the URL (may be null).
    // Stored in encoded form.
    private final String query;

    // Reference for the URL (may be null); this is the fragment in the URI representation.
    // Stored in encoded form.
    private final String reference;

    // true if the path is absolute; false otherwise.
    private final boolean absolute;

    /**
     * Internal constructor that stores every component as given.
     *
     * @param fs             owning file system. Shouldn't be {@code null} (not checked here).
     * @param query          encoded query. May be {@code null}.
     * @param reference      encoded reference (fragment). May be {@code null}.
     * @param absolute       whether the new path is absolute.
     * @param normalizedPath already-normalized path bytes. Must not be {@code null}.
     * @implNote for efficiency only the path bytes are null-checked; nothing else is validated
     * and the byte array is stored without copying.
     */
    private HttpPath(final HttpFileSystem fs, final String query, final String reference,
            final boolean absolute, final byte... normalizedPath) {
        // the path bytes are the only component that is checked
        this.normalizedPath = Utils.nonNull(normalizedPath, () -> "path may not be null");

        this.fs = fs;
        this.absolute = absolute;

        // optional components, kept as-is (null means "not present")
        this.query = query;
        this.reference = reference;
    }

    /**
     * Builds an absolute path inside the given {@link HttpFileSystem}.
     *
     * <p>The path string is converted to normalized bytes (repeated separators collapsed). A
     * non-empty path that does not start with the separator is rejected with
     * {@link InvalidPathException}.
     *
     * @param fs        file system representing the base URL (scheme and authority); required.
     * @param path      absolute path component for the URL; required.
     * @param query     query component for the URL; may be {@code null}.
     * @param reference reference component for the URL; may be {@code null}.
     */
    HttpPath(final HttpFileSystem fs, final String path, final String query,
             final String reference) {
        // paths created through this constructor are always absolute;
        // the relative check happens while converting the string to bytes
        this(
                Utils.nonNull(fs, () -> "null fs"),
                query,
                reference,
                true,
                getNormalizedPathBytes(Utils.nonNull(path, () -> "null path"), true));
    }

    /**
     * Returns the {@link HttpFileSystem} this path was created with.
     */
    @Override
    public HttpFileSystem getFileSystem() {
        return this.fs;
    }

    /**
     * Returns the absolute flag stored at construction time.
     */
    @Override
    public boolean isAbsolute() {
        return this.absolute;
    }

    /**
     * Returns the root of this path's file system.
     *
     * @implNote the root is an absolute path with an empty path component and neither query nor
     * reference; it is returned regardless of whether this path is absolute.
     */
    @Override
    public Path getRoot() {
        final byte[] emptyPath = new byte[0];
        return new HttpPath(this.fs, null, null, true, emptyPath);
    }

    /**
     * {@inheritDoc}
     *
     * @implNote the returned path is always relative; {@code null} is returned when this path
     * has no name elements (e.g. the root).
     */
    @Override
    public Path getFileName() {
        initOffsets();
        final int names = offsets.length;
        // no names at all: following the contract there is no file name
        return names == 0 ? null : subpath(names - 1, names, false);
    }

    /**
     * {@inheritDoc}
     *
     * @implNote the returned path keeps the {@link #isAbsolute()} status of the current path.
     * When this path has no name elements, {@link #getRoot()} is returned.
     */
    @Override
    public Path getParent() {
        initOffsets();
        final int names = offsets.length;
        if (names > 0) {
            // everything but the last name, with the same absolute/relative status
            return subpath(0, names - 1, absolute);
        }
        // nothing to strip: fall back to the root
        return getRoot();
    }

    /**
     * Returns the number of name elements, i.e. the number of separator offsets computed by
     * {@link #initOffsets()}.
     */
    @Override
    public int getNameCount() {
        initOffsets();
        final int[] separators = offsets;
        return separators.length;
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if {@code index} is negative or not smaller than
     *                                  {@link #getNameCount()} (including paths without names).
     * @implNote returns always a relative path.
     */
    @Override
    public Path getName(final int index) {
        initOffsets();
        // validate here: the private subpath helper indexes the offsets array unchecked, which
        // would surface as ArrayIndexOutOfBoundsException instead of the contract's exception
        if (index < 0 || index >= offsets.length) {
            throw new IllegalArgumentException(String
                    .format("Invalid index for path with %s name(s): %s",
                            offsets.length, index));
        }
        // returns always a relative path
        return subpath(index, index + 1, false);
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if the indexes do not describe a non-empty range of
     *                                  existing name elements.
     * @implNote the returned path is always relative and carries no query or reference.
     */
    @Override
    public Path subpath(final int beginIndex, final int endIndex) {
        initOffsets();
        final int names = offsets.length;
        final boolean validBegin = beginIndex >= 0 && beginIndex < names;
        final boolean validEnd = endIndex > beginIndex && endIndex <= names;
        if (!(validBegin && validEnd)) {
            // following the contract for invalid indexes
            throw new IllegalArgumentException(String
                    .format("Invalid indexes for path with %s name(s): [%s, %s]",
                            names, beginIndex, endIndex));
        }
        return subpath(beginIndex, endIndex, false);
    }

    /**
     * Shared implementation of the subpath routines, parameterized on the absolute/relative
     * status of the result.
     *
     * <p>The index semantics are the same as in {@link Path#subpath(int, int)}.
     *
     * @param beginIndex the index of the first element, inclusive
     * @param endIndex   the index of the last element, exclusive
     * @param absolute   {@code true} if the returned path is absolute; {@code false} otherwise.
     *
     * @return a new path whose bytes are the slice of {@link #normalizedPath} covering the
     * requested name elements; it has the same file system and no query/reference.
     *
     * @implNote assumes that the caller already initialized the offsets and that the indexes are
     * correct; nothing is validated here.
     */
    private HttpPath subpath(final int beginIndex, final int endIndex, final boolean absolute) {
        // the slice starts at the separator preceding the first requested name
        final int from = offsets[beginIndex];
        // ... and stops at the separator of the first excluded name, or at the end of the path
        final int to;
        if (endIndex == offsets.length) {
            to = normalizedPath.length;
        } else {
            to = offsets[endIndex];
        }

        final byte[] slice = Arrays.copyOfRange(normalizedPath, from, to);

        // TODO: should the query/reference be propagated?
        return new HttpPath(this.fs, null, null, absolute, slice);
    }

    /**
     * {@inheritDoc}
     *
     * @implNote paths from a different file system never match; otherwise only the path
     * components are compared ({@code other} is cast to {@link HttpPath}).
     */
    @Override
    public boolean startsWith(final Path other) {
        Utils.nonNull(other, () -> "null path");
        if (this.getFileSystem().equals(other.getFileSystem())) {
            return startsWith(((HttpPath) other).normalizedPath);
        }
        // different FileSystems return false
        return false;
    }

    /**
     * {@inheritDoc}
     *
     * @implNote the string is normalized (repeated separators collapsed) without requiring it
     * to be absolute, and then compared against the path component only.
     */
    @Override
    public boolean startsWith(final String other) {
        // throw if null
        Utils.nonNull(other, () -> "null other");
        final byte[] prefix = getNormalizedPathBytes(other, false);
        return startsWith(prefix);
    }

    /**
     * Private method to test startsWith only for the path component.
     *
     * <p>The contract for this method is the same as {@link #startsWith(Path)}, but only
     * for the path component.
     *
     * @param other the other path component.
     *
     * @return {@code true} if {@link #normalizedPath} starts with {@code other} and the match
     * ends on a name boundary; {@code false} otherwise.
     */
    private boolean startsWith(final byte[] other) {
        // the other can still end in '/', so we should trim
        final int olast = getLastIndexWithoutTrailingSlash(other);

        // the prefix spans indexes [0, olast], i.e. olast + 1 bytes: if that is more than this
        // path holds it cannot be a prefix (and comparing would read past the array)
        if (olast >= normalizedPath.length) {
            return false;
        }

        // check the bytes of the normalized path
        int i;
        for (i = 0; i <= olast; i++) {
            if (normalizedPath[i] != other[i]) {
                return false;
            }
        }

        // finally check the name boundary: either the whole path matched or the next byte
        // starts a new name
        return i >= this.normalizedPath.length
                || this.normalizedPath[i] == HttpUtils.HTTP_PATH_SEPARATOR_CHAR;
    }

    /**
     * {@inheritDoc}
     *
     * @implNote paths from a different file system never match; otherwise only the path
     * components are compared ({@code other} is cast to {@link HttpPath}).
     */
    @Override
    public boolean endsWith(final Path other) {
        Utils.nonNull(other, () -> "null path");
        if (this.getFileSystem().equals(other.getFileSystem())) {
            // compare the path component using the "path version" of the check
            // TODO: maybe we should use isAbsolute() after https://github.com/magicDGS/jsr203-http/issues/12
            return endsWith(((HttpPath) other).normalizedPath, true);
        }
        // different FileSystems return false
        return false;
    }

    /**
     * {@inheritDoc}
     *
     * @implNote the string is normalized (repeated separators collapsed) without requiring it
     * to be absolute, and the "String version" of the suffix check is applied.
     */
    @Override
    public boolean endsWith(final String other) {
        // throw if null
        Utils.nonNull(other, () -> "null other");
        final byte[] suffix = getNormalizedPathBytes(other, false);
        return endsWith(suffix, false);
    }

    /**
     * Private method to test endsWith only for the path component.
     *
     * <p>The contract for this method is the same as {@link #endsWith(Path)}, but only for the
     * path component. A single trailing separator is ignored on both sides.
     *
     * @param other the other path component.
     * @param pathVersion if {@code false}, perform an extra name-boundary check for the String
     *                    version.
     * @return {@code true} if {@link #normalizedPath} ends with {@code other}; {@code false}
     * otherwise.
     */
    private boolean endsWith(final byte[] other, final boolean pathVersion) {
        // last indexes to consider on each side
        final int otherLast = getLastIndexWithoutTrailingSlash(other);
        final int thisLast = getLastIndexWithoutTrailingSlash(this.normalizedPath);

        // an empty other (last index = -1) only matches an empty path
        if (otherLast == -1) {
            return thisLast == -1;
        }
        // the other cannot be a suffix if it is larger
        if (thisLast < otherLast) {
            return false;
        }

        // distance between aligned positions of both arrays
        final int shift = thisLast - otherLast;
        for (int k = otherLast; k >= 0; k--) {
            if (this.normalizedPath[k + shift] != other[k]) {
                return false;
            }
        }

        // index right before the matched suffix; -1 means both have the same length and are equal
        final int boundary = shift - 1;
        if (boundary == -1) {
            return true;
        }

        // at this point, the pathVersion always returns true (as it is bounded)
        // TODO: this might change after relative path support (https://github.com/magicDGS/jsr203-http/issues/12)
        // otherwise the suffix must start at a name boundary
        // (e.g., "/foo/bar" ends with "bar" but not "/bar")
        return pathVersion
                || this.normalizedPath[boundary] == HttpUtils.HTTP_PATH_SEPARATOR_CHAR;
    }

    /**
     * Unsupported operation.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public Path normalize() {
        throw new UnsupportedOperationException("Not implemented");
    }

    /**
     * {@inheritDoc}
     *
     * @throws UnsupportedOperationException if {@code other} is absolute.
     * @throws IllegalArgumentException      if the string form of {@code other} is not a valid
     *                                       URI.
     * @implNote This implementation differs from the expected behavior of the method when other is absolute. Instead
     * of returning other in that case it will throw {@link UnsupportedOperationException}. A {@code null} other
     * returns this path.
     */
    @Override
    public HttpPath resolve(final Path other) {
        if (other == null) {
            return this;
        }
        if (other.isAbsolute()) {
            //Note: This violates the general contract of the method but shouldn't be important practically.
            throw new UnsupportedOperationException("Cannot resolve an absolute path against an http(s) path."
                    + "\nThis path is: " + this
                    + "\nThe problematic path is an instance of " + other.getClass().getName()
                    + "\nOther path: " + other);
        }

        // toString is used here instead of the expected toUri because toUri will produce
        // normalized absolute paths in many filesystems, which is what we don't want
        final URI relative;
        try {
            relative = new URI(other.toString());
        } catch (final URISyntaxException e) {
            throw new IllegalArgumentException("Can only resolve http(s) paths against fully encoded paths which are valid URIs.", e);
        }
        return resolve(relative);
    }

    /**
     * Appends the raw path of {@code other} to this path.
     *
     * @param other URI providing the raw path, query and fragment of the result.
     * @return a path in the same file system and with the same absolute status as this one,
     * whose query and reference are the raw query and raw fragment of {@code other}.
     */
    private HttpPath resolve(URI other){
        final byte[] relativeBytes = getNormalizedPathBytes(other.getRawPath(), false);
        final byte[] joined = concatPaths(this.normalizedPath, relativeBytes);
        final String rawQuery = other.getRawQuery();
        final String rawFragment = other.getRawFragment();
        return new HttpPath(fs, rawQuery, rawFragment, this.isAbsolute(), joined);
    }

    /**
     * Resolves a relative URI string against this path.
     *
     * @implNote the string is turned into a relative {@link HttpPath} directly; Paths.get() and
     * the filesystem equivalent can't be used here because we don't allow them to create
     * relative HttpPaths.
     */
    @Override
    public HttpPath resolve(final String other) {
        final HttpPath relative = fromRelativeString(other);
        return resolve(relative);
    }

    /**
     * Resolves a relative URI string against the parent of this path.
     *
     * @implNote the string is first converted with {@link #fromRelativeString(String)} and then
     * passed to {@link #resolveSibling(Path)}.
     */
    @Override
    public Path resolveSibling(final String other){
        final HttpPath relative = fromRelativeString(other);
        return resolveSibling(relative);
    }

    /**
     * Unsupported operation.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public Path relativize(final Path other) {
        throw new UnsupportedOperationException("Not implemented");
    }

    /**
     * Parses a relative URI string into a relative {@link HttpPath} of this file system.
     *
     * @param other relative URI string (path with optional query and fragment); may be
     *              {@code null}.
     * @return the relative path, or {@code null} if {@code other} is {@code null}.
     * @throws UnsupportedOperationException if the string is an absolute URI (has a scheme).
     * @throws IllegalArgumentException      if the string is not a valid URI.
     */
    private HttpPath fromRelativeString(final String other) {
        if (other == null) {
            return null;
        }

        final URI uri;
        try {
            uri = new URI(other);
        } catch (final URISyntaxException e) {
            throw new IllegalArgumentException("Cannot resolve against an invalid URI.", e);
        }

        if (uri.isAbsolute()) {
            throw new UnsupportedOperationException("Resolving absolute URI strings against an HTTP path is not supported." +
                    "\nURI: " + uri);
        }

        // the internal constructor takes (fs, query, reference, absolute, path): the query must
        // come before the fragment, otherwise both components end up swapped in the result
        return new HttpPath(getFileSystem(), uri.getRawQuery(), uri.getRawFragment(), false,
                getNormalizedPathBytes(uri.getRawPath(), false));
    }

    /**
     * Returns the URI built from the scheme, authority, path, query and reference.
     *
     * @throws IOError if the assembled string is not a valid URI.
     */
    @Override
    public URI toUri() {
        // the URI always includes scheme and authority, even for relative paths
        final String text = toUriString(true);
        try {
            return new URI(text);
        } catch (final URISyntaxException e) {
            throw new IOError(e);
        }
    }

    /**
     * Returns this path if it is already absolute; otherwise a copy with the same file system,
     * path bytes, query and reference but flagged as absolute.
     */
    @Override
    public Path toAbsolutePath() {
        return isAbsolute()
                ? this
                : new HttpPath(fs, query, reference, true, normalizedPath);
    }

    /**
     * Unsupported operation.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public Path toRealPath(final LinkOption... options) throws IOException {
        throw new UnsupportedOperationException("Not implemented");
    }

    /**
     * Unsupported method: an HTTP/S path cannot be represented as a {@link File}.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public File toFile() {
        throw new UnsupportedOperationException(this.getClass() + " cannot be converted to a File");
    }

    /**
     * Unsupported operation: HTTP/S paths cannot be watched.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public WatchKey register(final WatchService watcher, final WatchEvent.Kind<?>[] events,
            final WatchEvent.Modifier... modifiers) throws IOException {
        throw new UnsupportedOperationException("Not implemented");
    }

    /**
     * Unsupported operation: HTTP/S paths cannot be watched.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public WatchKey register(final WatchService watcher, final WatchEvent.Kind<?>... events)
            throws IOException {
        throw new UnsupportedOperationException("Not implemented");
    }

    /**
     * Iterates over the name elements of this path, in order, as returned by
     * {@link #getName(int)}.
     */
    @Override
    public Iterator<Path> iterator() {
        return IntStream.range(0, getNameCount()).mapToObj(this::getName).iterator();
    }

    /**
     * {@inheritDoc}
     *
     * <p>Paths are ordered by authority, then by path bytes, then by query and finally by
     * reference.
     *
     * @throws ClassCastException if {@code other} is not an {@link HttpPath} or belongs to a
     *                            different provider instance.
     * @implNote comparison of every component of the HTTP/S path is case-sensitive, except the
     * scheme and the authority.
     * @implNote if the query and/or reference are present, this method order the one without any
     * of them first.
     */
    @Override
    public int compareTo(final Path other) {
        if (this == other) {
            return 0;
        }

        final HttpPath that = (HttpPath) other;
        // object comparison - should be from the same provider
        if (fs.provider() != that.fs.provider()) {
            throw new ClassCastException();
        }

        // authority first (case insensitive)
        final int byAuthority = fs.getAuthority().compareToIgnoreCase(that.fs.getAuthority());
        if (byAuthority != 0) {
            return byAuthority;
        }

        // then the path bytes (case sensitive): first differing byte decides
        final byte[] mine = this.normalizedPath;
        final byte[] theirs = that.normalizedPath;
        final int common = Math.min(mine.length, theirs.length);
        for (int k = 0; k < common; k++) {
            if (mine[k] != theirs[k]) {
                return Byte.compare(mine[k], theirs[k]);
            }
        }
        // same common prefix: the shorter path goes first
        if (mine.length != theirs.length) {
            return mine.length - theirs.length;
        }

        // finally the optional components, absent (null) ones first
        final Comparator<String> nullsFirst = Comparator.nullsFirst(String::compareTo);
        final int byQuery = nullsFirst.compare(this.query, that.query);
        return byQuery != 0 ? byQuery : nullsFirst.compare(this.reference, that.reference);
    }

    /**
     * {@inheritDoc}
     *
     * @implNote two paths are equal when both are {@link HttpPath}s from the same provider, have
     * the same absolute status and {@link #compareTo(Path)} returns 0. Any other object
     * (including {@code null}) is not equal.
     */
    @Override
    public boolean equals(final Object other) {
        if (this == other) {
            return true;
        }
        // instanceof also rejects null, which previously triggered a NullPointerException
        if (!(other instanceof HttpPath)) {
            return false;
        }
        final HttpPath that = (HttpPath) other;
        // compareTo throws ClassCastException for a different provider: check it up front
        // instead of relying on the exception
        if (this.absolute != that.absolute || fs.provider() != that.fs.provider()) {
            return false;
        }
        return compareTo(that) == 0;
    }

    /**
     * {@inheritDoc}
     *
     * @implNote combines the absolute status, the hash of the file system, every byte of the
     * path and the query/reference (both of which may be {@code null}).
     */
    @Override
    public int hashCode() {
        // TODO - maybe we should cache (https://github.com/magicDGS/jsr203-http/issues/18)
        int hash = 31 * Boolean.hashCode(absolute) + fs.hashCode();
        for (final byte b : normalizedPath) {
            // bytes are folded in as unsigned values
            hash = 31 * hash + (b & 0xff);
        }
        // Objects.hash is safe for null query and reference
        return 31 * hash + Objects.hash(query, reference);
    }

    /**
     * Returns the URI-like string for this path; scheme and authority are included only when
     * the path is absolute.
     */
    @Override
    public String toString() {
        final boolean withRoot = isAbsolute();
        return toUriString(withRoot);
    }

    /**
     * Assembles the string representation of this path.
     *
     * @param includeRoot if {@code true}, the result starts with {@code scheme://authority}
     *                    followed by the path; if {@code false}, only the path is written and a
     *                    leading separator, if any, is dropped.
     * @return the assembled string, with {@code ?query} and {@code #reference} appended when
     * those components are present.
     */
    private String toUriString(boolean includeRoot) {
        // TODO - maybe we should cache (https://github.com/magicDGS/jsr203-http/issues/18)
        final StringBuilder sb = new StringBuilder();
        if (includeRoot) {
            sb.append(fs.provider().getScheme()); // scheme
            sb.append("://");
            sb.append(fs.getAuthority()); // authority
            sb.append(new String(normalizedPath, HttpUtils.HTTP_PATH_CHARSET));
        } else if (normalizedPath.length > 0) {
            // relative form: skip the leading separator when there is one
            final int skip = normalizedPath[0] == HttpUtils.HTTP_PATH_SEPARATOR_CHAR ? 1 : 0;
            sb.append(new String(normalizedPath, skip, normalizedPath.length - skip,
                    HttpUtils.HTTP_PATH_CHARSET));
        }
        if (query != null) {
            sb.append('?');
            sb.append(query);
        }
        if (reference != null) {
            sb.append('#');
            sb.append(reference);
        }
        return sb.toString();
    }

    /**
     * Computes the separator offsets of {@link #normalizedPath} unless they are already set.
     *
     * <p>Only separators located before the last considered index (see
     * {@link #getLastIndexWithoutTrailingSlash(byte[])}) are recorded, and the byte right after
     * a separator is never inspected.
     *
     * @implNote it assumes that redundant separators are already removed.
     */
    private void initOffsets() {
        if (offsets != null) {
            return;
        }
        // upper bound of the scan: last index, ignoring a trailing slash
        final int limit = getLastIndexWithoutTrailingSlash(normalizedPath);

        // collect the separator positions in a single pass (there cannot be more than 'limit')
        final int[] found = new int[Math.max(limit, 0)];
        int names = 0;
        int pos = 0;
        while (pos < limit) {
            if (normalizedPath[pos] == HttpUtils.HTTP_PATH_SEPARATOR_CHAR) {
                found[names++] = pos;
                // redundant separators are assumed to be removed: skip the next byte
                pos += 2;
            } else {
                pos++;
            }
        }
        final int[] result = Arrays.copyOf(found, names);

        // publish under the lock; if another thread already stored its array, keep that one
        synchronized (this) {
            if (offsets == null) {
                offsets = result;
            }
        }
    }

    /**
     * Converts a path string into its normalized byte form (no consecutive separators).
     *
     * <p>An empty string and the single separator both map to an empty array. Strings that
     * contain neither a repeated nor a trailing separator are encoded directly; the rest are
     * handed to {@link #getNormalizedPathBytes(String, int, int)}.
     *
     * @param path          path to convert into byte[]
     * @param checkRelative if {@code true}, reject non-empty paths that do not start with the
     *                      separator.
     *
     * @return array of bytes, without multiple slashes together.
     * @throws InvalidPathException if the relative check fails or the path contains the null
     *                              character.
     */
    private static byte[] getNormalizedPathBytes(final String path, final boolean checkRelative) {
        final boolean empty = path.isEmpty();
        if (checkRelative && !empty && !path.startsWith(HttpUtils.HTTP_PATH_SEPARATOR_STRING)) {
            throw new InvalidPathException(path, "Relative HTTP/S path are not supported");
        }

        if (empty || HttpUtils.HTTP_PATH_SEPARATOR_STRING.equals(path)) {
            return new byte[0];
        }

        final int len = path.length();
        char previous = 0;
        int i = 0;
        while (i < len) {
            final char current = path.charAt(i);
            if (isDoubleSeparator(previous, current)) {
                // slow path, starting at the first separator of the repeated pair
                return getNormalizedPathBytes(path, len, i - 1);
            }
            previous = checkNotNull(path, current);
            i++;
        }

        // a trailing separator also goes through the slow path
        return previous == HttpUtils.HTTP_PATH_SEPARATOR_CHAR
                ? getNormalizedPathBytes(path, len, len - 1)
                : path.getBytes(HttpUtils.HTTP_PATH_CHARSET);
    }

    /**
     * Slow path of the normalization: copies {@code path} collapsing consecutive separators.
     *
     * @param path   path to normalize.
     * @param len    length of {@code path}.
     * @param offset index from which separators have to be checked; everything before it is
     *               copied verbatim.
     *
     * @return the normalized path encoded with {@link HttpUtils#HTTP_PATH_CHARSET}; a path made
     * only of separators yields a single separator byte.
     * @throws InvalidPathException if the null character is found from {@code offset} onwards.
     * @implNote characters are accumulated as text and encoded once at the end, so that
     * non-ASCII characters are encoded with the path charset instead of being truncated to
     * their low byte.
     */
    private static byte[] getNormalizedPathBytes(final String path, final int len,
            final int offset) {
        // find where the trailing run of separators starts
        int lastOffset = len;
        while (lastOffset > 0
                && path.charAt(lastOffset - 1) == HttpUtils.HTTP_PATH_SEPARATOR_CHAR) {
            lastOffset--;
        }
        if (lastOffset == 0) {
            // early termination: the path contains only separators
            return new byte[] {HttpUtils.HTTP_PATH_SEPARATOR_CHAR};
        }

        final StringBuilder normalized = new StringBuilder(len);
        // the prefix is known to be free of repeated separators
        if (offset > 0) {
            normalized.append(path, 0, offset);
        }
        char prevChar = 0;
        for (int i = offset; i < len; i++) {
            final char c = path.charAt(i);
            if (isDoubleSeparator(prevChar, c)) {
                // drop the repeated separator
                continue;
            }
            prevChar = checkNotNull(path, c);
            normalized.append(c);
        }

        return normalized.toString().getBytes(HttpUtils.HTTP_PATH_CHARSET);
    }

    /**
     * Tells whether two consecutive characters are both the path separator.
     *
     * @param prevChar the previous character.
     * @param c        the current character.
     * @return {@code true} if both are {@link HttpUtils#HTTP_PATH_SEPARATOR_CHAR}.
     */
    private static boolean isDoubleSeparator(final char prevChar, final char c) {
        if (c != HttpUtils.HTTP_PATH_SEPARATOR_CHAR) {
            return false;
        }
        return prevChar == HttpUtils.HTTP_PATH_SEPARATOR_CHAR;
    }

    /**
     * Rejects the null character.
     *
     * @param path the path being processed (only used for the exception).
     * @param c    the character to check.
     * @return {@code c} itself when it is not the null character.
     * @throws InvalidPathException if {@code c} is the null character.
     */
    private static char checkNotNull(final String path, char c) {
        if (c != '\u0000') {
            return c;
        }
        throw new InvalidPathException(path, "Null character not allowed in path");
    }

    /**
     * Gets the last index to consider in the path bytes.
     *
     * <p>If the last byte is a trailing slash ({@link HttpUtils#HTTP_PATH_SEPARATOR_CHAR}) and
     * it is not the only byte, it is left out, as it should not be considered for some
     * operations.
     *
     * @param path bytes representing the path.
     *
     * @return last index of path to consider ({@code -1} for an empty array).
     */
    private static int getLastIndexWithoutTrailingSlash(final byte[] path) {
        final int last = path.length - 1;
        final boolean trailingSlash =
                last > 0 && path[last] == HttpUtils.HTTP_PATH_SEPARATOR_CHAR;
        return trailingSlash ? last - 1 : last;
    }

    /**
     * Joins two path components with exactly one separator between them.
     *
     * @param array1 first component; a trailing slash, if any, is not duplicated.
     * @param array2 second component, copied as-is after the separator.
     * @return a new array holding {@code array1} (without trailing slash), a separator and
     * {@code array2}.
     */
    private static byte[] concatPaths(byte[] array1, byte[] array2) {
        // number of bytes of the first component that are kept
        final int prefixLength = getLastIndexWithoutTrailingSlash(array1) + 1;
        final byte[] joined = new byte[prefixLength + 1 + array2.length];
        System.arraycopy(array1, 0, joined, 0, prefixLength);
        joined[prefixLength] = HttpUtils.HTTP_PATH_SEPARATOR_CHAR;
        System.arraycopy(array2, 0, joined, prefixLength + 1, array2.length);
        return joined;
    }
    
}