All downloads are free. The search and download functionality uses the official Maven repository.

org.broadinstitute.http.nio.HttpPath Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.http.nio;

import org.broadinstitute.http.nio.utils.HttpUtils;
import org.broadinstitute.http.nio.utils.Utils;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOError;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.InvalidPathException;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.WatchEvent;
import java.nio.file.WatchKey;
import java.nio.file.WatchService;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Objects;
import java.util.stream.IntStream;

/**
 * {@link Path} for HTTP/S.
 *
 * 

The HTTP/S paths holds the following information: * *

    * *
  • * The {@link HttpFileSystem} originating the path. The protocol is retrieved, if necessary, * from the provider of the File System. *
  • * *
  • * The hostname and domain for the URL/URI in a single authority String. *
  • * *
  • * If present, the path component of the URL/URI. *
  • * *
  • * If present, the query and reference Strings. *
  • * *
* * @author Daniel Gomez-Sanchez (magicDGS) */ final class HttpPath implements Path { // file system (indicates the scheme - HTTP or HTTPS) private final HttpFileSystem fs; // path - similar to other implementation of Path //Stored in encoded form private final byte[] normalizedPath; // offsets for the separator (computed if needed) private volatile int[] offsets; // query for the URL (may be null) //Stored in encoded form private final String query; // reference for the URL (may be null) / fragment for the URI representation //Stored in encoded form private final String reference; // true if the path is absolute; false otherwise private final boolean absolute; /** * Internal constructor. * * @param fs file system. Shouldn't be {@code null}. * @param query query. May be {@code null}. * @param reference reference. May be {@code null}. * @param normalizedPath normalized path (as a byte array). Shouldn't be {@code null}. * @implNote does not perform any check for efficiency. */ private HttpPath(final HttpFileSystem fs, final String query, final String reference, final boolean absolute, final byte... normalizedPath) { this.fs = fs; // optional query and reference components (may be null) this.query = query; this.reference = reference; // set the absolute status this.absolute = absolute; // normalized path bytes (shouldn't be null) this.normalizedPath = Utils.nonNull(normalizedPath, () -> "path may not be null"); } /** * Creates a new Path in the provided {@link HttpFileSystem}, with optional query and reference. * * @param fs file system representing the base URL (scheme and authority). * @param path path (absolute) component for the URL (required). * @param query query component for the URL (optional). * @param reference reference component for the URL (optional). 
*/ HttpPath(final HttpFileSystem fs, final String path, final String query, final String reference) { // always absolute and checking it when converting to byte[] this(Utils.nonNull(fs, () -> "null fs"), query, reference, true, getNormalizedPathBytes(Utils.nonNull(path, () -> "null path"), true)); } @Override public HttpFileSystem getFileSystem() { return fs; } @Override public boolean isAbsolute() { return absolute; } @Override public Path getRoot() { // root is a Path with only the byte array (always absolute) return new HttpPath(fs, null, null, true, new byte[0]); } /** * {@inheritDoc} * * @implNote returns always a relative path. */ @Override public Path getFileName() { initOffsets(); // following the contract, for the getNameCounts() == 0 (root) we return null if (offsets.length == 0) { return null; } // file names are always relative paths return subpath(offsets.length - 1, offsets.length, false); } /** * {@inheritDoc} * * @implNote returned path keeps the {@link #isAbsolute()} status of the current path. */ @Override public Path getParent() { initOffsets(); // returns the root if there is no if (offsets.length == 0) { return getRoot(); } // parent names are absolute/relative depending on the current status return subpath(0, offsets.length - 1, absolute); } @Override public int getNameCount() { initOffsets(); return offsets.length; } /** * {@inheritDoc} * * @implNote returns always a relative path. 
*/ @Override public Path getName(final int index) { initOffsets(); // returns always a relative path return subpath(index, index + 1, false); } @Override public Path subpath(final int beginIndex, final int endIndex) { initOffsets(); // following the contract for invalid indexes if (beginIndex < 0 || beginIndex >= offsets.length || endIndex <= beginIndex || endIndex > offsets.length) { throw new IllegalArgumentException(String .format("Invalid indexes for path with %s name(s): [%s, %s]", getNameCount(), beginIndex, endIndex)); } // return the new path (always relative path following the contract) return subpath(beginIndex, endIndex, false); } /** * Helper method to implement different subpath routines with different absolute/relative * status. * *

The contract of this method is the same as {@link Path#subpath(int, int)}). * * @param beginIndex the index of the first element, inclusive * @param endIndex the index of the last element, exclusive * @param absolute {@code true} if the returned path is absolute; {@code false} otherwise. * * @return a new path object that is a subsequence of the nams elements in this {@code * HttpPath}. * * @implNote assumes that the caller already initialized the offsets and that the indexes are * correct. */ private HttpPath subpath(final int beginIndex, final int endIndex, final boolean absolute) { // get the coordinates to copy the path array final int begin = offsets[beginIndex]; final int end = (endIndex == offsets.length) ? normalizedPath.length : offsets[endIndex]; // construct the result final byte[] newPath = Arrays.copyOfRange(normalizedPath, begin, end); // return the new path (always relative path) // TODO: should the query/reference be propagated? return new HttpPath(this.fs, null, null, absolute, newPath); } @Override public boolean startsWith(final Path other) { // different FileSystems return false if (!this.getFileSystem().equals(Utils.nonNull(other, () -> "null path").getFileSystem())) { return false; } return startsWith(((HttpPath) other).normalizedPath); } @Override public boolean startsWith(final String other) { // throw if null Utils.nonNull(other, () -> "null other"); // normalize the path and check with the byte method return startsWith(getNormalizedPathBytes(other, false)); } /** * Private method to test startsWith only for the path component. * *

The contract for this method is the same as {@link #startsWith(Path)} (Path)}, but only * for the path component. * * @param other the other path component. * * @return {@code true} if {@link #normalizedPath} ends with {@code other}; {@code false} * otherwise. */ private boolean startsWith(final byte[] other) { // the other can still end in '/', so we should trim final int olen = getLastIndexWithoutTrailingSlash(other); // the other path component cannot have a larger than this for startWith if (olen > normalizedPath.length) { return false; } // check the bytes of the normalized path int i; for (i = 0; i <= olen; i++) { if (normalizedPath[i] != other[i]) { return false; } } // finally check the name boundary return i >= this.normalizedPath.length || this.normalizedPath[i] == HttpUtils.HTTP_PATH_SEPARATOR_CHAR; } @Override public boolean endsWith(final Path other) { // different FileSystems return false if (!this.getFileSystem().equals(Utils.nonNull(other, () -> "null path").getFileSystem())) { return false; } // compare the path component // TODO: maybe we should use isAbsolute() after https://github.com/magicDGS/jsr203-http/issues/12 return endsWith(((HttpPath) other).normalizedPath, true); } @Override public boolean endsWith(final String other) { // throw if null Utils.nonNull(other, () -> "null other"); // normalize the path and check with the byte method return endsWith(getNormalizedPathBytes(other, false), false); } /** * Private method to test endsWith only for the path component. * *

The contract for this method is the same as {@link #endsWith(Path)}, but only for the * path component. * * @param other the other path component. * @param pathVersion if {@code false}, perform an extra check for the String version. * @return {@code true} if {@link #normalizedPath} ends with {@code other}; {@code false} * otherwise. */ private boolean endsWith(final byte[] other, final boolean pathVersion) { // get the last index to check int olast = getLastIndexWithoutTrailingSlash(other); // get the last index to check int last = getLastIndexWithoutTrailingSlash(this.normalizedPath); // early termination if the length is 0 (last index = -1) if (olast == -1) { return last == -1; } // early termination if the other is larger if (last < olast) { return false; } // iterate over the bytes to check if they are the same for (; olast >= 0; olast--, last--) { if (other[olast] != this.normalizedPath[last]) { return false; } } // last == -1 when olast == -1 (the same length and equals) if (last == -1) { return true; } // switch for the path version or not path version if (pathVersion) { // at this point, the pathVersion should always return true (as it is bounded) // TODO: this might change after relative path support (https://github.com/magicDGS/jsr203-http/issues/12) return true; } else { // otherwise, it shouldn't be included (e.g., "/foo/bar" ends with "bar" but not "/bar" return this.normalizedPath[last] == HttpUtils.HTTP_PATH_SEPARATOR_CHAR; } } @Override public Path normalize() { throw new UnsupportedOperationException("Not implemented"); } /** * {@inheritDoc} * * @implNote This implementation differs from the expected behavior of the method when other is absolute. Instead * of returning other in that case it will throw {@link UnsupportedOperationException}. 
*/ @Override public HttpPath resolve(final Path other) { if(other == null){ return this; } else if(other.isAbsolute()){ //Note: This violates the general contract of the method but shouldn't be important practically. throw new UnsupportedOperationException("Cannot resolve an absolute path against an http(s) path." + "\nThis path is: " + this + "\nThe problematic path is an instance of " + other.getClass().getName() + "\nOther path: " + other); } else { try { final URI otherUri = new URI(other.toString()); // to string is used here instead of the expected toUri because // toUri will produce normalized absolute paths in many filesystems which is // what we don't want return resolve(otherUri); } catch (URISyntaxException e) { throw new IllegalArgumentException("Can only resolve http(s) paths against fully encoded paths which are valid URIs.", e); } } } private HttpPath resolve(URI other){ return new HttpPath(fs, other.getRawQuery(), other.getRawFragment(), this.isAbsolute(), concatPaths(this.normalizedPath, getNormalizedPathBytes(other.getRawPath(), false))); } @Override public HttpPath resolve(final String other) { return resolve(fromRelativeString(other)); // Paths.get() and the filesystem equivalent can't be used here // because we don't allow them to create relative HttpPaths } @Override public Path resolveSibling(final String other){ return resolveSibling(fromRelativeString(other)); } @Override public Path relativize(final Path other) { throw new UnsupportedOperationException("Not implemented"); } private HttpPath fromRelativeString(final String other) { if (other == null) { return null; } else { try { final URI uri = new URI(other); if (uri.isAbsolute()) { throw new UnsupportedOperationException("Resolving absolute URI strings against an HTTP path is not supported." 
+ "\nURI: " + uri); } return new HttpPath(getFileSystem(), uri.getRawFragment(), uri.getRawQuery(), false, getNormalizedPathBytes(uri.getRawPath(), false)); } catch (URISyntaxException e) { throw new IllegalArgumentException("Cannot resolve against an invalid URI.", e); } } } @Override public URI toUri() { try { return new URI(toUriString(true)); } catch (final URISyntaxException e) { throw new IOError(e); } } @Override public Path toAbsolutePath() { if (isAbsolute()) { return this; } // just fromUri a new path with a different absolute status return new HttpPath(fs, query, reference, true,normalizedPath); } @Override public Path toRealPath(final LinkOption... options) throws IOException { throw new UnsupportedOperationException("Not implemented"); } /** Unsupported method. */ @Override public File toFile() { throw new UnsupportedOperationException(this.getClass() + " cannot be converted to a File"); } @Override public WatchKey register(final WatchService watcher, final WatchEvent.Kind[] events, final WatchEvent.Modifier... modifiers) throws IOException { throw new UnsupportedOperationException("Not implemented"); } @Override public WatchKey register(final WatchService watcher, final WatchEvent.Kind... events) throws IOException { throw new UnsupportedOperationException("Not implemented"); } @Override public Iterator iterator() { return IntStream.range(0, getNameCount()).mapToObj(this::getName).iterator(); } /** * {@inheritDoc} * * @implNote comparison of every component of the HTTP/S path is case-sensitive, except the * scheme and the authority. * @implNote if the query and/or reference are present, this method order the one without any * of them first. 
*/ @Override public int compareTo(final Path other) { if (this == other) { return 0; } final HttpPath httpOther = (HttpPath) other; // object comparison - should be from the same provider if (fs.provider() != httpOther.fs.provider()) { throw new ClassCastException(); } // first check the authority (case insensitive) int comparison = fs.getAuthority().compareToIgnoreCase(httpOther.fs.getAuthority()); if (comparison != 0) { return comparison; } // then check the path final int len1 = normalizedPath.length; final int len2 = httpOther.normalizedPath.length; final int n = Math.min(len1, len2); for (int k = 0; k < n; k++) { // this is case sensitive comparison = Byte.compare(this.normalizedPath[k], httpOther.normalizedPath[k]); if (comparison != 0) { return comparison; } } comparison = len1 - len2; if (comparison != 0) { return comparison; } // compare the query if present comparison = Comparator.nullsFirst(String::compareTo).compare(this.query, httpOther.query); if (comparison != 0) { return comparison; } // otherwise, just return the value of comparing the fragment return Comparator.nullsFirst(String::compareTo) .compare(this.reference, httpOther.reference); } /** * {@inheritDoc} * * @implNote it uses the {@link #compareTo(Path)} method and the absolute status. */ @Override public boolean equals(final Object other) { try { return ((HttpPath) other).absolute == this.absolute && compareTo((Path) other) == 0; } catch (ClassCastException e) { return false; } } /** * {@inheritDoc} * * @implNote Includes the absolute status and all the components of the path in a * case-sensitive way, except the scheme and the authority. 
*/ @Override public int hashCode() { // TODO - maybe we should cache (https://github.com/magicDGS/jsr203-http/issues/18) int h = 31 * Boolean.hashCode(absolute) + fs.hashCode(); for (int i = 0; i < normalizedPath.length; i++) { h = 31 * h + (normalizedPath[i] & 0xff); } // this is safe for null query and reference h = 31 * h + Objects.hash(query, reference); return h; } @Override public String toString() { return toUriString(isAbsolute()); } private String toUriString(boolean includeRoot) { // TODO - maybe we should cache (https://github.com/magicDGS/jsr203-http/issues/18) // adding scheme, authority and normalized path final StringBuilder sb = new StringBuilder(); if(includeRoot) { sb.append(fs.provider().getScheme()) // scheme .append("://") .append(fs.getAuthority()) // authority .append(new String(normalizedPath, HttpUtils.HTTP_PATH_CHARSET)); } else if( normalizedPath.length != 0){ if(normalizedPath[0] == HttpUtils.HTTP_PATH_SEPARATOR_CHAR) { sb.append(new String(normalizedPath, 1, normalizedPath.length - 1, HttpUtils.HTTP_PATH_CHARSET)); } else { sb.append(new String(normalizedPath, HttpUtils.HTTP_PATH_CHARSET)); } } if (query != null) { sb.append('?').append(query); } if (reference != null) { sb.append('#').append(reference); } return sb.toString(); } /** * Creates the array of offsets if not already created. * * @implNote it assumes that redundant separators are already removed. 
*/ private void initOffsets() { if (offsets == null) { // get the length without the trailing slash final int length = getLastIndexWithoutTrailingSlash(normalizedPath); // count names int count = 0; // index position (outside loop to re-use in the next loop) int index = 0; for (; index < length; index++) { final byte c = normalizedPath[index]; if (c == HttpUtils.HTTP_PATH_SEPARATOR_CHAR) { count++; // assumes that redundant separators are already removed index++; } } // populate offsets final int[] result = new int[count]; count = 0; for (index = 0; index < length; index++) { final byte c = normalizedPath[index]; if (c == HttpUtils.HTTP_PATH_SEPARATOR_CHAR) { // assumes that redundant separators are already removed result[count++] = index++; } } // update in a thread-safe manner synchronized (this) { if (offsets == null) { offsets = result; } } } } /** * Gets the path as a normalized (without multiple slashes) array of bytes. * * @param path path to convert into byte[] * @param checkRelative if {@code true}, check if the path is absolute. * * @return array of bytes, without multiple slashes together. 
*/ private static byte[] getNormalizedPathBytes(final String path, final boolean checkRelative) { if (checkRelative && !path.isEmpty() && !path.startsWith(HttpUtils.HTTP_PATH_SEPARATOR_STRING)) { throw new InvalidPathException(path, "Relative HTTP/S path are not supported"); } if (HttpUtils.HTTP_PATH_SEPARATOR_STRING.equals(path) || path.isEmpty()) { return new byte[0]; } final int len = path.length(); char prevChar = 0; for (int i = 0; i < len; i++) { char c = path.charAt(i); if (isDoubleSeparator(prevChar, c)) { return getNormalizedPathBytes(path, len, i - 1); } prevChar = checkNotNull(path, c); } if (prevChar == HttpUtils.HTTP_PATH_SEPARATOR_CHAR) { return getNormalizedPathBytes(path, len, len - 1); } return path.getBytes(HttpUtils.HTTP_PATH_CHARSET); } private static byte[] getNormalizedPathBytes(final String path, final int len, final int offset) { // get first the last offset int lastOffset = len; while (lastOffset > 0 && path.charAt(lastOffset - 1) == HttpUtils.HTTP_PATH_SEPARATOR_CHAR) { lastOffset--; } if (lastOffset == 0) { // early termination return new byte[] {HttpUtils.HTTP_PATH_SEPARATOR_CHAR}; } // byte output stream try (final ByteArrayOutputStream os = new ByteArrayOutputStream(len)) { if (offset > 0) { os.write(path.substring(0, offset).getBytes(HttpUtils.HTTP_PATH_CHARSET)); } char prevChar = 0; for (int i = offset; i < len; i++) { char c = path.charAt(i); if (isDoubleSeparator(prevChar, c)) { continue; } prevChar = checkNotNull(path, c); os.write(c); } return os.toByteArray(); } catch (final IOException e) { throw new Utils.ShouldNotHappenException(e); } } private static boolean isDoubleSeparator(final char prevChar, final char c) { return c == HttpUtils.HTTP_PATH_SEPARATOR_CHAR && prevChar == HttpUtils.HTTP_PATH_SEPARATOR_CHAR; } private static char checkNotNull(final String path, char c) { if (c == '\u0000') { throw new InvalidPathException(path, "Null character not allowed in path"); } return c; } /** * Gets the last index to consider in the 
path bytes. * *

If the lst index is a trailing slash {@link HttpUtils#HTTP_PATH_SEPARATOR_CHAR}, it * should not be considered for some operations. This method takes into account that problem. * * @param path bytes representing the path. * * @return last index of path to consider. */ private static int getLastIndexWithoutTrailingSlash(final byte[] path) { int len = path.length - 1; if (len > 0 && path[len] == HttpUtils.HTTP_PATH_SEPARATOR_CHAR) { len--; } return len; } private static byte[] concatPaths(byte[] array1, byte[] array2) { int array1ModifiedLength = getLastIndexWithoutTrailingSlash(array1) + 1; byte[] result = Arrays.copyOf(array1, array1ModifiedLength + 1 + array2.length ); result[array1ModifiedLength] = HttpUtils.HTTP_PATH_SEPARATOR_CHAR; System.arraycopy(array2, 0, result, array1ModifiedLength + 1, array2.length); return result; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy