// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// org.apache.hudi.storage.StoragePath Maven / Gradle / Ivy

// The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.storage;

import org.apache.hudi.ApiMaturityLevel;
import org.apache.hudi.PublicAPIClass;
import org.apache.hudi.PublicAPIMethod;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Names a file or directory on storage.
 * Path strings use slash (`/`) as the directory separator.
 * The APIs are mainly based on {@code org.apache.hadoop.fs.Path} class.
 *
 * <p>The path is stored as a normalized {@link URI}. The parent, the name and the
 * string form are computed on first use and kept in transient fields; only the
 * {@link URI} is written during Java serialization.
 */
@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING)
public class StoragePath implements Comparable<StoragePath>, Serializable {
  public static final char SEPARATOR_CHAR = '/';
  public static final char COLON_CHAR = ':';
  public static final String SEPARATOR = "" + SEPARATOR_CHAR;
  // Not final because readObject() has to assign it during deserialization.
  private URI uri;
  // Lazily computed values derived from `uri`; transient, so they start out as null
  // again after deserialization and are recomputed on demand.
  private transient volatile StoragePath cachedParent;
  private transient volatile String cachedName;
  private transient volatile String uriString;

  /**
   * Creates a path from a {@link URI}; the URI is normalized before it is stored.
   *
   * @param uri the URI that identifies the path.
   */
  public StoragePath(URI uri) {
    this.uri = uri.normalize();
  }

  /**
   * Creates a path by parsing a path string into scheme, authority and path.
   * Trailing slashes of the path component are removed, except that a path consisting
   * only of slashes becomes `/`. Query and fragment are not parsed.
   *
   * @param path the path string, e.g. {@code scheme://authority/dir/file} or {@code /dir/file}.
   * @throws IllegalArgumentException if the parsed components do not form a valid URI.
   */
  public StoragePath(String path) {
    try {
      // This part of parsing is compatible with hadoop's Path
      // and required for properly handling encoded path with URI
      String scheme = null;
      String authority = null;

      int start = 0;

      // Parse URI scheme, if any: a colon only marks a scheme when it appears
      // before the first slash (or when there is no slash at all)
      int colon = path.indexOf(COLON_CHAR);
      int slash = path.indexOf(SEPARATOR_CHAR);
      if (colon != -1
          && ((slash == -1) || (colon < slash))) {
        scheme = path.substring(0, colon);
        start = colon + 1;
      }

      // Parse URI authority, if any: it sits between "//" and the next slash,
      // or runs to the end of the string when no further slash exists
      if (path.startsWith("//", start)
          && (path.length() - start > 2)) {
        int nextSlash = path.indexOf(SEPARATOR_CHAR, start + 2);
        int authEnd = nextSlash > 0 ? nextSlash : path.length();
        authority = path.substring(start + 2, authEnd);
        start = authEnd;
      }

      // URI path is the rest of the string -- query & fragment not supported
      String uriPath = path.substring(start);

      this.uri = new URI(scheme, authority, normalize(uriPath, true), null, null).normalize();
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
  }

  /**
   * Creates a path by resolving {@code child} against the path parsed from {@code parent}.
   *
   * @param parent the parent path string.
   * @param child  the child path, resolved relative to the parent.
   * @throws IllegalArgumentException if either part does not form a valid URI.
   */
  public StoragePath(String parent, String child) {
    this(new StoragePath(parent), child);
  }

  /**
   * Creates a path by resolving {@code child} against {@code parent}.
   * Scheme and authority are taken from the parent. Trailing slashes of the child are
   * ignored; if nothing is left of the child, the new path has the same URI as the parent.
   *
   * @param parent the parent path.
   * @param child  the child path, resolved relative to the parent.
   * @throws IllegalArgumentException if the resolved components do not form a valid URI.
   */
  public StoragePath(StoragePath parent, String child) {
    URI parentUri = parent.toUri();
    String normalizedChild = normalize(child, false);

    if (normalizedChild.isEmpty()) {
      this.uri = parentUri;
      return;
    }

    // A child without a separator makes `parent` the direct parent of the new path, so it
    // can be cached right away. The dot segments "." and ".." are excluded: they resolve to
    // the parent itself or to its parent, so caching `parent` would make getParent() wrong.
    if (!child.contains(SEPARATOR) && !".".equals(child) && !"..".equals(child)) {
      this.cachedParent = parent;
    }
    // The base path must end with a separator so that resolve() appends the child
    // instead of replacing the last path segment of the parent.
    String parentPathWithSeparator = parentUri.getPath();
    if (!parentPathWithSeparator.endsWith(SEPARATOR)) {
      parentPathWithSeparator = parentPathWithSeparator + SEPARATOR;
    }
    try {
      URI resolvedUri = new URI(
          parentUri.getScheme(),
          parentUri.getAuthority(),
          parentPathWithSeparator,
          null,
          parentUri.getFragment())
          .resolve(new URI(null, null, normalizedChild, null, null));
      // Resolving a child that ends in a dot segment leaves a trailing slash
      // (e.g. "/a/b/" + ".." gives "/a/"); strip it so the stored path follows the same
      // no-trailing-slash form the other constructors produce.
      // NOTE(review): getPath() looks like it can be null when the child is parsed as an
      // opaque URI (e.g. "a:b"); that case is passed through unchanged -- confirm intent.
      String resolvedPath = resolvedUri.getPath();
      if (resolvedPath != null) {
        resolvedPath = normalize(resolvedPath, true);
      }
      this.uri = new URI(
          parentUri.getScheme(),
          parentUri.getAuthority(),
          resolvedPath,
          null,
          resolvedUri.getFragment()).normalize();
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
  }

  /**
   * @return {@code true} if the path component of the URI starts with `/`.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public boolean isAbsolute() {
    return uri.getPath().startsWith(SEPARATOR);
  }

  /**
   * Returns the parent of this path, keeping scheme, authority and fragment.
   * The parent of a path directly under the root is `/`; the parent of a path
   * without any slash has an empty path component.
   *
   * @return the parent path.
   * @throws IllegalStateException    if the path component is empty or is the root `/`.
   * @throws IllegalArgumentException if the parent components do not form a valid URI.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public StoragePath getParent() {
    // This value could be overwritten concurrently and that's okay, since
    // {@code StoragePath} is immutable
    if (cachedParent == null) {
      String path = uri.getPath();
      int lastSlash = path.lastIndexOf(SEPARATOR_CHAR);
      if (path.isEmpty() || path.equals(SEPARATOR)) {
        throw new IllegalStateException("Cannot get parent path of a root path");
      }
      // Keep the leading slash when the only slash is the first character,
      // so that the parent of "/a" is "/" rather than an empty path
      String parentPath = lastSlash == -1
          ? "" : path.substring(0, lastSlash == 0 ? 1 : lastSlash);
      try {
        cachedParent = new StoragePath(new URI(
            uri.getScheme(), uri.getAuthority(), parentPath, null, uri.getFragment()));
      } catch (URISyntaxException e) {
        throw new IllegalArgumentException(e);
      }
    }
    return cachedParent;
  }

  /**
   * @return the part of the path component after the last `/`; the whole path component
   * if it contains no slash.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public String getName() {
    // This value could be overwritten concurrently and that's okay, since
    // {@code StoragePath} is immutable
    if (cachedName == null) {
      String path = uri.getPath();
      int slash = path.lastIndexOf(SEPARATOR);
      cachedName = path.substring(slash + 1);
    }
    return cachedName;
  }

  /**
   * @return a new path with the same path, query and fragment, but without scheme and authority.
   * @throws IllegalArgumentException if the remaining components do not form a valid URI.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public StoragePath getPathWithoutSchemeAndAuthority() {
    try {
      return new StoragePath(
          new URI(null, null, uri.getPath(), uri.getQuery(), uri.getFragment()));
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
  }

  /**
   * Counts the elements of the path component.
   *
   * @return 0 for the root path `/`; otherwise 1 plus the number of `/` characters
   * found after the first character of the path component.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public int depth() {
    String path = uri.getPath();
    int depth = 0;
    // -1 skips the loop entirely for the root path; 0 starts the scan otherwise
    int slash = path.length() == 1 && path.charAt(0) == SEPARATOR_CHAR ? -1 : 0;
    while (slash != -1) {
      depth++;
      slash = path.indexOf(SEPARATOR_CHAR, slash + 1);
    }
    return depth;
  }

  /**
   * @return the {@link URI} backing this path.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public URI toUri() {
    return uri;
  }

  /**
   * Returns a qualified path object.
   *
   * @param defaultUri if this path is missing the scheme or authority
   *                   components, borrow them from this URI.
   * @return this path if it has a scheme and either has an authority or
   * {@code defaultUri} has none to offer; otherwise a new path whose missing scheme
   * and authority are taken from {@code defaultUri} (an empty authority is used if
   * neither provides one).
   * @throws IllegalStateException    if this path is not absolute.
   * @throws IllegalArgumentException if the qualified components do not form a valid URI.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public StoragePath makeQualified(URI defaultUri) {
    if (!isAbsolute()) {
      throw new IllegalStateException("Only an absolute path can be made qualified");
    }

    String scheme = uri.getScheme();
    String authority = uri.getAuthority();
    String fragment = uri.getFragment();

    // Nothing to borrow: the scheme is present and the authority is either present
    // or not available from the default URI
    if (scheme != null && (authority != null || defaultUri.getAuthority() == null)) {
      return this;
    }

    if (scheme == null) {
      scheme = defaultUri.getScheme();
    }

    if (authority == null) {
      authority = defaultUri.getAuthority();
      if (authority == null) {
        authority = "";
      }
    }

    URI newUri;
    try {
      newUri = new URI(scheme, authority,
          normalize(uri.getPath(), true), null, fragment);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
    return new StoragePath(newUri);
  }

  /**
   * @return the suffix of the file name starting at its last `.` (the dot included),
   * or an empty String if the name contains no dot.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public String getFileExtension() {
    String fileName = getName();
    int dotIndex = fileName.lastIndexOf('.');
    return dotIndex == -1 ? "" : fileName.substring(dotIndex);
  }

  /**
   * @return the unescaped string form {@code [scheme:][//authority][path][#fragment]}.
   */
  @Override
  public String toString() {
    // This value could be overwritten concurrently and that's okay, since
    // {@code StoragePath} is immutable
    if (uriString == null) {
      // We can't use uri.toString(), which escapes everything, because we want
      // illegal characters unescaped in the string, for glob processing, etc.
      StringBuilder buffer = new StringBuilder();
      if (uri.getScheme() != null) {
        buffer.append(uri.getScheme())
            .append(":");
      }
      if (uri.getAuthority() != null) {
        buffer.append("//")
            .append(uri.getAuthority());
      }
      if (uri.getPath() != null) {
        buffer.append(uri.getPath());
      }
      if (uri.getFragment() != null) {
        buffer.append("#").append(uri.getFragment());
      }
      uriString = buffer.toString();
    }
    return uriString;
  }

  /**
   * Two paths are equal when their backing URIs are equal.
   */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof StoragePath)) {
      return false;
    }
    return this.uri.equals(((StoragePath) o).toUri());
  }

  @Override
  public int hashCode() {
    return uri.hashCode();
  }

  /**
   * Orders paths by comparing their backing URIs.
   */
  @Override
  public int compareTo(StoragePath o) {
    return this.uri.compareTo(o.uri);
  }

  /**
   * Normalizes the path by removing the trailing slashes (`/`).
   * When {@code keepSingleSlash} is {@code true}, `/` as the path is not changed;
   * otherwise ({@code false}), `/` becomes empty String after normalization.
   *
   * @param path            {@link String} path to normalize.
   * @param keepSingleSlash whether to keep `/` as the path.
   * @return normalized path.
   */
  private static String normalize(String path, boolean keepSingleSlash) {
    // Walk backwards over the trailing slashes; afterwards the index points at the
    // first character of the trailing run of slashes (or at path.length() if none)
    int indexOfLastSlash = path.length() - 1;
    while (indexOfLastSlash >= 0) {
      if (path.charAt(indexOfLastSlash) != SEPARATOR_CHAR) {
        break;
      }
      indexOfLastSlash--;
    }
    indexOfLastSlash++;
    if (indexOfLastSlash == path.length()) {
      // No trailing slash to remove
      return path;
    }
    if (keepSingleSlash && indexOfLastSlash == 0) {
      // All slashes and we want to keep one slash
      return SEPARATOR;
    }
    return path.substring(0, indexOfLastSlash);
  }

  /**
   * Custom serialization hook: writes only the backing URI.
   */
  private void writeObject(ObjectOutputStream out) throws IOException {
    out.writeObject(uri);
  }

  /**
   * Custom deserialization hook: reads the backing URI written by
   * {@link #writeObject(ObjectOutputStream)}.
   */
  private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
    uri = (URI) in.readObject();
  }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy