All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.openlineage.client.utils.DatasetIdentifierUtils Maven / Gradle / Ivy

There is a newer version: 1.26.0
Show newest version
/*
/* Copyright 2018-2024 contributors to the OpenLineage project
/* SPDX-License-Identifier: Apache-2.0
*/

package io.openlineage.client.utils;

import java.io.File;
import java.net.URI;
import java.util.Optional;
import java.util.regex.Pattern;

/**
 * Static helpers that translate a {@link URI} into a {@code DatasetIdentifier} (a dataset name
 * plus a namespace).
 */
public class DatasetIdentifierUtils {

  /** Namespace used by {@link #fromURI(URI)} when the URI carries neither scheme nor authority. */
  private static final String DEFAULT_SCHEME = "file";

  /** The directory separator, a slash, as a character. */
  public static final char SEPARATOR_CHAR = '/';

  /** The directory separator, a slash. */
  public static final String SEPARATOR = "/";

  /** Whether the current host is a Windows machine. */
  public static final boolean WINDOWS = System.getProperty("os.name").startsWith("Windows");

  /** Pre-compiled regular expression to detect a Windows drive letter at the start of a path. */
  private static final Pattern HAS_DRIVE_LETTER_SPECIFIER = Pattern.compile("^/?[a-zA-Z]:");

  /**
   * Builds a dataset identifier from the given URI, falling back to the {@code "file"} namespace
   * when the URI has neither a scheme nor an authority.
   *
   * @param uri the URI to convert
   * @return the dataset identifier derived from {@code uri}
   */
  public static DatasetIdentifier fromURI(URI uri) {
    return fromURI(uri, DEFAULT_SCHEME);
  }

  /**
   * Builds a dataset identifier from the given URI.
   *
   * <p>When the URI has no scheme, no authority and an absolute path, the path is used unchanged
   * as the name and {@code defaultScheme} as the namespace. Otherwise the name is the URI path
   * with one trailing slash removed and, if the only slash left is the leading one, with that
   * leading slash removed as well. The namespace is {@code scheme://authority} when an authority
   * is present, else the URI scheme, else {@code defaultScheme}.
   *
   * @param uri the URI to convert
   * @param defaultScheme namespace to use when the URI has neither scheme nor authority
   * @return the dataset identifier derived from {@code uri}
   * @throws NullPointerException if {@code uri} has no path component (for example an opaque URI)
   */
  public static DatasetIdentifier fromURI(URI uri, String defaultScheme) {
    if (isAbsoluteAndSchemeAuthorityNull(uri)) {
      return new DatasetIdentifier(uri.getPath(), defaultScheme);
    }

    String name = removeFirstSlashIfSingleSlashInString(removeLastSlash(uri.getPath()));

    String namespace =
        Optional.ofNullable(uri.getAuthority())
            .map(a -> String.format("%s://%s", uri.getScheme(), a))
            .orElseGet(() -> (uri.getScheme() != null) ? uri.getScheme() : defaultScheme);

    return new DatasetIdentifier(name, namespace);
  }

  /**
   * Strips the leading slash when it is the only slash in {@code name}; otherwise returns {@code
   * name} unchanged.
   */
  private static String removeFirstSlashIfSingleSlashInString(String name) {
    // URI paths are always '/'-separated, so compare against SEPARATOR_CHAR rather than the
    // platform-dependent File.separatorChar ('\\' on Windows).
    boolean singleSlash = name.chars().filter(c -> c == SEPARATOR_CHAR).count() == 1;
    if (singleSlash && name.startsWith(SEPARATOR)) {
      return name.substring(1);
    }
    return name;
  }

  /** Strips one trailing slash from {@code name}, if present. */
  private static String removeLastSlash(String name) {
    // Same reasoning as above: the separator of a URI path does not depend on the host OS.
    if (!name.isEmpty() && name.charAt(name.length() - 1) == SEPARATOR_CHAR) {
      return name.substring(0, name.length() - 1);
    }
    return name;
  }

  /**
   * Copied implementation of `isAbsoluteAndSchemeAuthorityNull` method in Path class within hadoop
   * common package. We don't want to add 4MB dependency, however we need to have a method that
   * checks if a path is absolute in that way.
   *
   * @param uri the URI to inspect
   * @return {@code true} when {@code uri} has neither scheme nor authority and its path, after
   *     skipping a Windows drive specifier on Windows hosts, starts with a slash
   * @see "org.apache.hadoop.fs.Path#isAbsoluteAndSchemeAuthorityNull()"
   */
  private static boolean isAbsoluteAndSchemeAuthorityNull(URI uri) {
    if (uri.getScheme() != null || uri.getAuthority() != null) {
      return false;
    }

    String path = uri.getPath();
    boolean hasWindowsDrive = WINDOWS && HAS_DRIVE_LETTER_SPECIFIER.matcher(path).find();

    // Skip over "C:" (2 chars) or "/C:" (3 chars) before looking for the root slash.
    int startPositionWithoutWindowsDrive = 0;
    if (hasWindowsDrive) {
      startPositionWithoutWindowsDrive = (path.charAt(0) == SEPARATOR_CHAR ? 3 : 2);
    }

    return path.startsWith(SEPARATOR, startPositionWithoutWindowsDrive);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy