
// Setup.java, from the starlake-spark3_2.12 1.3.0 source jar

import javax.net.ssl.*;
import java.io.*;
import java.net.*;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Setup extends ProxySelector implements X509TrustManager {

    private static class UserPwdAuth extends Authenticator {
        @Override
        protected PasswordAuthentication getPasswordAuthentication() {
            return new PasswordAuthentication(username, password.toCharArray());
        }
    }

    private static class JarDependency {

        private final String url;

        private final String artefactName;

        public JarDependency(String artefactName, String url) {
            this.url = url;
            this.artefactName = artefactName;
        }

        public String getUrlName() {
            return url.substring(url.lastIndexOf("/") + 1);
        }
    }

    private static String protocol = null;
    private static String host = null;
    private static int port = 0;
    private static String username = null;
    private static String password = null;

    private static String httpsProxy = getEnv("https_proxy").orElse("");
    private static String httpProxy = getEnv("http_proxy").orElse("");
    private static String noProxy = getEnv("no_proxy").orElse("").replaceAll(",", "|");

    private static Proxy proxy = Proxy.NO_PROXY;
    private static HttpClient client = null;

    private static boolean isWindowsOs() {
        String os = System.getProperty("os.name").toLowerCase();
        return os.startsWith("windows");
    }

    private static void parseProxy(String proxy) {
        if (proxy.isEmpty()) {
            return;
        }
        final Pattern pattern = Pattern.compile("(https?|socks5?):\\/\\/([^:].+)", Pattern.CASE_INSENSITIVE);
        final Matcher m = pattern.matcher(proxy);
        if (m.matches()) {
            protocol = m.group(1).toLowerCase();
            final String hostAndPortWithMaybeCredentials = m.group(2);
            if (hostAndPortWithMaybeCredentials.contains("@")) {
                final String[] hostAndPortWithCredentials = hostAndPortWithMaybeCredentials.split("@");
                final String[] credentials = hostAndPortWithCredentials[0].split(":");
                assert(credentials.length == 2): "Invalid credentials format, expecting 'username:password'";
                username = credentials[0];
                password = credentials[1];
                final String[] hostAndPort = hostAndPortWithCredentials[1].split(":");
                host = hostAndPort[0];
                if (hostAndPort.length > 1) {
                    port = Integer.parseInt(hostAndPort[1]);
                }
            } else {
                final String[] hostAndPort = hostAndPortWithMaybeCredentials.split(":");
                host = hostAndPort[0];
                if (hostAndPort.length > 1) {
                    port = Integer.parseInt(hostAndPort[1]);
                }
            }
        } else {
            throw new IllegalArgumentException("Invalid proxy format: " + proxy);
        }
    }
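
    // Illustrative parse (hypothetical values, not read from any real environment):
    // https_proxy=http://jane:secret@proxy.example.com:3128 yields protocol="http",
    // username="jane", password="secret", host="proxy.example.com", port=3128.
    // With no explicit port (e.g. socks5://proxy.example.com) port stays 0 and
    // setProxy() below falls back to the protocol default (443, 1080 or 80).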

    private static void setProxy() {
        if (!httpsProxy.isEmpty()) {
            parseProxy(httpsProxy);
        } else if (!httpProxy.isEmpty()) {
            parseProxy(httpProxy);
        }
        if (host != null) {
            if (port == 0) {
                if (protocol.equals("https")) {
                    port = 443;
                } else if (protocol.startsWith("socks")) {
                    port = 1080;
                } else {
                    port = 80;
                }
            }
            Proxy.Type proxyType = Proxy.Type.HTTP;
            if (protocol.startsWith("socks")) {
                proxyType = Proxy.Type.SOCKS;
            }
            proxy = new Proxy(proxyType, new InetSocketAddress(host, port));
        }
        if (!noProxy.isEmpty()) {
            System.setProperty("http.nonProxyHosts", noProxy);
        }
    }

    // ENV VARS
    public static boolean ENABLE_ALL = envIsTrue("ENABLE_ALL");
    public static boolean ENABLE_BIGQUERY = ENABLE_ALL || envIsTrue("ENABLE_BIGQUERY");
    public static boolean ENABLE_AZURE = ENABLE_ALL || envIsTrue("ENABLE_AZURE");
    public static boolean ENABLE_SNOWFLAKE = ENABLE_ALL || envIsTrue("ENABLE_SNOWFLAKE");
    public static boolean ENABLE_REDSHIFT = ENABLE_ALL || envIsTrue("ENABLE_REDSHIFT");
    public static boolean ENABLE_POSTGRESQL = ENABLE_ALL || envIsTrue("ENABLE_POSTGRESQL");
    public static boolean ENABLE_DUCKDB = ENABLE_ALL || envIsTrue("ENABLE_DUCKDB");

    // Note: this array captures the ENABLE_* values at class-initialization time;
    // later reassignment of the ENABLE_* fields (e.g. in main) is not reflected here.
    private static final boolean[] ALL_ENABLERS = new boolean[] {
            ENABLE_BIGQUERY,
            ENABLE_AZURE,
            ENABLE_SNOWFLAKE,
            ENABLE_REDSHIFT,
            ENABLE_POSTGRESQL,
            ENABLE_DUCKDB
    };

    // SCALA 2.13 by default; for 2.13 the spark-redshift connector is pulled from the snapshot repository (see SPARK_REDSHIFT_VERSION)
    private static final String SCALA_VERSION = getEnv("SCALA_VERSION").orElse("2.13");

    // STARLAKE
    private static final String SL_VERSION = getEnv("SL_VERSION").orElse("1.2.0-SNAPSHOT");

    // SPARK
    private static final String SPARK_VERSION = getEnv("SPARK_VERSION").orElse("3.5.3");
    private static final String SPARK_MAJOR_VERSION = SPARK_VERSION.split("\\.")[0];
    private static final String HADOOP_VERSION = getEnv("HADOOP_VERSION").orElse("3");


    // BIGQUERY
    private static final String SPARK_BQ_VERSION = getEnv("SPARK_BQ_VERSION").orElse("0.40.0");

    // deltalake
    private static final String DELTA_SPARK = getEnv("SPARK_DELTA").orElse("3.2.0");

    private static final String HADOOP_AZURE_VERSION = getEnv("HADOOP_AZURE_VERSION").orElse("3.3.5");
    private static final String AZURE_STORAGE_VERSION = getEnv("AZURE_STORAGE_VERSION").orElse("8.6.6");
    private static final String JETTY_VERSION = getEnv("JETTY_VERSION").orElse("9.4.51.v20230217");

    // HADOOP_LIB ON WINDOWS
    private static final String[] HADOOP_LIBS = new String[]{
            "https://raw.githubusercontent.com/cdarlint/winutils/master/hadoop-3.3.5/bin/winutils.exe",
            "https://raw.githubusercontent.com/cdarlint/winutils/master/hadoop-3.3.5/bin/hadoop.dll",
    };

    // SNOWFLAKE
    private static final String SNOWFLAKE_JDBC_VERSION = getEnv("SNOWFLAKE_JDBC_VERSION").orElse("3.18.0");
    private static final String SPARK_SNOWFLAKE_VERSION = getEnv("SPARK_SNOWFLAKE_VERSION").orElse("3.0.0");

    // POSTGRESQL
    private static final String POSTGRESQL_VERSION = getEnv("POSTGRESQL_VERSION").orElse("42.5.4");

    // DUCKDB
    private static final String DUCKDB_VERSION = getEnv("DUCKDB_VERSION").orElse("1.1.0");

    // REDSHIFT
    private static final String AWS_JAVA_SDK_VERSION = getEnv("AWS_JAVA_SDK_VERSION").orElse("1.12.595");
    private static final String HADOOP_AWS_VERSION = getEnv("HADOOP_AWS_VERSION").orElse("3.3.4");
    private static final String REDSHIFT_JDBC_VERSION = getEnv("REDSHIFT_JDBC_VERSION").orElse("2.1.0.30");
    private static String SPARK_REDSHIFT_VERSION() {
        if (SCALA_VERSION.equals("2.13")) {
            return getEnv("SPARK_REDSHIFT_VERSION").orElse("6.3.0-spark_3.5-SNAPSHOT");
        } else {
            return getEnv("SPARK_REDSHIFT_VERSION").orElse("6.3.0-spark_3.5");
        }
    }

    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    // SPARK
    private static final JarDependency SPARK_JAR = new JarDependency("spark", "https://archive.apache.org/dist/spark/spark-" + SPARK_VERSION + "/spark-" + SPARK_VERSION + "-bin-hadoop" + HADOOP_VERSION + ".tgz");
    private static final JarDependency SPARK_JAR_213 = new JarDependency("spark", "https://archive.apache.org/dist/spark/spark-" + SPARK_VERSION + "/spark-" + SPARK_VERSION + "-bin-hadoop" + HADOOP_VERSION + "-scala2.13.tgz");
    private static final JarDependency SPARK_BQ_JAR = new JarDependency("spark-bigquery-with-dependencies",
            "https://repo1.maven.org/maven2/com/google/cloud/spark/spark-bigquery-with-dependencies_" + SCALA_VERSION + "/" +
                    SPARK_BQ_VERSION + "/" +
                    "spark-bigquery-with-dependencies_" + SCALA_VERSION + "-" + SPARK_BQ_VERSION + ".jar");
    private static final JarDependency DELTA_SPARK_JAR = new JarDependency("delta-spark",
            "https://repo1.maven.org/maven2/io/delta/delta-spark_" + SCALA_VERSION + "/" + DELTA_SPARK + "/delta-spark_" + SCALA_VERSION + "-" + DELTA_SPARK + ".jar");

    private static final JarDependency DELTA_STORAGE_JAR = new JarDependency("delta-storage",
            "https://repo1.maven.org/maven2/io/delta/delta-storage" + "/" + DELTA_SPARK + "/delta-storage" +"-" + DELTA_SPARK + ".jar");
    private static final JarDependency HADOOP_AZURE_JAR = new JarDependency("hadoop-azure", "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-azure/" + HADOOP_AZURE_VERSION + "/hadoop-azure-" + HADOOP_AZURE_VERSION + ".jar");
    private static final JarDependency AZURE_STORAGE_JAR = new JarDependency("azure-storage", "https://repo1.maven.org/maven2/com/microsoft/azure/azure-storage/" + AZURE_STORAGE_VERSION + "/azure-storage-" + AZURE_STORAGE_VERSION + ".jar");
    private static final JarDependency JETTY_SERVER_JAR = new JarDependency("jetty-server", "https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-server/" + JETTY_VERSION + "/jetty-server-" + JETTY_VERSION + ".jar");
    private static final JarDependency SNOWFLAKE_JDBC_JAR = new JarDependency("snowflake-jdbc", "https://repo1.maven.org/maven2/net/snowflake/snowflake-jdbc/" + SNOWFLAKE_JDBC_VERSION + "/snowflake-jdbc-" + SNOWFLAKE_JDBC_VERSION + ".jar");
    private static final JarDependency SPARK_SNOWFLAKE_JAR = new JarDependency("spark-snowflake", "https://repo1.maven.org/maven2/net/snowflake/spark-snowflake_" + SCALA_VERSION +
            "/" + SPARK_SNOWFLAKE_VERSION + "/spark-snowflake_" + SCALA_VERSION + "-" + SPARK_SNOWFLAKE_VERSION + ".jar");
    private static final JarDependency POSTGRESQL_JAR = new JarDependency("postgresql", "https://repo1.maven.org/maven2/org/postgresql/postgresql/" + POSTGRESQL_VERSION + "/postgresql-" + POSTGRESQL_VERSION + ".jar");

    private static final JarDependency DUCKDB_JAR = new JarDependency("duckdb_jdbc", "https://repo1.maven.org/maven2/org/duckdb/duckdb_jdbc/" + DUCKDB_VERSION + "/duckdb_jdbc-" + DUCKDB_VERSION + ".jar");
    private static final JarDependency AWS_JAVA_SDK_JAR = new JarDependency("aws-java-sdk-bundle", "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/" + AWS_JAVA_SDK_VERSION + "/aws-java-sdk-bundle-" + AWS_JAVA_SDK_VERSION + ".jar");
    private static final JarDependency HADOOP_AWS_JAR = new JarDependency("hadoop-aws", "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/" + HADOOP_AWS_VERSION + "/hadoop-aws-" + HADOOP_AWS_VERSION + ".jar");
    private static final JarDependency REDSHIFT_JDBC_JAR = new JarDependency("redshift-jdbc42", "https://repo1.maven.org/maven2/com/amazon/redshift/redshift-jdbc42/" + REDSHIFT_JDBC_VERSION + "/redshift-jdbc42-" + REDSHIFT_JDBC_VERSION + ".jar");
    private static JarDependency SPARK_REDSHIFT_JAR() {
        if (SCALA_VERSION.equals("2.13")) {
            return new JarDependency("spark-redshift", "https://s01.oss.sonatype.org/content/repositories/snapshots/ai/starlake/spark-redshift_" + SCALA_VERSION +
                    "/" + SPARK_REDSHIFT_VERSION() + "/spark-redshift_" + SCALA_VERSION + "-" + SPARK_REDSHIFT_VERSION() + ".jar");
        } else {
            return new JarDependency("spark-redshift", "https://repo1.maven.org/maven2/io/github/spark-redshift-community/spark-redshift_" + SCALA_VERSION +
                    "/" + SPARK_REDSHIFT_VERSION() + "/spark-redshift_" + SCALA_VERSION + "-" + SPARK_REDSHIFT_VERSION() + ".jar");
        }
    }
    private static final JarDependency STARLAKE_SNAPSHOT_JAR = new JarDependency("starlake-spark", "https://s01.oss.sonatype.org/content/repositories/snapshots/ai/starlake/starlake-spark" + SPARK_MAJOR_VERSION + "_" + SCALA_VERSION + "/" + SL_VERSION + "/starlake-spark" + SPARK_MAJOR_VERSION + "_" + SCALA_VERSION + "-" + SL_VERSION + "-assembly.jar");
    private static final JarDependency STARLAKE_RELEASE_JAR = new JarDependency("starlake-spark", "https://s01.oss.sonatype.org/content/repositories/releases/ai/starlake/starlake-spark" + SPARK_MAJOR_VERSION + "_" + SCALA_VERSION + "/" + SL_VERSION + "/starlake-spark" + SPARK_MAJOR_VERSION + "_" + SCALA_VERSION + "-" + SL_VERSION + "-assembly.jar");


    private static final JarDependency[] snowflakeDependencies = {
            SNOWFLAKE_JDBC_JAR,
            SPARK_SNOWFLAKE_JAR
    };

    private static final JarDependency[] redshiftDependencies = {
            AWS_JAVA_SDK_JAR,
            HADOOP_AWS_JAR,
            REDSHIFT_JDBC_JAR,
            SPARK_REDSHIFT_JAR()
    };

    private static final JarDependency[] azureDependencies = {
            HADOOP_AZURE_JAR,
            AZURE_STORAGE_JAR,
            JETTY_SERVER_JAR
    };

    private static final JarDependency[] postgresqlDependencies = {
            POSTGRESQL_JAR
    };

    private static final JarDependency[] duckDbDependencies = {
            DUCKDB_JAR
    };

    private static final JarDependency[] bigqueryDependencies = {
            SPARK_BQ_JAR
    };

    private static final JarDependency[] sparkDependencies = {
            DELTA_SPARK_JAR,
            DELTA_STORAGE_JAR
    };

    private static Optional<String> getEnv(String env) {
        // consider empty env variables as not set
        return Optional.ofNullable(System.getenv(env)).filter(s -> !s.isEmpty());
    }

    private static boolean envIsTrue(String env) {
        String value = getEnv(env).orElse("false");
        return !value.equals("false") && !value.equals("0");

    }
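
    // Illustration (hypothetical values): getEnv treats an empty variable as unset, so
    // SPARK_VERSION="" falls back to the default "3.5.3"; envIsTrue returns true for any
    // value other than "false" or "0", so ENABLE_BIGQUERY=1, =yes or =true all count as enabled.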

    private static void generateUnixVersions(File targetDir) throws IOException {
        generateVersions(targetDir, "versions.sh", "#!/bin/bash\nset -e\n\n",
                (writer) -> (variableName, value) -> {
                    try {
                        writer.write(variableName + "=" + "${" + variableName + ":-" + value + "}\n");
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                });
    }

    private static void generateWindowsVersions(File targetDir) throws IOException {
        generateVersions(targetDir, "versions.cmd", "@ECHO OFF\n\n",
                (writer) -> (variableName, value) -> {
                    try {
                        writer.write(
                                "if \"%" + variableName + "%\"==\"\" (\n" +
                                        "    SET " + variableName + "=" + value + "\n" +
                                        ")\n");
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                });
    }
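
    // Sample of the generated output (values shown are this file's defaults):
    //   versions.sh  : SPARK_VERSION=${SPARK_VERSION:-3.5.3}
    //   versions.cmd : if "%SPARK_VERSION%"=="" (
    //                      SET SPARK_VERSION=3.5.3
    //                  )
    // Either form keeps a variable that is already set in the caller's environment.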

    // Uses a Function returning a BiConsumer because no TriConsumer exists natively and this avoids creating a new type
    private static void generateVersions(File targetDir, String versionsFileName, String fileHeader, Function<BufferedWriter, BiConsumer<String, String>> variableWriter) throws IOException {
        File versionFile = new File(targetDir, versionsFileName);
        deleteFile(versionFile);
        BufferedWriter writer = new BufferedWriter(new FileWriter(versionFile));
        try {
            writer.write(fileHeader);
            variableWriter.apply(writer).accept("ENABLE_BIGQUERY", String.valueOf(ENABLE_BIGQUERY));
            variableWriter.apply(writer).accept("ENABLE_AZURE", String.valueOf(ENABLE_AZURE));
            variableWriter.apply(writer).accept("ENABLE_SNOWFLAKE", String.valueOf(ENABLE_SNOWFLAKE));
            variableWriter.apply(writer).accept("ENABLE_POSTGRESQL", String.valueOf(ENABLE_POSTGRESQL));
            variableWriter.apply(writer).accept("ENABLE_REDSHIFT", String.valueOf(ENABLE_REDSHIFT));
            variableWriter.apply(writer).accept("SL_VERSION", SL_VERSION);
            variableWriter.apply(writer).accept("SCALA_VERSION", SCALA_VERSION);
            variableWriter.apply(writer).accept("SPARK_VERSION", SPARK_VERSION);
            variableWriter.apply(writer).accept("HADOOP_VERSION", HADOOP_VERSION);
            variableWriter.apply(writer).accept("DUCKDB_VERSION", DUCKDB_VERSION);

            if (ENABLE_BIGQUERY || !anyDependencyEnabled()) {
                variableWriter.apply(writer).accept("SPARK_BQ_VERSION", SPARK_BQ_VERSION);
            }
            if (ENABLE_AZURE || !anyDependencyEnabled()) {
                variableWriter.apply(writer).accept("HADOOP_AZURE_VERSION", HADOOP_AZURE_VERSION);
                variableWriter.apply(writer).accept("AZURE_STORAGE_VERSION", AZURE_STORAGE_VERSION);
                variableWriter.apply(writer).accept("JETTY_VERSION", JETTY_VERSION);
            }
            if (ENABLE_SNOWFLAKE || !anyDependencyEnabled()) {
                variableWriter.apply(writer).accept("SPARK_SNOWFLAKE_VERSION", SPARK_SNOWFLAKE_VERSION);
                variableWriter.apply(writer).accept("SNOWFLAKE_JDBC_VERSION", SNOWFLAKE_JDBC_VERSION);
            }
            if (ENABLE_POSTGRESQL || !anyDependencyEnabled()) {
                variableWriter.apply(writer).accept("POSTGRESQL_VERSION", POSTGRESQL_VERSION);
            }
            if (ENABLE_REDSHIFT || !anyDependencyEnabled()) {
                variableWriter.apply(writer).accept("AWS_JAVA_SDK_VERSION", AWS_JAVA_SDK_VERSION);
                variableWriter.apply(writer).accept("HADOOP_AWS_VERSION", HADOOP_AWS_VERSION);
                variableWriter.apply(writer).accept("REDSHIFT_JDBC_VERSION", REDSHIFT_JDBC_VERSION);
                variableWriter.apply(writer).accept("SPARK_REDSHIFT_VERSION", SPARK_REDSHIFT_VERSION());
            }
        } finally {
            writer.close();
        }
        System.out.println(versionFile.getAbsolutePath() + " created");
    }

    private static void generateVersions(File targetDir, boolean unix) throws IOException {
        if (isWindowsOs() && !unix) {
            generateWindowsVersions(targetDir);
        } else {
            generateUnixVersions(targetDir);
        }

    }

    private static boolean anyDependencyEnabled() {
        for (boolean enabled : ALL_ENABLERS) {
            if (enabled) {
                return true;
            }
        }
        return ENABLE_ALL;
    }

    @Override
    public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
    }

    @Override
    public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
    }

    @Override
    public X509Certificate[] getAcceptedIssuers() {
        return new X509Certificate[0];
    }

    @Override
    public List<Proxy> select(URI uri) {
        return Collections.singletonList(proxy);
    }

    @Override
    public void connectFailed(URI uri, SocketAddress sa, IOException ioe) {
        throw new RuntimeException("Failed to connect to " + uri + " using proxy " + sa);
    }

    private static final Setup instance = new Setup();

    private static final TrustManager alwaysTrustManager = instance;

    private static final ProxySelector proxySelector = instance;

    private static void setHttpClient() throws NoSuchAlgorithmException, KeyManagementException {
        setProxy();
        HttpClient.Builder clientBuilder = HttpClient.newBuilder();
        clientBuilder.proxy(proxySelector);
        if (username != null && password != null) {
            Authenticator authenticator = new UserPwdAuth();
            clientBuilder.authenticator(authenticator);
        }
        if (host != null && envIsTrue("SL_INSECURE")) {
            System.out.println("Enabling insecure mode for SSL connections using proxy " + protocol + "://" + host + ":" + port);
            // Create a trust manager that does not validate certificate chains
            TrustManager[] trustAllCerts = new TrustManager[]{alwaysTrustManager};

            // Install the all-trusting trust manager
            SSLContext sc = SSLContext.getInstance("SSL");
            sc.init(null, trustAllCerts, new java.security.SecureRandom());
            clientBuilder.sslContext(sc);
        }
        client = clientBuilder.build();
    }
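
    // Illustration (assumed environment, for example only): with https_proxy set as in the
    // parseProxy() example and SL_INSECURE=true, the client built here routes requests through
    // the proxy, authenticates with the parsed credentials, and skips TLS certificate
    // validation; with no proxy variables set it connects directly with normal validation.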

    private static void updateSparkLog4j2Properties(File sparkDir) {
        File log4jFile = new File(new File(sparkDir, "conf"), "log4j2.properties");
        try {
            BufferedReader reader = new BufferedReader(new FileReader(log4jFile));
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("rootLogger.level =")|| line.startsWith("rootLogger.level=")) {
                    line = "rootLogger.level = ${env:SL_LOG_LEVEL:-error}";
                }
                sb.append(line).append("\n");
            }
            reader.close();

            sb.append("logger.shutdown.name=org.apache.spark.util.ShutdownHookManager").append("\n");
            sb.append("logger.shutdown.level=OFF").append("\n");
            sb.append("logger.env.name=org.apache.spark.SparkEnv").append("\n");
            sb.append("logger.env.level=error").append("\n");

            BufferedWriter writer = new BufferedWriter(new FileWriter(log4jFile));
            writer.write(sb.toString());
            writer.close();
        } catch (IOException e) {
            System.out.println("Failed to update log4j.properties");
            e.printStackTrace();
        }
    }

    private static void askUserWhichConfigToEnable() {
        if (!anyDependencyEnabled()) {
            System.out.println("Do you want to enable all datawarehouse configurations ? [y/n]");
            try {
                BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
                String answer = reader.readLine();
                if ("y".equalsIgnoreCase(answer)) {
                    ENABLE_AZURE = true;
                    ENABLE_BIGQUERY = true;
                    ENABLE_SNOWFLAKE = true;
                    ENABLE_REDSHIFT = true;
                    ENABLE_POSTGRESQL = true;
                    ENABLE_DUCKDB = true;
                } else {
                    System.out.println("Please enable the configurations you want to use by setting the corresponding environment variables below");
                    System.out.println("ENABLE_BIGQUERY, ENABLE_DATABRICKS, ENABLE_AZURE, ENABLE_SNOWFLAKE, ENABLE_REDSHIFT, ENABLE_POSTGRESQL, ENABLE_ANY_JDBC");
                    System.exit(1);
                }
            } catch (IOException e) {
                System.out.println("Failed to read user input");
                e.printStackTrace();
            }
        }
    }
    public static void main(String[] args) throws IOException {
        try {
            if (args.length == 0) {
                System.out.println("Please specify the target directory");
                System.exit(1);
            }

            askUserWhichConfigToEnable();

            final File targetDir = new File(args[0]);
            if (!targetDir.exists()) {
                targetDir.mkdirs();
                System.out.println("Created target directory " + targetDir.getAbsolutePath());
            }

            setHttpClient();

            if (!anyDependencyEnabled()) {
                ENABLE_AZURE = true;
                ENABLE_BIGQUERY = true;
                ENABLE_SNOWFLAKE = true;
                ENABLE_REDSHIFT = true;
                ENABLE_POSTGRESQL = true;
                ENABLE_DUCKDB = true;
            }
            final File binDir = new File(targetDir, "bin");

            if (isWindowsOs()) {
                final File hadoopDir = new File(binDir, "hadoop");
                final File hadoopBinDir = new File(hadoopDir, "bin");
                if (!hadoopBinDir.exists()) {
                    hadoopBinDir.mkdirs();
                }
                for (String lib : HADOOP_LIBS) {
                    final File libFile = new File(hadoopBinDir, lib.substring(lib.lastIndexOf("/") + 1));
                    downloadAndDisplayProgress(lib, libFile.getAbsolutePath());
                }

            } else {
                System.out.println("Unix OS detected");
            }

            File slDir = new File(binDir, "sl");
            if (SL_VERSION.endsWith("SNAPSHOT")) {
                deleteFile(new File(slDir, STARLAKE_SNAPSHOT_JAR.getUrlName()));
                downloadAndDisplayProgress(new JarDependency[]{STARLAKE_SNAPSHOT_JAR}, slDir, false);
            } else {
                deleteFile(new File(slDir, STARLAKE_RELEASE_JAR.getUrlName()));
                downloadAndDisplayProgress(new JarDependency[]{STARLAKE_RELEASE_JAR}, slDir, false);
            }

            File sparkDir = new File(binDir, "spark");
            if (!sparkDir.exists()) {
                downloadSpark(binDir);
            }

            File depsDir = new File(binDir, "deps");

            downloadAndDisplayProgress(sparkDependencies, depsDir, true);
            updateSparkLog4j2Properties(sparkDir);
            downloadAndDisplayProgress(duckDbDependencies, depsDir, true);

            if (ENABLE_BIGQUERY) {
                downloadAndDisplayProgress(bigqueryDependencies, depsDir, true);
            } else {
                deleteDependencies(bigqueryDependencies, depsDir);
            }
            if (ENABLE_AZURE) {
                downloadAndDisplayProgress(azureDependencies, depsDir, true);
            } else {
                deleteDependencies(azureDependencies, depsDir);
            }
            if (ENABLE_SNOWFLAKE) {
                downloadAndDisplayProgress(snowflakeDependencies, depsDir, true);
            } else {
                deleteDependencies(snowflakeDependencies, depsDir);
            }
            if (ENABLE_REDSHIFT) {
                downloadAndDisplayProgress(redshiftDependencies, depsDir, true);
            } else {
                deleteDependencies(redshiftDependencies, depsDir);
            }
            if (ENABLE_POSTGRESQL) {
                downloadAndDisplayProgress(postgresqlDependencies, depsDir, true);
            } else {
                deleteDependencies(postgresqlDependencies, depsDir);
            }

            boolean unix = args.length > 1 && args[1].equalsIgnoreCase("unix");
            generateVersions(targetDir, unix);
        } catch (Exception e) {
            System.out.println("Failed to download dependency" + e.getMessage());
            e.printStackTrace();
            System.exit(1);
        }
    }
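
    // Example invocation (paths and values are illustrative only):
    //   java Setup.java /opt/starlake                        -> prompts if no ENABLE_* variable is set
    //   ENABLE_ALL=true java Setup.java /opt/starlake unix   -> non-interactive, forces versions.sh
    // The resulting layout under the target directory is bin/spark, bin/sl, bin/deps,
    // bin/hadoop/bin (Windows only), plus versions.sh or versions.cmd at the top level.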

    public static void downloadSpark(File binDir) throws IOException, InterruptedException {
        JarDependency sparkJar = SPARK_JAR;
        if (!SCALA_VERSION.equals("2.12")) {
            sparkJar = SPARK_JAR_213;
        }
        downloadAndDisplayProgress(new JarDependency[]{sparkJar}, binDir, false);
        String tgzName = sparkJar.getUrlName();
        final File sparkFile = new File(binDir, tgzName);
        ProcessBuilder builder = new ProcessBuilder("tar", "-xzf", sparkFile.getAbsolutePath(), "-C", binDir.getAbsolutePath()).inheritIO();
        Process process = builder.start();
        try {
            process.waitFor();
        } catch (InterruptedException e) {
            System.out.println("Failed to extract spark tarball");
            e.printStackTrace();
        }
        sparkFile.delete();
        File sparkDir = new File(binDir, tgzName.substring(0, tgzName.lastIndexOf(".")));
        sparkDir.renameTo(new File(binDir, "spark"));
        sparkDir = new File(binDir, "spark");
        File log4j2File = new File(sparkDir, "conf/log4j2.properties.template");
        log4j2File.renameTo(new File(sparkDir, "conf/log4j2.properties"));
    }

    private static void downloadAndDisplayProgress(JarDependency[] dependencies, File targetDir, boolean replaceJar) throws IOException, InterruptedException {
        if (!targetDir.exists()) {
            targetDir.mkdirs();
        }
        if (replaceJar) {
            deleteDependencies(dependencies, targetDir);
        }
        for (JarDependency dependency : dependencies) {
            final File targetFile = new File(targetDir, dependency.getUrlName());
            downloadAndDisplayProgress(dependency.url, targetFile.getAbsolutePath());
        }
    }

    private static void deleteDependencies(JarDependency[] dependencies, File targetDir) {
        if (targetDir.exists()) {
            for (JarDependency dependency : dependencies) {
                File[] files = targetDir.listFiles(f -> f.getName().startsWith(dependency.artefactName));
                if (files != null) {
                    for (File file : files) {
                        deleteFile(file);
                    }
                }
            }
        }
    }

    private static void deleteFile(File file) {
        if (file.exists()) {
            if (file.delete()) {
                System.out.println(file.getAbsolutePath() + " deleted");
            }
        }
    }

    private static void downloadAndDisplayProgress(String urlStr, String file) throws IOException, InterruptedException {
        final int CHUNK_SIZE = 1024;
        int filePartIndex = urlStr.lastIndexOf("/") + 1;
        String name = urlStr.substring(filePartIndex);
        String urlFolder = urlStr.substring(0, filePartIndex);
        System.out.println("Downloading to " + file + " from " + urlFolder + " ...");
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(urlStr))
                .build();
        HttpResponse<InputStream> response = client.send(request, HttpResponse.BodyHandlers.ofInputStream());
        long lengthOfFile = response.headers().firstValueAsLong("Content-Length").orElse(0L);
        InputStream input = new BufferedInputStream(response.body());
        OutputStream output = new FileOutputStream(file);
        byte[] data = new byte[CHUNK_SIZE];
        long total = 0;
        int count;
        int loop = 0;
        int sbLen = 0;
        long lastTime = System.currentTimeMillis();
        while ((count = input.read(data)) != -1) {
            total += count;
            output.write(data, 0, count);
            loop++;
            if (loop % 1000 == 0) {
                StringBuilder sb = new StringBuilder(" " + (total / 1024 / 1024) + "/" + (lengthOfFile / 1024 / 1024) + " MB");
                if (lengthOfFile > 0) {
                    sb.append(" (");
                    sb.append(total * 100 / lengthOfFile);
                    sb.append("%)");
                }
                long currentTime = System.currentTimeMillis();
                long timeDiff = currentTime - lastTime;
                double bytesPerMilliSec = (CHUNK_SIZE * 1000.0 / timeDiff);
                double bytesPerSec = bytesPerMilliSec * 1000;
                double mbPerSec = bytesPerSec / 1024 / 1024;
                sb.append(" ");
                sb.append(String.format("[%.2f MB/sec]", mbPerSec));
                lastTime = currentTime;
                sbLen = sb.length();
                for (int cnt = 0; cnt < sbLen; cnt++) {
                    System.out.print("\b");
                }
                System.out.print(sb);
            }
        }
        for (int cnt = 0; cnt < sbLen; cnt++) {
            System.out.print("\b");
        }
        System.out.print(name + " downloaded");
        System.out.println();
        output.flush();
        output.close();
        input.close();
    }
}



