/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.fs.gs.utils;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.CoreOptions;
import org.apache.flink.runtime.util.HadoopConfigLoader;

import com.google.auth.oauth2.GoogleCredentials;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Collections;
import java.util.Optional;

/** Utility class for configuration of Hadoop and Google Storage. */
public class ConfigUtils {

    private static final Logger LOGGER = LoggerFactory.getLogger(ConfigUtils.class);

    private static final String HADOOP_CONFIG_PREFIX = "fs.gs.";

    private static final String[] FLINK_CONFIG_PREFIXES = {"gs.", HADOOP_CONFIG_PREFIX};

    private static final String[][] MIRRORED_CONFIG_KEYS = {};

    private static final String FLINK_SHADING_PREFIX = "";

    private static final String HADOOP_OPTION_ENABLE_SERVICE_ACCOUNT =
            "google.cloud.auth.service.account.enable";

    private static final String HADOOP_OPTION_SERVICE_ACCOUNT_JSON_KEYFILE =
            "google.cloud.auth.service.account.json.keyfile";

    /**
     * Loads the Hadoop configuration by reading from a Hadoop conf dir (if one exists) and then
     * overlaying properties derived from the Flink config.
     *
     * @param flinkConfig The Flink config.
     * @param configContext The config context.
     * @return The Hadoop config.
     */
    public static org.apache.hadoop.conf.Configuration getHadoopConfiguration(
            Configuration flinkConfig, ConfigContext configContext) {

        // create a starting hadoop configuration
        org.apache.hadoop.conf.Configuration hadoopConfig =
                new org.apache.hadoop.conf.Configuration();

        // look for a hadoop configuration directory and load configuration from it if found
        Optional<String> hadoopConfigDir =
                Optional.ofNullable(flinkConfig.get(CoreOptions.FLINK_HADOOP_CONF_DIR));
        if (!hadoopConfigDir.isPresent()) {
            hadoopConfigDir = configContext.getenv("HADOOP_CONF_DIR");
        }
        hadoopConfigDir.ifPresent(
                configDir -> {
                    LOGGER.info("Loading Hadoop config resources from {}", configDir);
                    hadoopConfig.addResource(configContext.loadHadoopConfigFromDir(configDir));
                });

        // now, load hadoop config from flink and add to base hadoop config
        HadoopConfigLoader hadoopConfigLoader =
                new HadoopConfigLoader(
                        FLINK_CONFIG_PREFIXES,
                        MIRRORED_CONFIG_KEYS,
                        HADOOP_CONFIG_PREFIX,
                        Collections.emptySet(),
                        Collections.emptySet(),
                        FLINK_SHADING_PREFIX);
        hadoopConfigLoader.setFlinkConfig(flinkConfig);
        org.apache.hadoop.conf.Configuration flinkHadoopConfig =
                hadoopConfigLoader.getOrLoadHadoopConfig();
        hadoopConfig.addResource(flinkHadoopConfig);

        // reload the config resources and return it
        hadoopConfig.reloadConfiguration();
        return hadoopConfig;
    }
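
    /*
     * Illustrative usage (a sketch, not part of the original file; ExampleRuntimeConfigContext
     * is the sample ConfigContext implementation at the bottom of this class). A Flink option
     * prefixed with "gs." or "fs.gs." is re-keyed under "fs.gs." in the resulting Hadoop config:
     *
     *   Configuration flinkConfig = new Configuration();
     *   flinkConfig.setString("gs.project.id", "my-project"); // hypothetical option value
     *   org.apache.hadoop.conf.Configuration hadoopConfig =
     *           ConfigUtils.getHadoopConfiguration(flinkConfig, new ExampleRuntimeConfigContext());
     *   // hadoopConfig.get("fs.gs.project.id") now returns "my-project"
     */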

    /**
     * Creates an (optional) GoogleCredentials instance for the given Hadoop config and environment.
     *
     * @param hadoopConfig The Hadoop config.
     * @param configContext The config context.
     * @return The optional GoogleCredentials instance.
     */
    public static Optional<GoogleCredentials> getStorageCredentials(
            org.apache.hadoop.conf.Configuration hadoopConfig, ConfigContext configContext) {

        // follow the same rules as for the Hadoop connector, i.e.
        // 1) only use service credentials at all if Hadoop
        // "google.cloud.auth.service.account.enable" is true (default: true)
        // 2) use GOOGLE_APPLICATION_CREDENTIALS as location of credentials, if supplied
        // 3) use Hadoop "google.cloud.auth.service.account.json.keyfile" as location of
        // credentials, if supplied
        // 4) use no credentials

        // store any credentials we are to use, here
        Optional<String> credentialsPath = Optional.empty();

        // only look for credentials if service account support is enabled
        boolean enableServiceAccount =
                hadoopConfig.getBoolean(HADOOP_OPTION_ENABLE_SERVICE_ACCOUNT, true);
        if (enableServiceAccount) {

            // load google application credentials, and then fall back to
            // "google.cloud.auth.service.account.json.keyfile" from Hadoop
            credentialsPath = configContext.getenv("GOOGLE_APPLICATION_CREDENTIALS");
            if (credentialsPath.isPresent()) {
                LOGGER.info(
                        "GSRecoverableWriter is using GOOGLE_APPLICATION_CREDENTIALS at {}",
                        credentialsPath.get());
            } else {
                credentialsPath =
                        Optional.ofNullable(
                                hadoopConfig.get(HADOOP_OPTION_SERVICE_ACCOUNT_JSON_KEYFILE));
                credentialsPath.ifPresent(
                        path ->
                                LOGGER.info(
                                        "GSRecoverableWriter is using credentials from Hadoop at {}",
                                        path));
            }
        }

        // if we have a credentials path, load and return the credentials; otherwise, return empty
        if (credentialsPath.isPresent()) {
            LOGGER.info(
                    "Creating GSRecoverableWriter using credentials from {}",
                    credentialsPath.get());
            GoogleCredentials credentials =
                    configContext.loadStorageCredentialsFromFile(credentialsPath.get());
            return Optional.of(credentials);
        } else {
            LOGGER.info("Creating GSRecoverableWriter using no credentials");
            return Optional.empty();
        }
    }
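
    /*
     * Illustrative usage (a sketch, not part of the original file): per the resolution rules
     * above, GOOGLE_APPLICATION_CREDENTIALS takes precedence over the Hadoop keyfile option,
     * and an empty result means the storage client should be built without explicit credentials.
     *
     *   Optional<GoogleCredentials> credentials =
     *           ConfigUtils.getStorageCredentials(hadoopConfig, new ExampleRuntimeConfigContext());
     *   credentials.ifPresent(c -> LOGGER.info("Using explicit Google credentials"));
     */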

    /**
     * Helper to serialize a Hadoop config to a string, for logging.
     *
     * @param hadoopConfig The Hadoop config.
     * @return A string with the Hadoop properties.
     * @throws RuntimeException On an underlying IO failure.
     */
    public static String stringifyHadoopConfig(org.apache.hadoop.conf.Configuration hadoopConfig)
            throws RuntimeException {
        try (Writer writer = new StringWriter()) {
            org.apache.hadoop.conf.Configuration.dumpConfiguration(hadoopConfig, writer);
            return writer.toString();
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    }
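
    /*
     * Illustrative usage (a sketch, not part of the original file): log the effective Hadoop
     * properties only when debug logging is enabled, since dumping the full config is
     * relatively expensive.
     *
     *   if (LOGGER.isDebugEnabled()) {
     *       LOGGER.debug("Hadoop config: {}", ConfigUtils.stringifyHadoopConfig(hadoopConfig));
     *   }
     */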

    /**
     * Interface that provides context-specific config helper functions, factored out to support
     * unit testing.
     */
    public interface ConfigContext {
        /**
         * Returns a named environment variable.
         *
         * @param name The name of the variable.
         * @return The value of the variable, if set.
         */
        Optional<String> getenv(String name);

        /**
         * Loads the Hadoop configuration from a directory.
         *
         * @param configDir The Hadoop config directory.
         * @return The Hadoop configuration.
         */
        org.apache.hadoop.conf.Configuration loadHadoopConfigFromDir(String configDir);

        /**
         * Loads the Google credentials from a file.
         *
         * @param credentialsPath The path of the credentials file.
         * @return The Google credentials.
         */
        GoogleCredentials loadStorageCredentialsFromFile(String credentialsPath);
    }
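
    /**
     * A minimal sketch of a runtime {@link ConfigContext} implementation (illustrative only, not
     * part of the original class): it reads environment variables via {@link System#getenv},
     * loads Hadoop config resources from the standard site files in a directory (the chosen file
     * names are assumptions for illustration), and loads Google credentials from a
     * service-account JSON key file.
     */
    static class ExampleRuntimeConfigContext implements ConfigContext {

        @Override
        public Optional<String> getenv(String name) {
            return Optional.ofNullable(System.getenv(name));
        }

        @Override
        public org.apache.hadoop.conf.Configuration loadHadoopConfigFromDir(String configDir) {
            // start from an empty config so that only the directory's resources are picked up
            org.apache.hadoop.conf.Configuration hadoopConfig =
                    new org.apache.hadoop.conf.Configuration(false);
            hadoopConfig.addResource(new org.apache.hadoop.fs.Path(configDir, "core-site.xml"));
            hadoopConfig.addResource(new org.apache.hadoop.fs.Path(configDir, "hdfs-site.xml"));
            return hadoopConfig;
        }

        @Override
        public GoogleCredentials loadStorageCredentialsFromFile(String credentialsPath) {
            try (java.io.FileInputStream credentialsStream =
                    new java.io.FileInputStream(credentialsPath)) {
                return GoogleCredentials.fromStream(credentialsStream);
            } catch (IOException ex) {
                throw new RuntimeException(ex);
            }
        }
    }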
}