io.delta.flink.internal.table.HadoopUtils

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.delta.flink.internal.table;

import java.io.File;

import org.apache.flink.configuration.ConfigConstants;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility class to discover and process the Hadoop configuration on a Flink cluster.
 *
 * <p>
 * This class was backported from Flink's flink-hadoop-fs module and contains only a subset of
 * the methods of the original class; we kept only the methods we need.
 * <p>
 * The reason for backporting this into the connector code is that the original implementation
 * requires various additional Hadoop classes to be present on the classpath, which would have
 * required adding extra Hadoop dependencies to the project.
 */
public class HadoopUtils {

    private static final Logger LOG = LoggerFactory.getLogger(HadoopUtils.class);

    /**
     * The prefixes that Flink adds to the Hadoop config.
     */
    private static final String[] FLINK_CONFIG_PREFIXES = {"flink.hadoop."};

    /**
     * Creates a Hadoop configuration object by looking for the Hadoop config in environment
     * variables and in the Flink cluster configuration (deprecated). The configuration is
     * loaded in the following order:
     * <ol>
     *     <li>HADOOP_HOME environment variable</li>
     *     <li>hdfs-default.xml pointed to by the deprecated Flink config option
     *     `fs.hdfs.hdfsdefault`</li>
     *     <li>HADOOP_CONF_DIR environment variable</li>
     *     <li>Properties from the Flink cluster config prefixed with `flink.hadoop.`</li>
     * </ol>
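     *
     * <p>A minimal usage sketch (illustrative only; the `flink.hadoop.`-prefixed property
     * below is just an example key, not something this class requires):
     * <pre>{@code
     * org.apache.flink.configuration.Configuration flinkConf =
     *     new org.apache.flink.configuration.Configuration();
     * // Any entry prefixed with "flink.hadoop." is copied into the Hadoop configuration
     * // with the prefix stripped, so this ends up as dfs.replication=2 in the result.
     * flinkConf.setString("flink.hadoop.dfs.replication", "2");
     * Configuration hadoopConf = HadoopUtils.getHadoopConfiguration(flinkConf);
     * }</pre>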
     *
     * @param flinkConfiguration Flink cluster configuration.
     * @return Hadoop's configuration object.
     */
    @SuppressWarnings("deprecation")
    public static Configuration getHadoopConfiguration(
            org.apache.flink.configuration.Configuration flinkConfiguration) {

        // Instantiate an HdfsConfiguration to load the hdfs-site.xml and hdfs-default.xml
        // from the classpath.
        Configuration result = new Configuration();
        boolean foundHadoopConfiguration = false;

        // We need to load both core-site.xml and hdfs-site.xml to determine the default fs path
        // and the hdfs configuration.
        // The properties of a newly added resource will override the ones in previous resources,
        // so a configuration file with higher priority should be added later.

        // Approach 1: HADOOP_HOME environment variable
        String[] possibleHadoopConfPaths = new String[2];

        final String hadoopHome = System.getenv("HADOOP_HOME");
        if (hadoopHome != null) {
            LOG.debug("Searching Hadoop configuration files in HADOOP_HOME: {}", hadoopHome);
            possibleHadoopConfPaths[0] = hadoopHome + "/conf";
            possibleHadoopConfPaths[1] = hadoopHome + "/etc/hadoop"; // hadoop 2.2
        }

        for (String possibleHadoopConfPath : possibleHadoopConfPaths) {
            if (possibleHadoopConfPath != null) {
                foundHadoopConfiguration = addHadoopConfIfFound(result, possibleHadoopConfPath);
            }
        }

        // Approach 2: Flink configuration (deprecated)
        final String hdfsDefaultPath =
            flinkConfiguration.getString(ConfigConstants.HDFS_DEFAULT_CONFIG, null);
        if (hdfsDefaultPath != null) {
            result.addResource(new org.apache.hadoop.fs.Path(hdfsDefaultPath));
            LOG.debug(
                "Using hdfs-default configuration-file path from Flink config: {}",
                hdfsDefaultPath);
            foundHadoopConfiguration = true;
        }

        final String hdfsSitePath =
            flinkConfiguration.getString(ConfigConstants.HDFS_SITE_CONFIG, null);
        if (hdfsSitePath != null) {
            result.addResource(new org.apache.hadoop.fs.Path(hdfsSitePath));
            LOG.debug(
                "Using hdfs-site configuration-file path from Flink config: {}", hdfsSitePath);
            foundHadoopConfiguration = true;
        }

        final String hadoopConfigPath =
            flinkConfiguration.getString(ConfigConstants.PATH_HADOOP_CONFIG, null);
        if (hadoopConfigPath != null) {
            LOG.debug("Searching Hadoop configuration files in Flink config: {}", hadoopConfigPath);
            foundHadoopConfiguration =
                addHadoopConfIfFound(result, hadoopConfigPath) || foundHadoopConfiguration;
        }

        // Approach 3: HADOOP_CONF_DIR environment variable
        String hadoopConfDir = System.getenv("HADOOP_CONF_DIR");
        if (hadoopConfDir != null) {
            LOG.debug("Searching Hadoop configuration files in HADOOP_CONF_DIR: {}", hadoopConfDir);
            foundHadoopConfiguration =
                addHadoopConfIfFound(result, hadoopConfDir) || foundHadoopConfiguration;
        }

        // Approach 4: Flink configuration
        // Add every configuration key with prefix 'flink.hadoop.' in the Flink conf to the
        // Hadoop conf, with the prefix stripped.
        for (String key : flinkConfiguration.keySet()) {
            for (String prefix : FLINK_CONFIG_PREFIXES) {
                if (key.startsWith(prefix)) {
                    String newKey = key.substring(prefix.length());
                    String value = flinkConfiguration.getString(key, null);
                    result.set(newKey, value);
                    LOG.debug(
                        "Adding Flink config entry for {} as {}={} to Hadoop config",
                        key, newKey, value);
                    foundHadoopConfiguration = true;
                }
            }
        }

        if (!foundHadoopConfiguration) {
            LOG.warn(
                "Could not find Hadoop configuration via any of the supported methods "
                    + "(Flink configuration, environment variables).");
        }

        return result;
    }

    /**
     * Searches for Hadoop configuration files in the given path and adds them to the
     * configuration if found.
     */
    private static boolean addHadoopConfIfFound(
            Configuration configuration, String possibleHadoopConfPath) {
        boolean foundHadoopConfiguration = false;
        if (new File(possibleHadoopConfPath).exists()) {
            if (new File(possibleHadoopConfPath + "/core-site.xml").exists()) {
                configuration.addResource(
                    new org.apache.hadoop.fs.Path(possibleHadoopConfPath + "/core-site.xml"));
                LOG.debug(
                    "Adding " + possibleHadoopConfPath + "/core-site.xml to hadoop configuration");
                foundHadoopConfiguration = true;
            }
            if (new File(possibleHadoopConfPath + "/hdfs-site.xml").exists()) {
                configuration.addResource(
                    new org.apache.hadoop.fs.Path(possibleHadoopConfPath + "/hdfs-site.xml"));
                LOG.debug(
                    "Adding " + possibleHadoopConfPath + "/hdfs-site.xml to hadoop configuration");
                foundHadoopConfiguration = true;
            }
        }
        return foundHadoopConfiguration;
    }
}
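
The ordering of the four approaches matters because of the Hadoop Configuration precedence rule noted in the method's comments: properties from a resource added later override the same properties from resources added earlier, so higher-priority sources must be added last. The following standalone sketch illustrates that rule; the demo class and the demo.key property are invented for illustration and are not part of the connector.

// Hypothetical demo (not part of the connector): shows that a Hadoop config
// resource added later overrides one added earlier.
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

import org.apache.hadoop.conf.Configuration;

public final class ResourcePrecedenceDemo {

    public static void main(String[] args) throws Exception {
        // Two minimal Hadoop config files that both define "demo.key".
        File lower = File.createTempFile("lower-priority", ".xml");
        File higher = File.createTempFile("higher-priority", ".xml");
        Files.write(lower.toPath(),
            ("<configuration><property><name>demo.key</name>"
                + "<value>from-lower</value></property></configuration>")
                .getBytes(StandardCharsets.UTF_8));
        Files.write(higher.toPath(),
            ("<configuration><property><name>demo.key</name>"
                + "<value>from-higher</value></property></configuration>")
                .getBytes(StandardCharsets.UTF_8));

        // `false` skips Hadoop's default resources (core-default.xml etc.) so the
        // demo only sees the two files added below.
        Configuration conf = new Configuration(false);
        conf.addResource(new org.apache.hadoop.fs.Path(lower.getAbsolutePath()));
        conf.addResource(new org.apache.hadoop.fs.Path(higher.getAbsolutePath()));

        // Prints "from-higher": the resource added later wins, which is why
        // getHadoopConfiguration adds higher-priority sources last.
        System.out.println(conf.get("demo.key"));
    }
}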