All downloads are free. Search and download functionality uses the official Maven repository.

org.datacleaner.spark.utils.HadoopUtils Maven / Gradle / Ivy

There is a newer version: 6.0.0
Show newest version
/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.spark.utils;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.metamodel.util.FileHelper;

import com.google.common.base.Strings;

/**
 * Static helpers for locating a Hadoop configuration directory and for
 * building Hadoop {@link Configuration} and {@link FileSystem} objects from
 * the files found in it.
 */
public class HadoopUtils {
    /**
     * Registers a file from the configuration directory as a resource of the
     * given configuration, but only if that file is present.
     *
     * @param conf
     *            the configuration to add the resource to
     * @param hadoopConfigurationDirectory
     *            the directory in which to look for the file
     * @param filename
     *            the name of the file, also used as the resource name
     */
    private static void addResourceIfExists(final Configuration conf, final File hadoopConfigurationDirectory,
            final String filename) {
        final File file = new File(hadoopConfigurationDirectory, filename);
        // isFile() rather than exists(): a directory that happens to carry the
        // expected name cannot be opened as a stream and is therefore skipped.
        if (file.isFile()) {
            // NOTE(review): the stream is handed over to the Configuration and
            // not closed here - confirm that Hadoop closes it after parsing.
            final InputStream inputStream = FileHelper.getInputStream(file);
            conf.addResource(inputStream, filename);
        }
    }

    /**
     * Gets a candidate directory based on a file path, if it exists and if
     * another candidate hasn't already been resolved.
     *
     * @param existingCandidate
     *            an existing candidate directory. If this is non-null, it will
     *            be returned immediately.
     * @param path
     *            the path of a directory, may be null or empty
     * @return a candidate directory, or null if none was resolved.
     */
    private static File getDirectoryIfExists(final File existingCandidate, final String path) {
        if (existingCandidate != null) {
            return existingCandidate;
        }
        if (Strings.isNullOrEmpty(path)) {
            return null;
        }
        final File directory = new File(path);
        // isDirectory() is only true for paths that exist, so no separate
        // exists() check is needed.
        return directory.isDirectory() ? directory : null;
    }

    /**
     * Resolves the Hadoop configuration directory. The first of the following
     * that points to an existing directory wins: system property
     * {@code YARN_CONF_DIR}, system property {@code HADOOP_CONF_DIR},
     * environment variable {@code YARN_CONF_DIR}, environment variable
     * {@code HADOOP_CONF_DIR}.
     *
     * @return the resolved directory, or null if none of the sources points
     *         to an existing directory
     */
    public static File getHadoopConfigurationDirectoryToUse() {
        File candidate = getDirectoryIfExists(null, System.getProperty("YARN_CONF_DIR"));
        candidate = getDirectoryIfExists(candidate, System.getProperty("HADOOP_CONF_DIR"));
        candidate = getDirectoryIfExists(candidate, System.getenv("YARN_CONF_DIR"));
        candidate = getDirectoryIfExists(candidate, System.getenv("HADOOP_CONF_DIR"));
        return candidate;
    }

    /**
     * Builds a Hadoop configuration from the directory resolved by
     * {@link #getHadoopConfigurationDirectoryToUse()}.
     *
     * @return a new configuration
     * @throws IllegalStateException
     *             if no configuration directory could be resolved
     */
    public static Configuration getHadoopConfiguration() {
        return getHadoopConfiguration(getHadoopConfigurationDirectoryToUse());
    }

    /**
     * Builds a Hadoop configuration and adds {@code core-site.xml} and
     * {@code hdfs-site.xml} from the given directory, for each of those files
     * that is present.
     *
     * @param hadoopConfigurationDirectory
     *            the directory holding the Hadoop configuration files
     * @return a new configuration
     * @throws IllegalStateException
     *             if {@code hadoopConfigurationDirectory} is null
     */
    public static Configuration getHadoopConfiguration(final File hadoopConfigurationDirectory) {
        // Validate before doing any work, so that nothing is constructed on
        // the failure path.
        if (hadoopConfigurationDirectory == null) {
            throw new IllegalStateException("Environment variable YARN_CONF_DIR or HADOOP_CONF_DIR must be set");
        }

        final Configuration conf = new Configuration();
        addResourceIfExists(conf, hadoopConfigurationDirectory, "core-site.xml");
        addResourceIfExists(conf, hadoopConfigurationDirectory, "hdfs-site.xml");

        return conf;
    }

    /**
     * Creates a file system via {@link FileSystem#newInstance(Configuration)},
     * using the configuration from {@link #getHadoopConfiguration()}.
     *
     * @return the file system instance
     * @throws IOException
     *             if the file system could not be created
     * @throws IllegalStateException
     *             if no configuration directory could be resolved
     */
    public static FileSystem getFileSystem() throws IOException {
        return FileSystem.newInstance(getHadoopConfiguration());
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy