/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.metamodel.util;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.net.URI;
import java.util.Objects;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.metamodel.MetaModelException;

import com.google.common.base.Strings;

/**
 * A {@link Resource} implementation that connects to Apache Hadoop's HDFS
 * distributed file system.
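 * <p>
 * A minimal usage sketch (the namenode host, port and file path below are
 * placeholders):
 *
 * <pre>{@code
 * HdfsResource resource = new HdfsResource("hdfs://namenode:8020/data/example.csv");
 * try (InputStream in = resource.read()) {
 *     // consume the file contents
 * }
 * }</pre>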
 */
public class HdfsResource extends AbstractResource implements Serializable {

    private static final long serialVersionUID = 1L;

    public static final String SYSTEM_PROPERTY_HADOOP_CONF_DIR_ENABLED = "metamodel.hadoop.use_hadoop_conf_dir";

    public static final String SCHEME_HDFS = "hdfs";
    public static final String SCHEME_SWIFT = "swift";
    public static final String SCHEME_EMRFS = "emrfs";
    public static final String SCHEME_MAPRFS = "maprfs";
    public static final String SCHEME_S3 = "s3";
    public static final String SCHEME_FTP = "ftp";

    private final String _scheme;
    private final String _hadoopConfDir;
    private final String _hostname;
    private final int _port;
    private final String _filepath;
    private transient Path _path;

    /**
     * Creates a {@link HdfsResource}
     *
     * @param url
     *            a URL of the form: scheme://hostname:port/path/to/file
     */
    public HdfsResource(String url) {
        this(url, null);
    }

    /**
     * Creates a {@link HdfsResource}
     *
     * @param url
     *            a URL of the form: scheme://hostname:port/path/to/file
     * @param hadoopConfDir
     *            the path of a directory containing the Hadoop and HDFS
     *            configuration file(s).
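     *
     * <p>
     * Example sketch (the URL and configuration directory below are
     * placeholders):
     *
     * <pre>{@code
     * new HdfsResource("hdfs://namenode:8020/data/example.csv", "/etc/hadoop/conf");
     * }</pre>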
     */
    public HdfsResource(String url, String hadoopConfDir) {
        if (url == null) {
            throw new IllegalArgumentException("Url cannot be null");
        }

        final URI uri = URI.create(url);

        _scheme = uri.getScheme();
        _hostname = uri.getHost();
        _port = uri.getPort();
        _filepath = uri.getPath();
        _hadoopConfDir = hadoopConfDir;
    }

    /**
     * Creates a {@link HdfsResource} using the "hdfs" scheme
     *
     * @param hostname
     *            the HDFS (namenode) hostname
     * @param port
     *            the HDFS (namenode) port number
     * @param filepath
     *            the path on HDFS to the file, starting with slash ('/')
     */
    public HdfsResource(String hostname, int port, String filepath) {
        this(SCHEME_HDFS, hostname, port, filepath, null);
    }

    /**
     * Creates a {@link HdfsResource}
     *
     * @param scheme
     *            the scheme to use (consider using {@link #SCHEME_HDFS} or any
     *            of the other "SCHEME_" constants).
     * @param hostname
     *            the HDFS (namenode) hostname
     * @param port
     *            the HDFS (namenode) port number
     * @param filepath
     *            the path on HDFS to the file, starting with slash ('/')
     * @param hadoopConfDir
     *            the path of a directory containing the Hadoop and HDFS
     *            configuration file(s).
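     *
     * <p>
     * Example sketch (hostname, port and path below are placeholders):
     *
     * <pre>{@code
     * new HdfsResource(HdfsResource.SCHEME_HDFS, "namenode", 8020, "/data/example.csv", null);
     * }</pre>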
     */
    public HdfsResource(String scheme, String hostname, int port, String filepath, String hadoopConfDir) {
        _scheme = scheme;
        _hostname = hostname;
        _port = port;
        _filepath = filepath;
        _hadoopConfDir = hadoopConfDir;
    }

    public String getScheme() {
        if (_scheme == null) {
            // should only happen for old objects deserialized from before
            // METAMODEL-220 introduced dynamic schemes
            return SCHEME_HDFS;
        }
        return _scheme;
    }

    public String getFilepath() {
        return _filepath;
    }

    public String getHostname() {
        return _hostname;
    }

    public int getPort() {
        return _port;
    }

    public String getHadoopConfDir() {
        return _hadoopConfDir;
    }

    @Override
    public String getName() {
        final int lastSlash = _filepath.lastIndexOf('/');
        if (lastSlash != -1) {
            return _filepath.substring(lastSlash + 1);
        }
        return _filepath;
    }

    @Override
    public String getQualifiedPath() {
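        // Reassembles the full URL, e.g. "hdfs://namenode:8020/path/to/file"
        // (hostname and port are omitted when not set).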
        final StringBuilder sb = new StringBuilder();
        sb.append(getScheme());
        sb.append("://");
        if (_hostname != null) {
            sb.append(_hostname);
        }
        if (_port > 0) {
            sb.append(':');
            sb.append(_port);
        }
        sb.append(_filepath);
        return sb.toString();
    }

    @Override
    public boolean isReadOnly() {
        // We assume it is not read-only
        return false;
    }

    @Override
    public boolean isExists() {
        final FileSystem fs = getHadoopFileSystem();
        try {
            return fs.exists(getHadoopPath());
        } catch (Exception e) {
            throw wrapException(e);
        } finally {
            FileHelper.safeClose(fs);
        }
    }

    @Override
    public long getSize() {
        final FileSystem fs = getHadoopFileSystem();
        try {
            if (fs.isFile(getHadoopPath())) {
                return fs.getFileStatus(getHadoopPath()).getLen();
            } else {
                return fs.getContentSummary(getHadoopPath()).getLength();
            }
        } catch (Exception e) {
            throw wrapException(e);
        } finally {
            FileHelper.safeClose(fs);
        }
    }

    @Override
    public long getLastModified() {
        final FileSystem fs = getHadoopFileSystem();
        try {
            return fs.getFileStatus(getHadoopPath()).getModificationTime();
        } catch (Exception e) {
            throw wrapException(e);
        } finally {
            FileHelper.safeClose(fs);
        }
    }

    @Override
    public OutputStream write() throws ResourceException {
        final FileSystem fs = getHadoopFileSystem();
        try {
            final FSDataOutputStream out = fs.create(getHadoopPath(), true);
            return new HdfsFileOutputStream(out, fs);
        } catch (IOException e) {
            // no wrapping stream was returned, so 'fs' can be closed here
            FileHelper.safeClose(fs);
            throw wrapException(e);
        }
    }

    @Override
    public OutputStream append() throws ResourceException {
        final FileSystem fs = getHadoopFileSystem();
        try {
            final FSDataOutputStream out = fs.append(getHadoopPath());
            return new HdfsFileOutputStream(out, fs);
        } catch (IOException e) {
            // no wrapping stream was returned, so 'fs' can be closed here
            FileHelper.safeClose(fs);
            throw wrapException(e);
        }
    }

    @Override
    public InputStream read() throws ResourceException {
        final FileSystem fs = getHadoopFileSystem();
        final InputStream in;
        try {
            final Path hadoopPath = getHadoopPath();
            // return a wrapper InputStream which manages the 'fs' closeable
            if (fs.isFile(hadoopPath)) {
                in = fs.open(hadoopPath);
                return new HdfsFileInputStream(in, fs);
            } else {
                return new HdfsDirectoryInputStream(hadoopPath, fs);
            }
        } catch (Exception e) {
            // no wrapping stream was returned, so 'fs' can be closed here
            FileHelper.safeClose(fs);
            throw wrapException(e);
        }
    }

    private RuntimeException wrapException(Exception e) {
        if (e instanceof RuntimeException) {
            return (RuntimeException) e;
        }
        return new MetaModelException(e);
    }

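    /**
     * Builds the Hadoop {@link Configuration} for this resource: sets
     * {@code fs.defaultFS} from the hostname and port when both are available,
     * and adds {@code core-site.xml} and {@code hdfs-site.xml} from the
     * resolved Hadoop configuration directory, if one is found.
     */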
    public Configuration getHadoopConfiguration() {
        final Configuration conf = new Configuration();
        if (_hostname != null && _port > 0) {
            conf.set("fs.defaultFS", getScheme() + "://" + _hostname + ":" + _port);
        }

        final File hadoopConfigurationDirectory = getHadoopConfigurationDirectoryToUse();
        if (hadoopConfigurationDirectory != null) {
            addResourceIfExists(conf, hadoopConfigurationDirectory, "core-site.xml");
            addResourceIfExists(conf, hadoopConfigurationDirectory, "hdfs-site.xml");
        }

        return conf;
    }

    private void addResourceIfExists(Configuration conf, File hadoopConfigurationDirectory, String filename) {
        final File file = new File(hadoopConfigurationDirectory, filename);
        if (file.exists()) {
            final InputStream inputStream = FileHelper.getInputStream(file);
            conf.addResource(inputStream, filename);
        }
    }

    private File getHadoopConfigurationDirectoryToUse() {
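        // Resolution order: an explicitly provided directory wins; the
        // YARN_CONF_DIR/HADOOP_CONF_DIR system properties and environment
        // variables are only consulted when the opt-in system property
        // SYSTEM_PROPERTY_HADOOP_CONF_DIR_ENABLED is set to "true".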
        File candidate = getDirectoryIfExists(null, _hadoopConfDir);
        if ("true".equals(System.getProperty(SYSTEM_PROPERTY_HADOOP_CONF_DIR_ENABLED))) {
            candidate = getDirectoryIfExists(candidate, System.getProperty("YARN_CONF_DIR"));
            candidate = getDirectoryIfExists(candidate, System.getProperty("HADOOP_CONF_DIR"));
            candidate = getDirectoryIfExists(candidate, System.getenv("YARN_CONF_DIR"));
            candidate = getDirectoryIfExists(candidate, System.getenv("HADOOP_CONF_DIR"));
        }
        return candidate;
    }

    /**
     * Gets a candidate directory based on a file path, if the directory exists
     * and if another candidate hasn't already been resolved.
     * 
     * @param existingCandidate
     *            an existing candidate directory. If this is non-null, it will
     *            be returned immediately.
     * @param path
     *            the path of a directory
     * @return a candidate directory, or null if none was resolved.
     */
    private File getDirectoryIfExists(File existingCandidate, String path) {
        if (existingCandidate != null) {
            return existingCandidate;
        }
        if (!Strings.isNullOrEmpty(path)) {
            final File directory = new File(path);
            if (directory.exists() && directory.isDirectory()) {
                return directory;
            }
        }
        return null;
    }

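    /**
     * Creates a new Hadoop {@link FileSystem} instance for this resource. Note
     * that {@link FileSystem#newInstance(Configuration)} always returns a
     * fresh, uncached instance, so the caller is responsible for closing it.
     */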
    public FileSystem getHadoopFileSystem() {
        try {
            return FileSystem.newInstance(getHadoopConfiguration());
        } catch (IOException e) {
            throw new MetaModelException("Could not connect to HDFS: " + e.getMessage(), e);
        }
    }

    public Path getHadoopPath() {
        if (_path == null) {
            _path = new Path(_filepath);
        }
        return _path;
    }

    @Override
    public int hashCode() {
        return Objects.hash(getScheme(), _filepath, _hostname, _port, _hadoopConfDir);
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj instanceof HdfsResource) {
            final HdfsResource other = (HdfsResource) obj;
            return Objects.equals(getScheme(), other.getScheme()) && Objects.equals(_filepath, other._filepath)
                    && Objects.equals(_hostname, other._hostname) && Objects.equals(_port, other._port)
                    && Objects.equals(_hadoopConfDir, other._hadoopConfDir);
        }
        return false;
    }
}