All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sansa_stack.hadoop.util.FileSystemUtils Maven / Gradle / Ivy

package net.sansa_stack.hadoop.util;

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

/**
 * Static helpers for opening Hadoop {@link Path}s as input streams, transparently
 * wrapping the stream with a decompressor when a matching {@link CompressionCodec}
 * is found for the path.
 */
public class FileSystemUtils {
    /**
     * Opens {@code path} for reading on the file system the path itself resolves to.
     *
     * @param path the file to open
     * @param conf the Hadoop configuration used to resolve the file system and the compression codecs
     * @return an input stream over the file's content; decompressed if a codec matches the path
     * @throws IOException if the file system cannot be resolved or the file cannot be opened
     */
    public static InputStream newInputStream(Path path, Configuration conf) throws IOException {
        // Q: Do we need to close the FileSystem? If so, the InputStream.close() could be wrapped.
        // A: Not totally sure but apparently we can/should rely on hadoop's FileSystem caching
        //    and therefore we do not need to close instances ourselves
        //    See also: https://stackoverflow.com/questions/55168902/should-hadoop-filesystem-be-closed

        // Resolve the file system from the path instead of FileSystem.get(conf): the latter always
        // yields the configured default file system, which is the wrong one for a path that carries
        // a different scheme/authority. Unqualified paths still resolve to the default file system.
        FileSystem fs = path.getFileSystem(conf);
        return newInputStream(path, fs, conf);
    }

    /**
     * Opens {@code path} for reading on the given file system, using a
     * {@link CompressionCodecFactory} built from {@code conf} to detect compression.
     *
     * @param path the file to open
     * @param fs the file system on which {@code path} is opened
     * @param conf the Hadoop configuration from which the codec factory is created
     * @return an input stream over the file's content; decompressed if a codec matches the path
     * @throws IOException if the file cannot be opened or the decompressing stream cannot be created
     */
    public static InputStream newInputStream(Path path, FileSystem fs, Configuration conf) throws IOException {
        CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
        return newInputStream(path, fs, compressionCodecFactory);
    }

    /**
     * Opens {@code path} for reading on the given file system and, if the factory reports a codec
     * for the path, wraps the raw stream in that codec's decompressing stream.
     *
     * @param path the file to open
     * @param fs the file system on which {@code path} is opened
     * @param compressionCodecFactory the factory used to look up a codec for {@code path};
     *        may be {@code null}, in which case the raw stream is returned
     * @return the raw stream, or a decompressing stream wrapping it when a codec was found
     * @throws IOException if the file cannot be opened or the decompressing stream cannot be created
     */
    public static InputStream newInputStream(Path path, FileSystem fs, CompressionCodecFactory compressionCodecFactory) throws IOException {
        InputStream rawStream = fs.open(path);
        if (compressionCodecFactory == null) {
            return rawStream;
        }

        try {
            CompressionCodec codec = compressionCodecFactory.getCodec(path);
            if (codec == null) {
                return rawStream;
            }
            return codec.createInputStream(rawStream);
        } catch (IOException | RuntimeException e) {
            // The caller never receives a stream in this case, so the raw one must be released here.
            try {
                rawStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy