// org.apache.parquet.hadoop.util.HadoopOutputFile (source listing: Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.hadoop.util;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.io.OutputFile;
import org.apache.parquet.io.PositionOutputStream;
/**
 * An {@link OutputFile} backed by a Hadoop {@link FileSystem} and a {@link Path}.
 *
 * <p>Instances are obtained through {@link #fromPath(Path, Configuration)} or
 * {@link #fromPathUnchecked(Path, Configuration)}; the constructor is private.
 */
public class HadoopOutputFile implements OutputFile {
  // need to supply a buffer size when setting block size. this is the default
  // for hadoop 1 to present. copying it avoids loading DFSConfigKeys.
  private static final int DFS_BUFFER_SIZE_DEFAULT = 4096;

  // URI schemes of the file systems for which supportsBlockSize() reports true.
  private static final Set<String> BLOCK_FS_SCHEMES = new HashSet<String>();

  static {
    BLOCK_FS_SCHEMES.add("hdfs");
    BLOCK_FS_SCHEMES.add("webhdfs");
    BLOCK_FS_SCHEMES.add("viewfs");
  }

  /**
   * Returns the set of URI schemes treated as block file systems.
   *
   * <p>Visible for testing. The returned set is the live backing set, not a copy:
   * changes made through it are observed by later {@link #supportsBlockSize()} calls.
   *
   * @return the mutable set of block file system schemes
   */
  public static Set<String> getBlockFileSystems() {
    return BLOCK_FS_SCHEMES;
  }

  /**
   * Checks whether the scheme of the given file system's URI is one of the
   * registered block file system schemes.
   */
  private static boolean supportsBlockSize(FileSystem fs) {
    return BLOCK_FS_SCHEMES.contains(fs.getUri().getScheme());
  }

  private final FileSystem fs;
  private final Path path;
  private final Configuration conf;

  /**
   * Creates an output file for {@code path}, resolving its file system from
   * {@code conf} and qualifying the path against that file system.
   *
   * @param path the location to write to
   * @param conf the Hadoop configuration used to look up the file system
   * @return a new {@code HadoopOutputFile} for the qualified path
   * @throws IOException if the file system for {@code path} cannot be obtained
   */
  public static HadoopOutputFile fromPath(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    return new HadoopOutputFile(fs, fs.makeQualified(path), conf);
  }

  /**
   * Same as {@link #fromPath(Path, Configuration)}, but rethrows any
   * {@link IOException} wrapped in a {@link RuntimeException} (cause preserved).
   *
   * @param path the location to write to
   * @param conf the Hadoop configuration used to look up the file system
   * @return a new {@code HadoopOutputFile} for the qualified path
   * @throws RuntimeException wrapping the {@link IOException} raised by {@code fromPath}
   */
  public static HadoopOutputFile fromPathUnchecked(Path path, Configuration conf) {
    try {
      return fromPath(path, conf);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  private HadoopOutputFile(FileSystem fs, Path path, Configuration conf) {
    this.fs = fs;
    this.path = path;
    this.conf = conf;
  }

  /**
   * @return the configuration this output file was created with
   */
  public Configuration getConfiguration() {
    return conf;
  }

  /**
   * Opens the file for writing with the overwrite flag set to {@code false}.
   *
   * @param blockSizeHint requested block size; the larger of this value and the
   *     file system's default block size for the path is used
   * @return a position-tracking stream wrapping the Hadoop output stream
   * @throws IOException if the underlying {@code FileSystem.create} call fails
   */
  @Override
  public PositionOutputStream create(long blockSizeHint) throws IOException {
    return open(false /* do not overwrite */, blockSizeHint);
  }

  /**
   * Opens the file for writing with the overwrite flag set to {@code true}.
   *
   * @param blockSizeHint requested block size; the larger of this value and the
   *     file system's default block size for the path is used
   * @return a position-tracking stream wrapping the Hadoop output stream
   * @throws IOException if the underlying {@code FileSystem.create} call fails
   */
  @Override
  public PositionOutputStream createOrOverwrite(long blockSizeHint) throws IOException {
    return open(true /* overwrite if exists */, blockSizeHint);
  }

  /**
   * Shared implementation of {@link #create(long)} and {@link #createOrOverwrite(long)}:
   * creates the file with the default buffer size, the file system's default
   * replication, and a block size no smaller than the file system default.
   */
  private PositionOutputStream open(boolean overwrite, long blockSizeHint) throws IOException {
    short replication = fs.getDefaultReplication(path);
    // Never go below the file system's own default block size.
    long blockSize = Math.max(fs.getDefaultBlockSize(path), blockSizeHint);
    return HadoopStreams.wrap(
        fs.create(path, overwrite, DFS_BUFFER_SIZE_DEFAULT, replication, blockSize));
  }

  /**
   * @return {@code true} if this file's file system scheme is one of the
   *     registered block file system schemes
   */
  @Override
  public boolean supportsBlockSize() {
    return supportsBlockSize(fs);
  }

  /**
   * @return the file system's default block size for this file's path
   */
  @Override
  public long defaultBlockSize() {
    return fs.getDefaultBlockSize(path);
  }

  /**
   * @return the qualified path as a string (same value as {@link #toString()})
   */
  @Override
  public String getPath() {
    return toString();
  }

  /**
   * @return the string form of the qualified path
   */
  @Override
  public String toString() {
    return path.toString();
  }
}