com.facebook.hive.orc.OrcOutputFormat (hive-dwrf: DWRF file format for Hive)
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.hive.orc;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Properties;
import com.facebook.hive.orc.compression.CompressionKind;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;
import com.facebook.hive.orc.OrcSerde.OrcSerdeRow;
/**
 * A Hive OutputFormat for ORC files. The underlying Writer is created
 * lazily, once the first row (and therefore its ObjectInspector, which
 * supplies the file schema) is available.
 */
public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
    implements HiveOutputFormat<NullWritable, OrcSerdeRow> {
private static class OrcRecordWriter
    implements RecordWriter<NullWritable, OrcSerdeRow>,
               StatsProvidingRecordWriter {
private Writer writer = null;
private final FileSystem fs;
private final Path path;
private final Configuration conf;
private final long stripeSize;
private final int compressionSize;
private final CompressionKind compress;
private final int rowIndexStride;
private final SerDeStats stats;
OrcRecordWriter(FileSystem fs, Path path, Configuration conf,
long stripeSize, String compress,
int compressionSize, int rowIndexStride) {
this.fs = fs;
this.path = path;
this.conf = conf;
this.stripeSize = stripeSize;
this.compress = CompressionKind.valueOf(compress);
this.compressionSize = compressionSize;
this.rowIndexStride = rowIndexStride;
this.stats = new SerDeStats();
}
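/**
 * Writes one row. The Writer is created on the first call so that the
 * row's ObjectInspector can supply the file schema.
 */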
@Override
public void write(NullWritable nullWritable,
OrcSerdeRow row) throws IOException {
if (writer == null) {
writer = OrcFile.createWriter(fs, path, this.conf, row.getInspector(),
stripeSize, compress, compressionSize, rowIndexStride);
}
writer.addRow(row.getRow());
}
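/** Variant used when rows arrive as plain Writables; each must be an OrcSerdeRow. */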
@Override
public void write(Writable row) throws IOException {
OrcSerdeRow serdeRow = (OrcSerdeRow) row;
if (writer == null) {
writer = OrcFile.createWriter(fs, path, this.conf,
serdeRow.getInspector(), stripeSize, compress, compressionSize,
rowIndexStride);
}
writer.addRow(serdeRow.getRow());
}
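/** MapReduce close hook; delegates to close(true). */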
@Override
public void close(Reporter reporter) throws IOException {
close(true);
}
@Override
public void close(boolean b) throws IOException {
// if we haven't written any rows, we need to create a file with a
// generic schema.
if (writer == null) {
// a row with no columns
ObjectInspector inspector = ObjectInspectorFactory.
    getStandardStructObjectInspector(new ArrayList<String>(),
        new ArrayList<ObjectInspector>());
writer = OrcFile.createWriter(fs, path, this.conf, inspector,
stripeSize, compress, compressionSize, rowIndexStride);
}
writer.close();
}
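/**
 * Exposes writer statistics to Hive. Only the raw (pre-compression) data
 * size is populated; this assumes at least one row has been written, since
 * the Writer is created lazily.
 */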
@Override
public SerDeStats getStats() {
stats.setRawDataSize(writer.getRowRawDataSize());
return stats;
}
}
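/**
 * The mapred OutputFormat entry point. This path ignores table properties
 * and uses the built-in OrcConf defaults for stripe size, compression,
 * compression block size, and row index stride.
 */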
@Override
public RecordWriter<NullWritable, OrcSerdeRow> getRecordWriter(FileSystem fileSystem,
    JobConf conf, String name, Progressable reporter) throws IOException {
ReaderWriterProfiler.setProfilerOptions(conf);
// For compatibility with older file formats such as SequenceFile and RCFile,
// resolve the path against the task's work output directory.
// This only works if mapred.work.output.dir is set in the conf.
Path workOutputPath = FileOutputFormat.getWorkOutputPath(conf);
Path outputPath = workOutputPath == null ? new Path(name) : new Path(workOutputPath, name);
if (fileSystem == null && workOutputPath != null) {
fileSystem = workOutputPath.getFileSystem(conf);
}
return new OrcRecordWriter(fileSystem, outputPath, conf,
OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE.defaultLongVal,
OrcConf.ConfVars.HIVE_ORC_COMPRESSION.defaultVal,
OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE.defaultIntVal,
OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE.defaultIntVal);
}
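/**
 * The Hive entry point used by FileSinkOperator. Each setting is read from
 * the table properties first, falling back to the JobConf; when indexes are
 * disabled, the row index stride is forced to 0.
 */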
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf conf, Path path,
Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
Progressable reporter) throws IOException {
ReaderWriterProfiler.setProfilerOptions(conf);
String stripeSizeStr = tableProperties.getProperty(OrcFile.STRIPE_SIZE);
long stripeSize;
if (stripeSizeStr != null) {
stripeSize = Long.valueOf(stripeSizeStr);
} else {
stripeSize = OrcConf.getLongVar(conf, OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE);
}
String compression = tableProperties.getProperty(OrcFile.COMPRESSION);
if (compression == null) {
compression = OrcConf.getVar(conf, OrcConf.ConfVars.HIVE_ORC_COMPRESSION);
}
String compressionSizeStr = tableProperties.getProperty(OrcFile.COMPRESSION_BLOCK_SIZE);
int compressionSize;
if (compressionSizeStr != null) {
compressionSize = Integer.valueOf(compressionSizeStr);
} else {
compressionSize = OrcConf.getIntVar(conf,
OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE);
}
String rowIndexStrideStr = tableProperties.getProperty(OrcFile.ROW_INDEX_STRIDE);
int rowIndexStride;
if (rowIndexStrideStr != null) {
rowIndexStride = Integer.valueOf(rowIndexStrideStr);
} else {
rowIndexStride = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE);
}
String enableIndexesStr = tableProperties.getProperty(OrcFile.ENABLE_INDEXES);
boolean enableIndexes;
if (enableIndexesStr != null) {
enableIndexes = Boolean.valueOf(enableIndexesStr);
} else {
enableIndexes = OrcConf.getBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_CREATE_INDEX);
}
if (!enableIndexes) {
rowIndexStride = 0;
}
return new OrcRecordWriter(path.getFileSystem(conf), path, conf,
stripeSize, compression, compressionSize, rowIndexStride);
}
}
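A minimal usage sketch, not part of the original source: it shows how a caller might obtain a writer through getHiveRecordWriter, assuming a class in the same com.facebook.hive.orc package, a writable local path, and that "ZLIB" is among the CompressionKind names this build accepts; the output path and property value are illustrative assumptions.

package com.facebook.hive.orc;

import java.util.Properties;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;

public class OrcOutputFormatExample {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();
    Properties tableProps = new Properties();
    // Table properties take precedence over JobConf settings in
    // getHiveRecordWriter; OrcFile.COMPRESSION is the key it reads.
    // "ZLIB" is an assumed CompressionKind name.
    tableProps.setProperty(OrcFile.COMPRESSION, "ZLIB");

    OrcOutputFormat format = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = format.getHiveRecordWriter(
        conf, new Path("/tmp/example.orc"), NullWritable.class,
        false, tableProps, null);

    // Rows would normally be OrcSerdeRow instances produced by
    // OrcSerde.serialize(...) and passed to writer.write(...).
    // Closing with no rows written still emits a file with an empty
    // struct schema (see OrcRecordWriter.close above).
    writer.close(false);
  }
}

Passing null for the Progressable reflects that getHiveRecordWriter never consults the reporter.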