All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.facebook.hive.orc.OrcOutputFormat Maven / Gradle / Ivy

There is a newer version: 0.18.9
Show newest version
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.hive.orc;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Properties;

import com.facebook.hive.orc.compression.CompressionKind;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;

import com.facebook.hive.orc.OrcSerde.OrcSerdeRow;

/**
 * A Hive OutputFormat for ORC files.
 */
public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
                        implements HiveOutputFormat<NullWritable, OrcSerdeRow> {

  /**
   * Record writer that opens the underlying ORC {@link Writer} lazily: the
   * writer is created from the inspector of the first row written, or, if no
   * row was ever written, from an empty struct inspector when the record
   * writer is closed.
   */
  private static class OrcRecordWriter
      implements RecordWriter<NullWritable, OrcSerdeRow>,
                 StatsProvidingRecordWriter {
    // Stays null until the first row is written or close() is called.
    private Writer writer = null;
    private final FileSystem fs;
    private final Path path;
    private final Configuration conf;
    private final long stripeSize;
    private final int compressionSize;
    private final CompressionKind compress;
    private final int rowIndexStride;
    private final SerDeStats stats;

    /**
     * Stores the settings used later to create the ORC writer.
     *
     * @param compress name of a {@link CompressionKind} constant; resolved
     *        with {@code CompressionKind.valueOf}, so an unknown name fails
     *        here rather than on the first write
     */
    OrcRecordWriter(FileSystem fs, Path path, Configuration conf,
                    long stripeSize, String compress,
                    int compressionSize, int rowIndexStride) {
      this.fs = fs;
      this.path = path;
      this.conf = conf;
      this.stripeSize = stripeSize;
      this.compress = CompressionKind.valueOf(compress);
      this.compressionSize = compressionSize;
      this.rowIndexStride = rowIndexStride;
      this.stats = new SerDeStats();
    }

    /** Creates an ORC writer for {@code inspector} using the stored settings. */
    private Writer createWriter(ObjectInspector inspector) throws IOException {
      return OrcFile.createWriter(fs, path, conf, inspector,
          stripeSize, compress, compressionSize, rowIndexStride);
    }

    /** Appends one row, creating the writer from the row's inspector if needed. */
    private void append(OrcSerdeRow row) throws IOException {
      if (writer == null) {
        writer = createWriter(row.getInspector());
      }
      writer.addRow(row.getRow());
    }

    /** Writes {@code row}; the key is not used. */
    @Override
    public void write(NullWritable nullWritable,
                      OrcSerdeRow row) throws IOException {
      append(row);
    }

    /**
     * Writes {@code row}, which must be an {@link OrcSerdeRow}.
     *
     * @throws ClassCastException if {@code row} is any other Writable
     */
    @Override
    public void write(Writable row) throws IOException {
      append((OrcSerdeRow) row);
    }

    /** Closes the writer; the reporter is not used. */
    @Override
    public void close(Reporter reporter) throws IOException {
      close(true);
    }

    /**
     * Closes the underlying ORC writer. The flag is not consulted by this
     * implementation.
     */
    @Override
    public void close(boolean b) throws IOException {
      // if we haven't written any rows, we need to create a file with a
      // generic schema.
      if (writer == null) {
        // a row with no columns
        ObjectInspector inspector = ObjectInspectorFactory.
            getStandardStructObjectInspector(new ArrayList<String>(),
                new ArrayList<ObjectInspector>());
        writer = createWriter(inspector);
      }
      writer.close();
    }

    /**
     * Returns the stats object, refreshed with the writer's raw data size.
     * If no writer has been created yet the stats are returned as they are
     * instead of failing with a NullPointerException.
     */
    @Override
    public SerDeStats getStats() {
      if (writer != null) {
        stats.setRawDataSize(writer.getRowRawDataSize());
      }
      return stats;
    }
  }

  /**
   * Creates a record writer for the file {@code name}, resolved against the
   * job's work output path when one is configured.
   *
   * <p>Stripe size, compression, compression block size and row index stride
   * are taken from the default values of the corresponding
   * {@code OrcConf.ConfVars} entries, not read from {@code conf}.
   *
   * @param fileSystem file system to write to; when null it is resolved from
   *        the work output path if set, otherwise from the output path
   */
  @Override
  public RecordWriter<NullWritable, OrcSerdeRow> getRecordWriter(FileSystem fileSystem,
       JobConf conf, String name, Progressable reporter) throws IOException {
    ReaderWriterProfiler.setProfilerOptions(conf);

    // To be compatible with older file formats like Sequence and RC
    // Only works if mapred.work.output.dir is set in the conf
    Path workOutputPath = FileOutputFormat.getWorkOutputPath(conf);
    Path outputPath = workOutputPath == null ? new Path(name) : new Path(workOutputPath, name);

    if (fileSystem == null) {
      // Previously a null file system was passed through when no work output
      // path was configured; fall back to the output path in that case.
      Path fsSource = workOutputPath != null ? workOutputPath : outputPath;
      fileSystem = fsSource.getFileSystem(conf);
    }

    return new OrcRecordWriter(fileSystem, outputPath, conf,
      OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE.defaultLongVal,
      OrcConf.ConfVars.HIVE_ORC_COMPRESSION.defaultVal,
      OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE.defaultIntVal,
      OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE.defaultIntVal);
  }

  /**
   * Creates a Hive record writer for {@code path}. Each setting is read from
   * {@code tableProperties} first and falls back to {@code conf} (via
   * {@code OrcConf}) when the table property is absent. When indexes are
   * disabled the row index stride is forced to 0.
   *
   * @throws NumberFormatException if a numeric table property is not a
   *         valid number
   */
  @Override
  public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf conf, Path path,
      Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
      Progressable reporter) throws IOException {
    ReaderWriterProfiler.setProfilerOptions(conf);

    String stripeSizeStr = tableProperties.getProperty(OrcFile.STRIPE_SIZE);
    long stripeSize = stripeSizeStr != null
        ? Long.parseLong(stripeSizeStr)
        : OrcConf.getLongVar(conf, OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE);

    String compression = tableProperties.getProperty(OrcFile.COMPRESSION);
    if (compression == null) {
      compression = OrcConf.getVar(conf, OrcConf.ConfVars.HIVE_ORC_COMPRESSION);
    }

    String compressionSizeStr = tableProperties.getProperty(OrcFile.COMPRESSION_BLOCK_SIZE);
    int compressionSize = compressionSizeStr != null
        ? Integer.parseInt(compressionSizeStr)
        : OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE);

    String rowIndexStrideStr = tableProperties.getProperty(OrcFile.ROW_INDEX_STRIDE);
    int rowIndexStride = rowIndexStrideStr != null
        ? Integer.parseInt(rowIndexStrideStr)
        : OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE);

    String enableIndexesStr = tableProperties.getProperty(OrcFile.ENABLE_INDEXES);
    boolean enableIndexes = enableIndexesStr != null
        ? Boolean.parseBoolean(enableIndexesStr)
        : OrcConf.getBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_CREATE_INDEX);

    if (!enableIndexes) {
      // A stride of 0 is how index creation is switched off for the writer.
      rowIndexStride = 0;
    }

    return new OrcRecordWriter(path.getFileSystem(conf), path, conf,
      stripeSize, compression, compressionSize, rowIndexStride);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy