com.facebook.hive.orc.WriterImpl (hive-dwrf)
DWRF file format for Hive
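For orientation before the source itself, here is a minimal usage sketch of this writer. It assumes hive-dwrf exposes an OrcFile factory mirroring Apache Hive's ORC API (OrcFile.createWriter and Writer.addRow); that factory and the MyRow bean below are illustrative assumptions, not shown in this file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
// Assumed to exist in this package, mirroring Hive ORC's factory:
import com.facebook.hive.orc.OrcFile;
import com.facebook.hive.orc.CompressionKind;
import com.facebook.hive.orc.Writer;

public class WriterSketch {
// Hypothetical row bean; any type with a reflection ObjectInspector works.
static class MyRow {
int x; String y;
MyRow(int x, String y) { this.x = x; this.y = y; }
}

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
// Assumed factory signature, mirroring Hive ORC: stripe size 256MB, ZLIB
// compression, 256KB stream buffers, a row index entry every 10,000 rows.
Writer writer = OrcFile.createWriter(fs, new Path("/tmp/example.orc"), conf,
inspector, 256L * 1024 * 1024, CompressionKind.ZLIB, 256 * 1024, 10000);
writer.addRow(new MyRow(1, "hello")); // buffered into the current stripe
writer.close();                       // flushes the final stripe and file footer
}
}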
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.hive.orc;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import com.facebook.hive.orc.statistics.ColumnStatisticsImpl;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.io.RawDatasizeConst;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import com.facebook.hive.orc.OrcConf.ConfVars;
import com.facebook.hive.orc.OrcProto.Stream.Kind;
import com.google.protobuf.ByteString;
import com.google.protobuf.CodedOutputStream;
/**
* An ORC file writer. The file is divided into stripes, which is the natural
* unit of work when reading. Each stripe is buffered in memory until the
* memory reaches the stripe size and then it is written out broken down by
* columns. Each column is written by a TreeWriter that is specific to that
* type of column. TreeWriters may have children TreeWriters that handle the
* sub-types. Each of the TreeWriters writes the column's data as a set of
* streams.
*
* This class is synchronized so that multi-threaded access is ok. In
* particular, because the MemoryManager is shared between writers, this class
* assumes that checkMemory may be called from a separate thread.
*/
public class WriterImpl implements Writer, MemoryManager.Callback {
private static final Log LOG = LogFactory.getLog(WriterImpl.class);
private static final int HDFS_BUFFER_SIZE = 256 * 1024;
private static final int MIN_ROW_INDEX_STRIDE = 1000;
public static final int SHORT_BYTE_SIZE = 2;
public static final int INT_BYTE_SIZE = 4;
public static final int LONG_BYTE_SIZE = 8;
// Specifies how many position values a PRESENT (bit field) stream contributes to each
// row index entry: 3 when uncompressed, 4 when compressed (the extra value locates the
// compressed chunk)
public static final int UNCOMPRESSED_PRESENT_STREAM_INDEX_ENTRIES = 3;
public static final int COMPRESSED_PRESENT_STREAM_INDEX_ENTRIES = 4;
private final FileSystem fs;
private final Path path;
private final long stripeSize;
private final int rowIndexStride;
private final CompressionKind compress;
private final CompressionCodec codec;
private final int bufferSize;
// the streams that make up the current stripe
private final Map<StreamName, BufferedStream> streams =
new TreeMap<StreamName, BufferedStream>();
private FSDataOutputStream rawWriter = null;
// the compressed metadata information outStream
private OutStream writer = null;
// a protobuf outStream around streamFactory
private CodedOutputStream protobufWriter = null;
private long headerLength;
private int columnCount;
private long rowCount = 0;
private long rowsInStripe = 0;
private int rowsInIndex = 0;
private long rawDataSize = 0;
private final List<OrcProto.StripeInformation> stripes =
new ArrayList<OrcProto.StripeInformation>();
private final Map<String, ByteString> userMetadata =
new TreeMap<String, ByteString>();
private final StreamFactory streamFactory = new StreamFactory();
private final TreeWriter treeWriter;
private final OrcProto.RowIndex.Builder rowIndex =
OrcProto.RowIndex.newBuilder();
private final boolean buildIndex;
private final MemoryManager memoryManager;
private final boolean useVInts;
private final int dfsBytesPerChecksum;
private final long initialSize;
private final long maxDictSize;
private final Configuration conf;
WriterImpl(FileSystem fs,
Path path,
Configuration conf,
ObjectInspector inspector,
long stripeSize,
CompressionKind compress,
int bufferSize,
int rowIndexStride,
MemoryManager memoryManager) throws IOException {
this.fs = fs;
this.path = path;
this.conf = conf;
this.stripeSize = stripeSize;
this.compress = compress;
this.bufferSize = bufferSize;
this.rowIndexStride = rowIndexStride;
this.memoryManager = memoryManager;
buildIndex = rowIndexStride > 0;
codec = createCodec(compress, conf);
useVInts = OrcConf.getBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_USE_VINTS);
treeWriter = createTreeWriter(inspector, streamFactory, false, conf, useVInts,
memoryManager.isLowMemoryMode());
dfsBytesPerChecksum = conf.getInt("io.bytes.per.checksum", 512);
if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
throw new IllegalArgumentException("Row stride must be at least " +
MIN_ROW_INDEX_STRIDE);
}
maxDictSize = OrcConf.getLongVar(conf, OrcConf.ConfVars.HIVE_ORC_MAX_DICTIONARY_SIZE);
// ensure that we are able to handle callbacks before we register ourselves
initialSize = estimateStripeSize().getTotalMemory();
memoryManager.addWriter(path, stripeSize, this, initialSize);
}
static CompressionCodec createCodec(CompressionKind kind) {
// To be used for cases where we don't care about configuring the codec,
// e.g. reads
return createCodec(kind, null);
}
static CompressionCodec createCodec(CompressionKind kind, Configuration conf) {
switch (kind) {
case NONE:
return null;
case ZLIB:
return new ZlibCodec(conf);
case SNAPPY:
return new SnappyCodec();
case LZO:
try {
Class<? extends CompressionCodec> lzo =
(Class<? extends CompressionCodec>)
Class.forName("com.facebook.hive.orc.LzoCodec");
return lzo.newInstance();
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("LZO is not available.", e);
} catch (InstantiationException e) {
throw new IllegalArgumentException("Problem initializing LZO", e);
} catch (IllegalAccessException e) {
throw new IllegalArgumentException("Insufficient access to LZO", e);
}
default:
throw new IllegalArgumentException("Unknown compression codec: " +
kind);
}
}
@Override
public synchronized void enterLowMemoryMode() throws IOException {
// Don't use dictionaries
treeWriter.abandonDictionaries();
// If the Zlib compression level is less than 6, raise it to 6 to compensate for the fact
// we aren't using dictionaries
if (codec != null && OrcConf.getIntVar(conf, ConfVars.HIVE_ORC_ZLIB_COMPRESSION_LEVEL) < 6) {
OrcConf.setIntVar(conf, ConfVars.HIVE_ORC_ZLIB_COMPRESSION_LEVEL, 6);
codec.reloadConfigurations(conf);
}
}
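// Called by the MemoryManager, possibly from another thread (hence synchronized).
// newScale shrinks each writer's effective stripe size when many writers share the
// memory pool: e.g. with stripeSize = 256MB and newScale = 0.5, the stripe is
// flushed once the estimated buffered size exceeds 128MB, or once dictionary
// memory alone exceeds maxDictSize.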
@Override
public synchronized boolean checkMemory(double newScale) throws IOException {
long limit = (long) Math.round(stripeSize * newScale);
MemoryEstimate size = estimateStripeSize();
if (LOG.isDebugEnabled()) {
LOG.debug("ORC writer " + path + " size = " + size.getTotalMemory() + " limit = " +
limit);
}
if (size.getTotalMemory() > limit ||
(maxDictSize > 0 && size.getDictionaryMemory() > maxDictSize)) {
flushStripe();
return true;
}
return false;
}
/**
* This class is used to hold the contents of streams as they are buffered.
* The TreeWriters write to the outStream and the codec compresses the
* data as buffers fill up and stores them in the output list. When the
* stripe is being written, the whole stream is written to the file.
*/
private class BufferedStream implements OutStream.OutputReceiver {
private final OutStream outStream;
private final List<ByteBuffer> output = new ArrayList<ByteBuffer>();
BufferedStream(String name, int bufferSize,
CompressionCodec codec) throws IOException {
outStream = new OutStream(name, bufferSize, codec, this);
}
/**
* Receive a buffer from the compression codec.
* @param buffer the buffer to save
* @throws IOException
*/
@Override
public void output(ByteBuffer buffer) {
output.add(buffer);
}
/**
* Get the number of bytes in buffers that are allocated to this stream.
* @return number of bytes in buffers
*/
public long getBufferSize() {
long result = 0;
for(ByteBuffer buf: output) {
result += buf.capacity();
}
return outStream.getBufferSize() + result;
}
/**
* Flush the stream to the codec.
* @throws IOException
*/
public void flush(boolean reuseBuffer) throws IOException {
outStream.flush(reuseBuffer);
}
/**
* Clear all of the buffers.
* @throws IOException
*/
public void clear() throws IOException {
outStream.clear();
output.clear();
}
/**
* Check the state of suppress flag in output stream
* @return value of suppress flag
*/
public boolean isSuppressed() {
return outStream.isSuppressed();
}
/**
* Write the saved compressed buffers to the OutputStream.
* @param out the stream to write to
* @throws IOException
*/
void spillTo(OutputStream out) throws IOException {
for(ByteBuffer buffer: output) {
out.write(buffer.array(), buffer.arrayOffset() + buffer.position(),
buffer.remaining());
}
}
@Override
public String toString() {
return outStream.toString();
}
}
/**
* An output receiver that writes the ByteBuffers to the output stream
* as they are received.
*/
private class DirectStream implements OutStream.OutputReceiver {
private final FSDataOutputStream output;
DirectStream(FSDataOutputStream output) {
this.output = output;
}
@Override
public void output(ByteBuffer buffer) throws IOException {
output.write(buffer.array(), buffer.arrayOffset() + buffer.position(),
buffer.remaining());
}
}
static class RowIndexPositionRecorder implements PositionRecorder {
private final OrcProto.RowIndexEntry.Builder builder;
RowIndexPositionRecorder(OrcProto.RowIndexEntry.Builder builder) {
this.builder = builder;
}
@Override
public void addPosition(long position) {
builder.addPositions(position);
}
}
/**
* Interface from the Writer to the TreeWriters. This limits the visibility
* that the TreeWriters have into the Writer.
*/
private class StreamFactory {
/**
* Create a stream to store part of a column.
* @param column the column id for the stream
* @param kind the kind of stream
* @return The output outStream that the section needs to be written to.
* @throws IOException
*/
public OutStream createStream(int column,
OrcProto.Stream.Kind kind
) throws IOException {
StreamName name = new StreamName(column, kind);
BufferedStream result = streams.get(name);
if (result == null) {
result = new BufferedStream(name.toString(), bufferSize, codec);
streams.put(name, result);
}
return result.outStream;
}
/**
* Get the next column id.
* @return a number from 0 to the number of columns - 1
*/
public int getNextColumnId() {
return columnCount++;
}
/**
* Get the stride rate of the row index.
*/
public int getRowIndexStride() {
return rowIndexStride;
}
/**
* Should be building the row index.
* @return true if we are building the index
*/
public boolean buildIndex() {
return buildIndex;
}
/**
* Is the ORC file compressed?
* @return are the streams compressed
*/
public boolean isCompressed() {
return codec != null;
}
}
/**
* The parent class of all of the writers for each column. Each column
* is written by an instance of this class. The compound types (struct,
* list, map, and union) have children tree writers that write the children
* types.
*/
private abstract static class TreeWriter {
protected final int id;
protected final ObjectInspector inspector;
private final BitFieldWriter isPresent;
private final boolean isCompressed;
protected final ColumnStatisticsImpl indexStatistics;
private final ColumnStatisticsImpl fileStatistics;
protected TreeWriter[] childrenWriters;
protected final RowIndexPositionRecorder rowIndexPosition;
private final OrcProto.RowIndex.Builder rowIndex;
private final OrcProto.RowIndexEntry.Builder rowIndexEntry;
private final PositionedOutputStream rowIndexStream;
private final Configuration conf;
protected long stripeRawDataSize = 0;
protected long rowRawDataSize = 0;
protected final boolean useVInts;
private int numStripes = 0;
private boolean foundNulls;
private OutStream isPresentOutStream;
/**
* Create a tree writer
* @param columnId the column id of the column to write
* @param inspector the object inspector to use
* @param streamFactory limited access to the Writer's data.
* @param nullable can the value be null?
* @throws IOException
*/
TreeWriter(int columnId, ObjectInspector inspector,
StreamFactory streamFactory,
boolean nullable, Configuration conf,
boolean useVInts) throws IOException {
this.id = columnId;
this.inspector = inspector;
this.conf = conf;
this.useVInts = useVInts;
this.isCompressed = streamFactory.isCompressed();
if (nullable) {
isPresentOutStream = streamFactory.createStream(id, OrcProto.Stream.Kind.PRESENT);
isPresent = new BitFieldWriter(isPresentOutStream, 1);
} else {
isPresent = null;
}
this.foundNulls = false;
indexStatistics = ColumnStatisticsImpl.create(inspector);
fileStatistics = ColumnStatisticsImpl.create(inspector);
childrenWriters = new TreeWriter[0];
rowIndex = OrcProto.RowIndex.newBuilder();
rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
if (streamFactory.buildIndex()) {
rowIndexStream = streamFactory.createStream(id,
OrcProto.Stream.Kind.ROW_INDEX);
} else {
rowIndexStream = null;
}
}
protected int getNumStripes() {
return numStripes;
}
protected OrcProto.RowIndex.Builder getRowIndex() {
return rowIndex;
}
protected ColumnStatisticsImpl getFileStatistics() {
return fileStatistics;
}
protected OrcProto.RowIndexEntry.Builder getRowIndexEntry() {
return rowIndexEntry;
}
/**
* Add a new value to the column.
* @param obj
* @throws IOException
*/
abstract void write(Object obj) throws IOException;
void write(Object obj, long rawDataSize) throws IOException{
if (obj != null) {
setRawDataSize(rawDataSize);
} else {
// Estimate the raw size of null as 1 byte
setRawDataSize(RawDatasizeConst.NULL_SIZE);
}
flushRow(obj);
}
/**
* Update the row count and mark the isPresent bit
*/
void flushRow(Object obj) throws IOException {
if (obj != null) {
indexStatistics.increment();
}
if (isPresent != null) {
isPresent.write(obj == null ? 0 : 1);
if(obj == null) {
foundNulls = true;
}
}
}
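// When a stripe contains no nulls, the PRESENT stream is suppressed (never written),
// so the positions it contributed to each row index entry must be dropped as well;
// otherwise readers would misalign the remaining positions against the surviving
// streams. The PRESENT positions are always the leading values in an entry, so a
// sublist starting past them keeps exactly the positions of the other streams.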
private void removeIsPresentPositions() {
for(int i=0; i < rowIndex.getEntryCount(); ++i) {
OrcProto.RowIndexEntry.Builder entry = rowIndex.getEntryBuilder(i);
List<Long> positions = entry.getPositionsList();
// bit streams use 3 positions if uncompressed, 4 if compressed
positions = positions.subList(isCompressed ? COMPRESSED_PRESENT_STREAM_INDEX_ENTRIES :
UNCOMPRESSED_PRESENT_STREAM_INDEX_ENTRIES, positions.size());
entry.clearPositions();
entry.addAllPositions(positions);
}
}
/**
* Sets the row raw data size and updates the stripe raw data size
*
* @param rawDataSize
*/
protected void setRawDataSize(long rawDataSize) {
rowRawDataSize = rawDataSize;
stripeRawDataSize += rawDataSize;
}
/**
* Write the stripe out to the file.
* @param builder the stripe footer that contains the information about the
* layout of the stripe. The TreeWriter is required to update
* the footer with its information.
* @param requiredIndexEntries the number of index entries that are
* required. this is to check to make sure the
* row index is well formed.
* @throws IOException
*/
void writeStripe(OrcProto.StripeFooter.Builder builder,
int requiredIndexEntries) throws IOException {
numStripes++;
if (isPresent != null) {
isPresent.flush();
// if no nulls are found in a stream, then suppress the stream
if(!foundNulls) {
isPresentOutStream.suppress();
// since isPresent bitstream is suppressed, update the index to
// remove the positions of the isPresent stream
if (rowIndexStream != null) {
removeIsPresentPositions();
}
}
}
// reset the flag for next stripe
foundNulls = false;
builder.addColumns(getEncoding());
if (rowIndexStream != null) {
if (rowIndex.getEntryCount() != requiredIndexEntries) {
throw new IllegalArgumentException("Column has wrong number of " +
"index entries found: " + rowIndex.getEntryCount() + " expected: " +
requiredIndexEntries);
}
rowIndex.build().writeTo(rowIndexStream);
rowIndexStream.flush();
}
rowIndex.clear();
rowIndexEntry.clear();
stripeRawDataSize = 0;
}
TreeWriter[] getChildrenWriters() {
return childrenWriters;
}
/**
* For all the TreeWriters that buffer rows, process
* all the buffered rows.
*/
void flush() throws IOException {
for (TreeWriter writer : childrenWriters) {
writer.flush();
}
}
/**
* Get the encoding for this column.
* @return the information about the encoding of this column
*/
OrcProto.ColumnEncoding getEncoding() {
return OrcProto.ColumnEncoding.newBuilder().setKind(
OrcProto.ColumnEncoding.Kind.DIRECT).build();
}
/**
* Create a row index entry with the previous location and the current
* index statistics. Also merges the index statistics into the file
* statistics before they are cleared. Finally, it records the start of the
* next index and ensures all of the children columns also create an entry.
* @throws IOException
*/
void createRowIndexEntry() throws IOException {
fileStatistics.merge(indexStatistics);
rowIndexEntry.setStatistics(indexStatistics.serialize());
indexStatistics.reset();
rowIndex.addEntry(rowIndexEntry);
rowIndexEntry.clear();
recordPosition(rowIndexPosition);
for(TreeWriter child: childrenWriters) {
child.createRowIndexEntry();
}
}
/**
* Record the current position in each of this column's streams.
* @param recorder where should the locations be recorded
* @throws IOException
*/
void recordPosition(PositionRecorder recorder) throws IOException {
if (isPresent != null) {
isPresent.getPosition(recorder);
}
}
/**
* Estimate how much memory the writer is consuming excluding the streams.
* @return the number of bytes.
*/
void estimateMemory(MemoryEstimate memoryEstimate) {
for (TreeWriter child: childrenWriters) {
child.estimateMemory(memoryEstimate);
}
}
public void abandonDictionaries() throws IOException {
for (TreeWriter child: childrenWriters) {
child.abandonDictionaries();
}
}
long getStripeRawDataSize() {
return stripeRawDataSize;
}
long getRowRawDataSize() {
return rowRawDataSize;
}
}
private static class BooleanTreeWriter extends TreeWriter {
private final BitFieldWriter writer;
BooleanTreeWriter(int columnId,
ObjectInspector inspector,
StreamFactory writer,
boolean nullable, Configuration conf,
boolean useVInts, boolean lowMemoryMode) throws IOException {
super(columnId, inspector, writer, nullable, conf, useVInts);
PositionedOutputStream out = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.writer = new BitFieldWriter(out, 1);
recordPosition(rowIndexPosition);
}
@Override
void write(Object obj) throws IOException {
super.write(obj, RawDatasizeConst.BOOLEAN_SIZE);
if (obj != null) {
boolean val = ((BooleanObjectInspector) inspector).get(obj);
indexStatistics.updateBoolean(val);
writer.write(val ? 1 : 0);
}
}
@Override
void writeStripe(OrcProto.StripeFooter.Builder builder,
int requiredIndexEntries) throws IOException {
super.writeStripe(builder, requiredIndexEntries);
writer.flush();
recordPosition(rowIndexPosition);
}
@Override
void recordPosition(PositionRecorder recorder) throws IOException {
super.recordPosition(recorder);
writer.getPosition(recorder);
}
}
private static class ByteTreeWriter extends TreeWriter {
private final RunLengthByteWriter writer;
ByteTreeWriter(int columnId,
ObjectInspector inspector,
StreamFactory writer,
boolean nullable, Configuration conf,
boolean useVInts, boolean lowMemoryMode) throws IOException {
super(columnId, inspector, writer, nullable, conf, useVInts);
this.writer = new RunLengthByteWriter(writer.createStream(id,
OrcProto.Stream.Kind.DATA));
recordPosition(rowIndexPosition);
}
@Override
void write(Object obj) throws IOException {
super.write(obj, RawDatasizeConst.BYTE_SIZE);
if (obj != null) {
byte val = ((ByteObjectInspector) inspector).get(obj);
indexStatistics.updateInteger(val);
writer.write(val);
}
}
@Override
void writeStripe(OrcProto.StripeFooter.Builder builder,
int requiredIndexEntries) throws IOException {
super.writeStripe(builder, requiredIndexEntries);
writer.flush();
recordPosition(rowIndexPosition);
}
@Override
void recordPosition(PositionRecorder recorder) throws IOException {
super.recordPosition(recorder);
writer.getPosition(recorder);
}
}
private static class IntegerTreeWriter extends TreeWriter {
private final PositionedOutputStream output;
private DynamicIntArray rows;
private final PositionedOutputStream inDictionaryStream;
private final BitFieldWriter inDictionary;
private final List<OrcProto.RowIndexEntry> savedRowIndex =
new ArrayList<OrcProto.RowIndexEntry>();
private final boolean buildIndex;
private final List<Long> rowIndexValueCount = new ArrayList<Long>();
private final float dictionaryKeySizeThreshold;
private IntDictionaryEncoder dictionary;
private boolean useDictionaryEncoding = true;
private final StreamFactory writer;
private final int numBytes;
private final Long[] buffer;
private int bufferIndex = 0;
private long bufferedBytes = 0;
private final int recomputeStripeEncodingInterval;
PositionedOutputStream rowOutput;
private boolean abandonDictionaries = false;
private final boolean sortKeys;
private int dictionarySize;
private final boolean useStrideDictionaries;
IntegerTreeWriter(int columnId, ObjectInspector inspector, StreamFactory writerFactory,
boolean nullable, Configuration conf, boolean useVInts, int numBytes,
boolean lowMemoryMode) throws IOException {
super(columnId, inspector, writerFactory, nullable, conf, useVInts);
writer = writerFactory;
sortKeys = OrcConf.getBoolVar(conf,
OrcConf.ConfVars.HIVE_ORC_DICTIONARY_SORT_KEYS);
useStrideDictionaries = OrcConf.getBoolVar(conf,
OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY);
this.numBytes = numBytes;
recomputeStripeEncodingInterval = OrcConf.getIntVar(conf,
OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL);
if (!lowMemoryMode) {
dictionary = new IntDictionaryEncoder(sortKeys, numBytes, useVInts);
rows = new DynamicIntArray();
} else {
abandonDictionaries = true;
rowOutput = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
useDictionaryEncoding = false;
}
output = writer.createStream(id,
OrcProto.Stream.Kind.DICTIONARY_DATA);
inDictionaryStream = writer.createStream(id,
OrcProto.Stream.Kind.IN_DICTIONARY);
inDictionary = new BitFieldWriter(inDictionaryStream, 1);
dictionaryKeySizeThreshold = OrcConf.getFloatVar(conf,
OrcConf.ConfVars.HIVE_ORC_DICTIONARY_NUMERIC_KEY_SIZE_THRESHOLD);
int bufferLength = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ROW_BUFFER_SIZE);
buffer = new Long[bufferLength];
recordPosition(rowIndexPosition);
rowIndexValueCount.add(0L);
buildIndex = writer.buildIndex();
if (buildIndex && lowMemoryMode) {
rowOutput.getPosition(rowIndexPosition);
}
}
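// The dictionary-vs-direct decision is only recomputed every
// recomputeStripeEncodingInterval stripes; in between, the previous decision is
// carried over (see useCarriedOverDirectEncoding). Once dictionaries are abandoned
// (low-memory mode), the encoding is pinned to direct.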
boolean determineEncodingStripe() {
return (getNumStripes() % recomputeStripeEncodingInterval) == 0 && !abandonDictionaries;
}
@Override
void write(Object obj) throws IOException {
if (obj != null) {
switch (inspector.getCategory()) {
case PRIMITIVE:
switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
case SHORT:
buffer[bufferIndex++] = new Long(((ShortObjectInspector) inspector).get(obj));
setRawDataSize(RawDatasizeConst.SHORT_SIZE);
break;
case INT:
buffer[bufferIndex++] = new Long(((IntObjectInspector) inspector).get(obj));
setRawDataSize(RawDatasizeConst.INT_SIZE);
break;
case LONG:
buffer[bufferIndex++] = new Long(((LongObjectInspector) inspector).get(obj));
setRawDataSize(RawDatasizeConst.LONG_SIZE);
break;
default:
throw new IllegalArgumentException("Bad Category: Dictionary Encoding not available for " +
((PrimitiveObjectInspector) inspector).getPrimitiveCategory());
}
break;
default:
throw new IllegalArgumentException("Bad Category: DictionaryEncoding not available for " + inspector.getCategory());
}
bufferedBytes += RawDatasizeConst.LONG_SIZE;
} else {
buffer[bufferIndex++] = null;
setRawDataSize(RawDatasizeConst.NULL_SIZE);
}
if (bufferIndex == buffer.length) {
flush();
}
}
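// write() only buffers boxed values; flush() drains the buffer, feeding each value
// either straight to the direct DATA stream (when a carried-over direct encoding is
// in effect) or into the dictionary, while also updating statistics and the
// PRESENT stream via flushRow().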
@Override
void flush() throws IOException {
for (int i = 0; i < bufferIndex; i++) {
Long val = buffer[i];
buffer[i] = null;
if (val != null) {
if (useCarriedOverDirectEncoding()) {
SerializationUtils.writeIntegerType(rowOutput,
val, numBytes, true, useVInts);
} else {
rows.add(dictionary.add(val));
}
indexStatistics.updateInteger(val);
}
super.flushRow(val);
}
bufferIndex = 0;
bufferedBytes = 0;
}
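// Dictionary encoding stays on while (distinct values / non-null rows) is at or
// below the configured threshold: e.g. with a threshold of 0.8, a stripe of
// 1,000,000 buffered rows keeps its dictionary only if it has at most 800,000
// distinct values.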
boolean getUseDictionaryEncoding() {
return (rows.size() > 0 &&
(float)(dictionary.size()) / (float)rows.size() <= dictionaryKeySizeThreshold);
}
/**
* Returns true iff the encoding is not being determined using this stripe, and
* the previously determined encoding was direct.
*/
private boolean useCarriedOverDirectEncoding() {
return !determineEncodingStripe() && !useDictionaryEncoding;
}
@Override
void writeStripe(OrcProto.StripeFooter.Builder builder,
int requiredIndexEntries) throws IOException {
if (determineEncodingStripe()) {
useDictionaryEncoding = getUseDictionaryEncoding();
}
if (useDictionaryEncoding) {
rowOutput =
new RunLengthIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.DATA), false, INT_BYTE_SIZE, useVInts);
} else if (determineEncodingStripe()){
rowOutput = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
}
final long[] dumpOrder;
final int[] counts;
if (useDictionaryEncoding) {
// Traverse the dictionary keys writing out the bytes and lengths; and
// creating the map from the original order to the final sorted order.
dumpOrder = new long[dictionary.size()];
counts = new int[dictionary.size()];
dictionary.visit(new IntDictionaryEncoder.Visitor() {
int currentId = 0;
public void visit(IntDictionaryEncoder.VisitorContext context) throws IOException {
int count = context.getCount();
counts[context.getOriginalPosition()] = count;
if (!useStrideDictionaries || count > 1) {
dictionarySize++;
context.writeBytes(output);
dumpOrder[context.getOriginalPosition()] = currentId++;
} else {
dumpOrder[context.getOriginalPosition()] = dictionary.getValue(
context.getOriginalPosition());
}
}
});
} else {
dumpOrder = null;
counts = null;
}
if (!useCarriedOverDirectEncoding()) {
writeData(useDictionaryEncoding, dumpOrder, counts);
}
// we need to build the rowindex before calling super, since it
// writes it out.
super.writeStripe(builder, requiredIndexEntries);
if (useDictionaryEncoding) {
output.unsuppress();
inDictionary.flush();
if (dictionarySize == dictionary.size()) {
inDictionaryStream.suppress();
} else {
inDictionaryStream.unsuppress();
}
output.flush();
} else {
output.suppress();
inDictionaryStream.suppress();
}
rowOutput.flush();
savedRowIndex.clear();
rowIndexValueCount.clear();
recordPosition(rowIndexPosition);
dictionarySize = 0;
if (useCarriedOverDirectEncoding()) {
rowOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
rowOutput.getPosition(rowIndexPosition);
dictionary = null;
rows = null;
} else {
if (dictionary == null) {
dictionary = new IntDictionaryEncoder(sortKeys, numBytes, useVInts);
} else {
dictionary.clear();
}
if (rows == null) {
rows = new DynamicIntArray();
} else {
rows.clear();
}
}
rowIndexValueCount.add(0L);
}
private void convertDictionaryToDirect() throws IOException {
writeData(false, null, null);
}
private void writeData(boolean useDictionaryEncoding, long[] dumpOrder, int[] counts)
throws IOException {
int length = rows.size();
int rowIndexEntry = 0;
OrcProto.RowIndex.Builder rowIndex = getRowIndex();
// write the values translated into the dump order.
for(int i = 0; i <= length; ++i) {
// now that we are writing out the row values, we can finalize the
// row index
if (buildIndex) {
// If we are not using dictionary encoding and savedRowIndex is not empty, this means
// we are converting from dictionary encoding to direct encoding, so allow rowIndexEntry
// to exceed the number of saved row indices, to create the index entry for the current
// stride
while (rowIndexEntry < rowIndexValueCount.size() &&
i == rowIndexValueCount.get(rowIndexEntry) &&
(rowIndexEntry < savedRowIndex.size() ||
(!useDictionaryEncoding && rowIndexEntry == savedRowIndex.size()))) {
OrcProto.RowIndexEntry.Builder base = null;
RowIndexPositionRecorder recorder;
if (rowIndexEntry < savedRowIndex.size()) {
base = savedRowIndex.get(rowIndexEntry).toBuilder();
recorder = new RowIndexPositionRecorder(base);
} else {
recorder = rowIndexPosition;
}
if (useDictionaryEncoding && dumpOrder != null &&
dictionarySize != dictionary.size()) {
inDictionary.getPosition(recorder);
}
rowOutput.getPosition(recorder);
if (rowIndexEntry < savedRowIndex.size()) {
// If we are constructing an index entry from a saved row index, add it
rowIndex.addEntry(base.build());
}
rowIndexEntry++;
}
}
if (i < length) {
if (useDictionaryEncoding && dumpOrder != null) {
((RunLengthIntegerWriter) rowOutput).write(dumpOrder[rows.get(i)]);
if (!useStrideDictionaries || counts[rows.get(i)] > 1) {
inDictionary.write(1);
} else {
inDictionary.write(0);
}
} else {
SerializationUtils.writeIntegerType(rowOutput,
dictionary.getValue(rows.get(i)), numBytes, true, useVInts);
}
}
}
}
@Override
OrcProto.ColumnEncoding getEncoding() {
if (useDictionaryEncoding) {
return OrcProto.ColumnEncoding.newBuilder().setKind(
OrcProto.ColumnEncoding.Kind.DICTIONARY).
setDictionarySize(dictionarySize).build();
} else {
return OrcProto.ColumnEncoding.newBuilder().setKind(
OrcProto.ColumnEncoding.Kind.DIRECT).build();
}
}
/**
* This method doesn't call the super method, because unlike most of the
* other TreeWriters, this one can't record the position in the streams
* until the stripe is being flushed. Therefore it saves all of the entries
* and augments them with the final information as the stripe is written.
* @throws IOException
*/
@Override
void createRowIndexEntry() throws IOException {
getFileStatistics().merge(indexStatistics);
OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
rowIndexEntry.setStatistics(indexStatistics.serialize());
indexStatistics.reset();
if (useCarriedOverDirectEncoding()) {
getRowIndex().addEntry(rowIndexEntry);
} else {
savedRowIndex.add(rowIndexEntry.build());
}
rowIndexEntry.clear();
recordPosition(rowIndexPosition);
if (useCarriedOverDirectEncoding()) {
rowOutput.getPosition(rowIndexPosition);
} else {
rowIndexValueCount.add(Long.valueOf(rows.size()));
}
}
@Override
void estimateMemory(MemoryEstimate memoryEstimate) {
memoryEstimate.incrementTotalMemory((rows == null ? 0 : rows.size() * 4) +
(dictionary == null ? 0 : dictionary.getByteSize()) + bufferedBytes +
(rowOutput == null ? 0 : rowOutput.getBufferSize()));
memoryEstimate.incrementDictionaryMemory(dictionary == null ? 0 :
dictionary.getUncompressedLength());
}
@Override
public void abandonDictionaries() throws IOException {
boolean useCarriedOverDirectEncoding = useCarriedOverDirectEncoding();
abandonDictionaries = true;
if (!useCarriedOverDirectEncoding) {
rowOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
useDictionaryEncoding = false;
convertDictionaryToDirect();
if (rows.size() == 0) {
rowOutput.getPosition(rowIndexPosition);
}
}
dictionary = null;
rows = null;
savedRowIndex.clear();
}
}
private static class FloatTreeWriter extends TreeWriter {
private final PositionedOutputStream stream;
FloatTreeWriter(int columnId,
ObjectInspector inspector,
StreamFactory writer,
boolean nullable, Configuration conf,
boolean useVInts, boolean lowMemoryMode) throws IOException {
super(columnId, inspector, writer, nullable, conf, useVInts);
this.stream = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
recordPosition(rowIndexPosition);
}
@Override
void write(Object obj) throws IOException {
super.write(obj, RawDatasizeConst.FLOAT_SIZE);
if (obj != null) {
float val = ((FloatObjectInspector) inspector).get(obj);
indexStatistics.updateDouble(val);
SerializationUtils.writeFloat(stream, val);
}
}
@Override
void writeStripe(OrcProto.StripeFooter.Builder builder,
int requiredIndexEntries) throws IOException {
super.writeStripe(builder, requiredIndexEntries);
stream.flush();
recordPosition(rowIndexPosition);
}
@Override
void recordPosition(PositionRecorder recorder) throws IOException {
super.recordPosition(recorder);
stream.getPosition(recorder);
}
}
private static class DoubleTreeWriter extends TreeWriter {
private final PositionedOutputStream stream;
DoubleTreeWriter(int columnId,
ObjectInspector inspector,
StreamFactory writer,
boolean nullable, Configuration conf,
boolean useVInts, boolean lowMemoryMode) throws IOException {
super(columnId, inspector, writer, nullable, conf, useVInts);
this.stream = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
recordPosition(rowIndexPosition);
}
@Override
void write(Object obj) throws IOException {
super.write(obj, RawDatasizeConst.DOUBLE_SIZE);
if (obj != null) {
double val = ((DoubleObjectInspector) inspector).get(obj);
indexStatistics.updateDouble(val);
SerializationUtils.writeDouble(stream, val);
}
}
@Override
void writeStripe(OrcProto.StripeFooter.Builder builder,
int requiredIndexEntries) throws IOException {
super.writeStripe(builder, requiredIndexEntries);
stream.flush();
recordPosition(rowIndexPosition);
}
@Override
void recordPosition(PositionRecorder recorder) throws IOException {
super.recordPosition(recorder);
stream.getPosition(recorder);
}
}
private static class StringTreeWriter extends TreeWriter {
private final PositionedOutputStream stringOutput;
private final RunLengthIntegerWriter lengthOutput;
private final PositionedOutputStream inDictionaryStream;
private final BitFieldWriter inDictionary;
private StringDictionaryEncoder dictionary;
private DynamicIntArray rows;
private final RunLengthIntegerWriter directLengthOutput;
private final RunLengthIntegerWriter strideDictionaryLengthOutput;
private final List<OrcProto.RowIndexEntry> savedRowIndex =
new ArrayList<OrcProto.RowIndexEntry>();
private final boolean buildIndex;
private final List<Long> rowIndexValueCount = new ArrayList<Long>();
private final StreamFactory writer;
// If the number of keys in a dictionary is greater than this fraction of the total number of
// non-null rows, turn off dictionary encoding
private final float dictionaryKeySizeThreshold;
// If the number of keys in a dictionary is greater than this fraction of the total number of
// non-null rows, don't use the estimated entropy heuristic to turn off dictionary encoding
private final float entropyKeySizeThreshold;
private final int entropyMinSamples;
private final float entropyDictSampleFraction;
private final int entropyThreshold;
private boolean useDictionaryEncoding = true;
private final boolean useStrideDictionaries;
private final boolean sortKeys;
private final Text[] buffer;
private int bufferIndex = 0;
private long bufferedBytes = 0;
private final int recomputeStripeEncodingInterval;
private PositionedOutputStream rowOutput;
private final PositionedOutputStream strideDictionaryOutput;
private boolean abandonDictionaries = false;
private int dictionarySize;
StringTreeWriter(int columnId,
ObjectInspector inspector,
StreamFactory writerFactory,
boolean nullable, Configuration conf,
boolean useVInts, boolean lowMemoryMode) throws IOException {
super(columnId, inspector, writerFactory, nullable, conf, useVInts);
writer = writerFactory;
sortKeys = OrcConf.getBoolVar(conf,
OrcConf.ConfVars.HIVE_ORC_DICTIONARY_SORT_KEYS);
useStrideDictionaries = OrcConf.getBoolVar(conf,
OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY);
recomputeStripeEncodingInterval = OrcConf.getIntVar(conf,
OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL);
if (!lowMemoryMode) {
dictionary = new StringDictionaryEncoder(sortKeys, useStrideDictionaries);
rows = new DynamicIntArray();
} else {
abandonDictionaries = true;
rowOutput = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
useDictionaryEncoding = false;
}
stringOutput = writer.createStream(id,
OrcProto.Stream.Kind.DICTIONARY_DATA);
lengthOutput = new RunLengthIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.LENGTH), false, INT_BYTE_SIZE, useVInts);
inDictionaryStream = writer.createStream(id, OrcProto.Stream.Kind.IN_DICTIONARY);
inDictionary = new BitFieldWriter(inDictionaryStream, 1);
strideDictionaryLengthOutput = new RunLengthIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.STRIDE_DICTIONARY_LENGTH), false, INT_BYTE_SIZE, useVInts);
strideDictionaryOutput = writer.createStream(id,
OrcProto.Stream.Kind.STRIDE_DICTIONARY);
directLengthOutput = new RunLengthIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.LENGTH), false, INT_BYTE_SIZE, useVInts);
dictionaryKeySizeThreshold = OrcConf.getFloatVar(conf,
OrcConf.ConfVars.HIVE_ORC_DICTIONARY_STRING_KEY_SIZE_THRESHOLD);
entropyKeySizeThreshold = OrcConf.getFloatVar(conf,
OrcConf.ConfVars.HIVE_ORC_ENTROPY_KEY_STRING_SIZE_THRESHOLD);
entropyMinSamples = OrcConf.getIntVar(conf,
OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_MIN_SAMPLES);
entropyDictSampleFraction = OrcConf.getFloatVar(conf,
OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_DICT_SAMPLE_FRACTION);
entropyThreshold = OrcConf.getIntVar(conf,
OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD);
int bufferLength = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ROW_BUFFER_SIZE);
buffer = new Text[bufferLength];
recordPosition(rowIndexPosition);
rowIndexValueCount.add(0L);
buildIndex = writer.buildIndex();
if (buildIndex && lowMemoryMode) {
rowOutput.getPosition(rowIndexPosition);
directLengthOutput.getPosition(rowIndexPosition);
}
}
boolean determineEncodingStripe() {
return (getNumStripes() % recomputeStripeEncodingInterval) == 0 && !abandonDictionaries;
}
@Override
void write(Object obj) throws IOException {
if (obj != null) {
Text val = ((StringObjectInspector) inspector).getPrimitiveWritableObject(obj);
buffer[bufferIndex++] = new Text(val);
setRawDataSize(val.getLength());
bufferedBytes += val.getLength();
} else {
buffer[bufferIndex++] = null;
setRawDataSize(RawDatasizeConst.NULL_SIZE);
}
if (bufferIndex == buffer.length) {
flush();
}
}
@Override
void flush() throws IOException {
for (int i = 0; i < bufferIndex; i++) {
Text val = buffer[i];
// Make sure we don't end up storing it twice
buffer[i] = null;
if (val != null) {
indexStatistics.updateString(val.toString());
if (useCarriedOverDirectEncoding()) {
rowOutput.write(val.getBytes());
directLengthOutput.write(val.getLength());
} else {
rows.add(dictionary.add(val, savedRowIndex.size()));
}
}
super.flushRow(val);
}
bufferIndex = 0;
bufferedBytes = 0;
}
private boolean isEntropyThresholdExceeded(Set<Character> chars, Text text, int index) {
dictionary.getText(text, index);
for (char character : text.toString().toCharArray()) {
chars.add(character);
}
return chars.size() > entropyThreshold;
}
private int[] getSampleIndecesForEntropy() {
int numSamples = Math.max(entropyMinSamples,
(int)(entropyDictSampleFraction * dictionary.size()));
int[] indeces = new int[dictionary.size()];
int[] samples = new int[numSamples];
Random rand = new Random();
// The goal of this loop is to select numSamples distinct indices of
// dictionary
//
// The loop works as follows: start with an array of zeros.
// On each iteration, pick a random number in the range 0 to the size of the
// dictionary minus one minus the number of previous iterations; the effective size
// of the array thus decreases by one with each iteration (logically, not physically).
// Look at the value of the array at that random index: if it is 0, the sample index
// is the random index itself, because we've never looked at this position before;
// if it's nonzero, that stored value is the sample index.
// Then take the value at the end of the logical array (size of the dictionary minus
// one minus the number of iterations): if it's 0, put that index into the array at
// the random index, otherwise put that nonzero value there.
// Thus, by reducing the logical size of the array and moving the value at its end,
// we remove indices we have previously visited while ensuring no index is lost.
for (int i = 0; i < numSamples; i++) {
int index = rand.nextInt(dictionary.size() - i);
if (indeces[index] == 0) {
samples[i] = index;
} else {
samples[i] = indeces[index];
}
indeces[index] = indeces[dictionary.size() - i - 1] == 0 ?
dictionary.size() - i - 1 : indeces[dictionary.size() - i - 1];
}
return samples;
}
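// Estimated-entropy heuristic: sample dictionary entries and count the distinct
// characters seen across them. A large character set (above entropyThreshold)
// suggests high-entropy strings that generic compression handles poorly, so
// dictionary encoding is kept; a small character set suggests zlib alone will
// compress the direct stream well enough.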
private boolean useDictionaryEncodingEntropyHeuristic() {
Set<Character> chars = new HashSet<Character>();
Text text = new Text();
if (dictionary.size() > entropyMinSamples) {
int[] samples = getSampleIndecesForEntropy();
for (int sampleIndex : samples) {
if (isEntropyThresholdExceeded(chars, text, sampleIndex)) {
return true;
}
}
} else {
for (int i = 0; i < dictionary.size(); i++) {
if (isEntropyThresholdExceeded(chars, text, i)) {
return true;
}
}
}
return false;
}
/**
* Returns true iff the encoding is not being determined using this stripe, and
* the previously determined encoding was direct.
*/
private boolean useCarriedOverDirectEncoding() {
return !determineEncodingStripe() && !useDictionaryEncoding;
}
@Override
void writeStripe(OrcProto.StripeFooter.Builder builder,
int requiredIndexEntries) throws IOException {
if (determineEncodingStripe()) {
// Set the flag indicating whether or not to use dictionary encoding based on whether
// or not the fraction of distinct keys over number of non-null rows is less than the
// configured threshold, and whether or not the number of distinct characters in a sample
// of entries in the dictionary (the estimated entropy) exceeds the configured threshold
if (rows.size() > 0) {
useDictionaryEncoding = true;
// The fraction of non-null values in this column that are repeats of values in the
// dictionary
float repeatedValuesFraction =
(float)(rows.size() - dictionary.size()) / (float)rows.size();
// If the number of repeated values is small enough, consider using the entropy heuristic
// If the number of repeated values is high, even in the presence of low entropy,
// dictionary encoding can provide benefits beyond just zlib
if (repeatedValuesFraction <= entropyKeySizeThreshold) {
useDictionaryEncoding = useDictionaryEncodingEntropyHeuristic();
}
// dictionaryKeySizeThreshold is the fraction of keys that are distinct beyond
// which dictionary encoding is turned off
// so 1 - dictionaryKeySizeThreshold is the number of repeated values below which
// dictionary encoding should be turned off
useDictionaryEncoding = useDictionaryEncoding && (repeatedValuesFraction > 1.0 - dictionaryKeySizeThreshold);
}
}
if (useDictionaryEncoding) {
rowOutput = new RunLengthIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.DATA), false, INT_BYTE_SIZE, useVInts);
} else if (determineEncodingStripe()) {
rowOutput = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
}
final int[] dumpOrder;
final int[] counts;
final int[] strideDictionarySizes;
final boolean[] strideDictionaryIndexPopulated;
if (useDictionaryEncoding) {
dumpOrder = new int[dictionary.size()];
counts = new int[dictionary.size()];
strideDictionarySizes = new int[savedRowIndex.size()];
strideDictionaryIndexPopulated = new boolean[savedRowIndex.size()];
OrcProto.RowIndexEntry.Builder base = savedRowIndex.get(0).toBuilder();
PositionRecorder recorder = new RowIndexPositionRecorder(base);
strideDictionaryOutput.getPosition(recorder);
strideDictionaryLengthOutput.getPosition(recorder);
savedRowIndex.set(0, base.build());
strideDictionaryIndexPopulated[0] = true;
// Traverse the red-black tree writing out the bytes and lengths; and
// creating the map from the original order to the final sorted order.
dictionary.visit(new StringDictionaryEncoder.Visitor() {
private int currentId = 0;
private int directId = 0;
private int previousIndex = 0;
@Override
public void visit(StringDictionaryEncoder.VisitorContext context
) throws IOException {
counts[context.getOriginalPosition()] = context.getCount();
if (!useStrideDictionaries || context.getCount() > 1) {
dictionarySize++;
context.writeBytes(stringOutput);
lengthOutput.write(context.getLength());
dumpOrder[context.getOriginalPosition()] = currentId++;
} else {
int nextIndex = context.getIndexStride();
if (nextIndex != previousIndex) {
for (int i = previousIndex; i < nextIndex; i++) {
OrcProto.RowIndexEntry.Builder base = savedRowIndex.get(i + 1).toBuilder();
PositionRecorder recorder = new RowIndexPositionRecorder(base);
strideDictionaryOutput.getPosition(recorder);
strideDictionaryLengthOutput.getPosition(recorder);
savedRowIndex.set(i + 1, base.build());
strideDictionaryIndexPopulated[i + 1] = true;
}
previousIndex = context.getIndexStride();
directId = 0;
}
context.writeBytes(strideDictionaryOutput);
strideDictionarySizes[previousIndex]++;
strideDictionaryLengthOutput.write(context.getLength());
dumpOrder[context.getOriginalPosition()] = directId++;
}
}
});
} else {
dumpOrder = null;
counts = null;
strideDictionarySizes = null;
strideDictionaryIndexPopulated = null;
}
if (!useCarriedOverDirectEncoding()) {
writeData(useDictionaryEncoding, dumpOrder, counts, strideDictionarySizes,
strideDictionaryIndexPopulated);
}
// we need to build the rowindex before calling super, since it
// writes it out.
super.writeStripe(builder, requiredIndexEntries);
rowOutput.flush();
if (useDictionaryEncoding) {
stringOutput.unsuppress();
lengthOutput.unsuppress();
inDictionary.flush();
strideDictionaryOutput.flush();
strideDictionaryLengthOutput.flush();
if (dictionarySize == dictionary.size()) {
inDictionaryStream.suppress();
strideDictionaryOutput.suppress();
strideDictionaryLengthOutput.suppress();
} else {
inDictionaryStream.unsuppress();
strideDictionaryOutput.unsuppress();
strideDictionaryLengthOutput.unsuppress();
}
directLengthOutput.suppress();
stringOutput.flush();
lengthOutput.flush();
} else {
directLengthOutput.unsuppress();
stringOutput.suppress();
lengthOutput.suppress();
inDictionaryStream.suppress();
strideDictionaryOutput.suppress();
strideDictionaryLengthOutput.suppress();
directLengthOutput.flush();
}
// reset all of the fields to be ready for the next stripe.
savedRowIndex.clear();
rowIndexValueCount.clear();
recordPosition(rowIndexPosition);
dictionarySize = 0;
if (useCarriedOverDirectEncoding()) {
rowOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
rowOutput.getPosition(rowIndexPosition);
directLengthOutput.getPosition(rowIndexPosition);
dictionary = null;
rows = null;
} else {
if (dictionary == null) {
dictionary = new StringDictionaryEncoder(sortKeys, useStrideDictionaries);
} else {
dictionary.clear();
}
if (rows == null) {
rows = new DynamicIntArray();
} else {
rows.clear();
}
}
rowIndexValueCount.add(0L);
}
private void convertDictionaryToDirect() throws IOException {
// If this is still in the first index stride, savedRowIndex is empty, so we need to
// explicitly record the positions for the first index stride
writeData(false, null, null, null, null);
}
private void writeData(boolean useDictionaryEncoding, int[] dumpOrder, int[] counts,
int[] strideDictionarySizes, boolean[] strideDictionaryIndexPopulated) throws IOException {
int rowIndexEntry = 0;
OrcProto.RowIndex.Builder rowIndex = getRowIndex();
int length = rows.size();
Text text = new Text();
for(int i = 0; i <= length; ++i) {
// now that we are writing out the row values, we can finalize the
// row index
if (buildIndex) {
// If we are not using dictionary encoding and savedRowIndex is not empty, this means
// we are converting from dictionary encoding to direct encoding, so allow rowIndexEntry
// to exceed the number of saved row indices, to create the index entry for the current
// stride
while (rowIndexEntry < rowIndexValueCount.size() &&
i == rowIndexValueCount.get(rowIndexEntry) &&
(rowIndexEntry < savedRowIndex.size() ||
(!useDictionaryEncoding && rowIndexEntry == savedRowIndex.size()))) {
OrcProto.RowIndexEntry.Builder base = null;
RowIndexPositionRecorder recorder;
if (rowIndexEntry < savedRowIndex.size()) {
base = savedRowIndex.get(rowIndexEntry).toBuilder();
recorder = new RowIndexPositionRecorder(base);
} else {
recorder = rowIndexPosition;
}
if (useStrideDictionaries && useDictionaryEncoding &&
!strideDictionaryIndexPopulated[rowIndexEntry] &&
dictionary.size() != dictionarySize) {
strideDictionaryOutput.getPosition(recorder);
strideDictionaryLengthOutput.getPosition(recorder);
}
if (useStrideDictionaries && strideDictionarySizes != null &&
dictionary.size() != dictionarySize) {
base.addPositions(strideDictionarySizes[rowIndexEntry]);
}
recordOutputPosition(rowOutput, recorder);
if (rowIndexEntry < savedRowIndex.size()) {
// If we are constructing an index entry from a saved row index, add it
rowIndex.addEntry(base.build());
}
rowIndexEntry++;
}
}
if (i != length) {
if (useDictionaryEncoding) {
rowOutput.write(dumpOrder[rows.get(i)]);
if (!useStrideDictionaries || counts[rows.get(i)] > 1) {
inDictionary.write(1);
} else {
inDictionary.write(0);
}
} else {
dictionary.getText(text, rows.get(i));
rowOutput.write(text.getBytes(), 0, text.getLength());
directLengthOutput.write(text.getLength());
}
}
}
}
// Records the position of the row output stream; when direct encoding is in use it
// also records the direct length stream, and when dictionary encoding is in use with
// a partial (stride) dictionary it also records the in-dictionary bit stream
private void recordOutputPosition(PositionedOutputStream rowOutput,
RowIndexPositionRecorder recorder) throws IOException {
rowOutput.getPosition(recorder);
if (!useDictionaryEncoding) {
directLengthOutput.getPosition(recorder);
} else if (dictionary.size() != dictionarySize) {
inDictionary.getPosition(recorder);
}
}
@Override
OrcProto.ColumnEncoding getEncoding() {
// Returns the encoding used for the last call to writeStripe
if (useDictionaryEncoding) {
return OrcProto.ColumnEncoding.newBuilder().setKind(
OrcProto.ColumnEncoding.Kind.DICTIONARY).
setDictionarySize(dictionarySize).build();
} else {
return OrcProto.ColumnEncoding.newBuilder().setKind(
OrcProto.ColumnEncoding.Kind.DIRECT).build();
}
}
/**
* This method doesn't call the super method, because unlike most of the
* other TreeWriters, this one can't record the position in the streams
* until the stripe is being flushed. Therefore it saves all of the entries
* and augments them with the final information as the stripe is written.
* @throws IOException
*/
@Override
void createRowIndexEntry() throws IOException {
getFileStatistics().merge(indexStatistics);
OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
rowIndexEntry.setStatistics(indexStatistics.serialize());
indexStatistics.reset();
if (useCarriedOverDirectEncoding()) {
getRowIndex().addEntry(rowIndexEntry);
} else {
savedRowIndex.add(rowIndexEntry.build());
}
rowIndexEntry.clear();
recordPosition(rowIndexPosition);
if (useCarriedOverDirectEncoding()) {
rowOutput.getPosition(rowIndexPosition);
directLengthOutput.getPosition(rowIndexPosition);
} else {
rowIndexValueCount.add(Long.valueOf(rows.size()));
}
}
@Override
void estimateMemory(MemoryEstimate memoryEstimate) {
memoryEstimate.incrementTotalMemory((rows == null ? 0 : rows.getSizeInBytes()) +
(dictionary == null ? 0 : dictionary.getSizeInBytes()) + bufferedBytes +
(rowOutput == null ? 0 : rowOutput.getBufferSize()) +
(directLengthOutput == null ? 0 : directLengthOutput.getBufferSize()) +
(strideDictionaryOutput == null ? 0 : strideDictionaryOutput.getBufferSize()) +
(strideDictionaryLengthOutput == null ? 0 :
strideDictionaryLengthOutput.getBufferSize()));
memoryEstimate.incrementDictionaryMemory(dictionary == null ? 0 :
dictionary.getUncompressedLength());
}
@Override
public void abandonDictionaries() throws IOException {
boolean useCarriedOverDirectEncoding = useCarriedOverDirectEncoding();
abandonDictionaries = true;
if (!useCarriedOverDirectEncoding) {
rowOutput = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
useDictionaryEncoding = false;
convertDictionaryToDirect();
if (rows.size() == 0) {
rowOutput.getPosition(rowIndexPosition);
directLengthOutput.getPosition(rowIndexPosition);
}
}
dictionary = null;
rows = null;
savedRowIndex.clear();
}
}
private static class BinaryTreeWriter extends TreeWriter {
private final PositionedOutputStream stream;
private final RunLengthIntegerWriter length;
BinaryTreeWriter(int columnId,
ObjectInspector inspector,
StreamFactory writer,
boolean nullable, Configuration conf,
boolean useVInts, boolean lowMemoryMode) throws IOException {
super(columnId, inspector, writer, nullable, conf, useVInts);
this.stream = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.length = new RunLengthIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.LENGTH), false, INT_BYTE_SIZE, useVInts);
recordPosition(rowIndexPosition);
}
@Override
void write(Object obj) throws IOException {
long rawDataSize = 0;
if (obj != null) {
BytesWritable val =
((BinaryObjectInspector) inspector).getPrimitiveWritableObject(obj);
stream.write(val.getBytes(), 0, val.getLength());
length.write(val.getLength());
// Raw data size is the length of the BytesWritable, i.e. the number of bytes
rawDataSize = val.getLength();
}
super.write(obj, rawDataSize);
}
@Override
void writeStripe(OrcProto.StripeFooter.Builder builder,
int requiredIndexEntries) throws IOException {
super.writeStripe(builder, requiredIndexEntries);
stream.flush();
length.flush();
recordPosition(rowIndexPosition);
}
@Override
void recordPosition(PositionRecorder recorder) throws IOException {
super.recordPosition(recorder);
stream.getPosition(recorder);
length.getPosition(recorder);
}
}
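// Timestamps are stored as seconds relative to BASE_TIMESTAMP (2015-01-01 00:00:00,
// in the writer's local time zone) rather than the Unix epoch, which keeps typical
// values small so they varint-encode compactly. For example, 2015-01-02 00:00:00
// is written as 86,400 rather than a ten-digit epoch value.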
public static final int MILLIS_PER_SECOND = 1000;
public static final long BASE_TIMESTAMP =
Timestamp.valueOf("2015-01-01 00:00:00").getTime() / MILLIS_PER_SECOND;
private static class TimestampTreeWriter extends TreeWriter {
private final RunLengthIntegerWriter seconds;
private final RunLengthIntegerWriter nanos;
TimestampTreeWriter(int columnId,
ObjectInspector inspector,
StreamFactory writer,
boolean nullable, Configuration conf,
boolean useVInts, boolean lowMemoryMode) throws IOException {
super(columnId, inspector, writer, nullable, conf, useVInts);
this.seconds = new RunLengthIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.DATA), true, LONG_BYTE_SIZE, useVInts);
this.nanos = new RunLengthIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.NANO_DATA), false, LONG_BYTE_SIZE, useVInts);
recordPosition(rowIndexPosition);
}
@Override
void write(Object obj) throws IOException {
// Raw data size is:
// the number of bytes needed to store the milliseconds since the epoch
// (8 since it's a long)
// +
// the number of bytes needed to store the nanos field (4 since it's an int)
super.write(obj, RawDatasizeConst.TIMESTAMP_SIZE);
if (obj != null) {
Timestamp val =
((TimestampObjectInspector) inspector).
getPrimitiveJavaObject(obj);
seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP);
nanos.write(formatNanos(val.getNanos()));
}
}
@Override
void writeStripe(OrcProto.StripeFooter.Builder builder,
int requiredIndexEntries) throws IOException {
super.writeStripe(builder, requiredIndexEntries);
seconds.flush();
nanos.flush();
recordPosition(rowIndexPosition);
}
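// formatNanos packs the nanosecond field into a single long: trailing decimal zeros
// (at least two of them) are stripped, and the count they represent is recorded in
// the low 3 bits (0 = nothing stripped, k >= 1 means k + 1 zeros were stripped).
// For example, nanos = 123456700 is stored as (1234567 << 3) | 1, and a reader
// restores it by multiplying 1234567 by 10^(1 + 1).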
private static long formatNanos(int nanos) {
if (nanos == 0) {
return 0;
} else if (nanos % 100 != 0) {
return ((long) nanos) << 3;
} else {
nanos /= 100;
int trailingZeros = 1;
while (nanos % 10 == 0 && trailingZeros < 7) {
nanos /= 10;
trailingZeros += 1;
}
return ((long) nanos) << 3 | trailingZeros;
}
}
@Override
void recordPosition(PositionRecorder recorder) throws IOException {
super.recordPosition(recorder);
seconds.getPosition(recorder);
nanos.getPosition(recorder);
}
}
private static class StructTreeWriter extends TreeWriter {
private final List<? extends StructField> fields;
StructTreeWriter(int columnId,
ObjectInspector inspector,
StreamFactory writer,
boolean nullable, Configuration conf,
boolean useVInts, boolean lowMemoryMode) throws IOException {
super(columnId, inspector, writer, nullable, conf, useVInts);
StructObjectInspector structObjectInspector =
(StructObjectInspector) inspector;
fields = structObjectInspector.getAllStructFieldRefs();
childrenWriters = new TreeWriter[fields.size()];
for(int i=0; i < childrenWriters.length; ++i) {
childrenWriters[i] = createTreeWriter(
fields.get(i).getFieldObjectInspector(), writer, true, conf, useVInts,
lowMemoryMode);
}
recordPosition(rowIndexPosition);
}
@Override
void write(Object obj) throws IOException {
long rawDataSize = 0;
if (obj != null) {
StructObjectInspector insp = (StructObjectInspector) inspector;
List<Object> fieldDataList = insp.getStructFieldsDataAsList(obj);
for(int i = 0; i < fields.size(); ++i) {
TreeWriter writer = childrenWriters[i];
writer.write(fieldDataList.get(i));
rawDataSize += writer.getRowRawDataSize();
}
}
super.write(obj, rawDataSize);
}