/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.RecordUpdater;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
/**
* A RecordUpdater where the files are stored as ORC.
*/
public class OrcRecordUpdater implements RecordUpdater {
private static final Log LOG = LogFactory.getLog(OrcRecordUpdater.class);
public static final String ACID_KEY_INDEX_NAME = "hive.acid.key.index";
public static final String ACID_FORMAT = "_orc_acid_version";
public static final String ACID_STATS = "hive.acid.stats";
public static final int ORC_ACID_VERSION = 0;
final static int INSERT_OPERATION = 0;
final static int UPDATE_OPERATION = 1;
final static int DELETE_OPERATION = 2;
final static int OPERATION = 0;
final static int ORIGINAL_TRANSACTION = 1;
final static int BUCKET = 2;
final static int ROW_ID = 3;
final static int CURRENT_TRANSACTION = 4;
final static int ROW = 5;
final static int FIELDS = 6;
final static int DELTA_BUFFER_SIZE = 16 * 1024;
final static long DELTA_STRIPE_SIZE = 16 * 1024 * 1024;
private static final Charset UTF8 = Charset.forName("UTF-8");
private final AcidOutputFormat.Options options;
private final Path path;
private final FileSystem fs;
private Writer writer;
private final FSDataOutputStream flushLengths;
private final OrcStruct item;
private final IntWritable operation = new IntWritable();
private final LongWritable currentTransaction = new LongWritable(-1);
private final LongWritable originalTransaction = new LongWritable(-1);
private final IntWritable bucket = new IntWritable();
private final LongWritable rowId = new LongWritable();
private long insertedRows = 0;
// This records how many rows have been inserted or deleted. It is separate from insertedRows
// because that is monotonically increasing to give new unique row ids.
private long rowCountDelta = 0;
private final KeyIndexBuilder indexBuilder = new KeyIndexBuilder();
private StructField recIdField = null; // field to look for the record identifier in
private StructField rowIdField = null; // field inside recId to look for row id in
private StructField originalTxnField = null; // field inside recId to look for original txn in
private StructObjectInspector rowInspector; // OI for the original row
private StructObjectInspector recIdInspector; // OI for the record identifier struct
private LongObjectInspector rowIdInspector; // OI for the long row id inside the recordIdentifier
private LongObjectInspector origTxnInspector; // OI for the original txn inside the record identifier
static class AcidStats {
long inserts;
long updates;
long deletes;
AcidStats() {
// nothing
}
AcidStats(String serialized) {
String[] parts = serialized.split(",");
inserts = Long.parseLong(parts[0]);
updates = Long.parseLong(parts[1]);
deletes = Long.parseLong(parts[2]);
}
String serialize() {
StringBuilder builder = new StringBuilder();
builder.append(inserts);
builder.append(",");
builder.append(updates);
builder.append(",");
builder.append(deletes);
return builder.toString();
}
}
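// Illustrative round-trip of the comma-separated form above (the counts are
// made up for the example): new AcidStats("12,3,1") yields inserts == 12,
// updates == 3, deletes == 1, and serialize() returns "12,3,1" again.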
static Path getSideFile(Path main) {
return new Path(main + AcidUtils.DELTA_SIDE_FILE_SUFFIX);
}
static int getOperation(OrcStruct struct) {
return ((IntWritable) struct.getFieldValue(OPERATION)).get();
}
static long getCurrentTransaction(OrcStruct struct) {
return ((LongWritable) struct.getFieldValue(CURRENT_TRANSACTION)).get();
}
static long getOriginalTransaction(OrcStruct struct) {
return ((LongWritable) struct.getFieldValue(ORIGINAL_TRANSACTION)).get();
}
static int getBucket(OrcStruct struct) {
return ((IntWritable) struct.getFieldValue(BUCKET)).get();
}
static long getRowId(OrcStruct struct) {
return ((LongWritable) struct.getFieldValue(ROW_ID)).get();
}
static OrcStruct getRow(OrcStruct struct) {
if (struct == null) {
return null;
} else {
return (OrcStruct) struct.getFieldValue(ROW);
}
}
/**
* An extension to AcidOutputFormat that allows users to add additional
* options.
*/
public static class OrcOptions extends AcidOutputFormat.Options {
OrcFile.WriterOptions orcOptions = null;
public OrcOptions(Configuration conf) {
super(conf);
}
public OrcOptions orcOptions(OrcFile.WriterOptions opts) {
this.orcOptions = opts;
return this;
}
public OrcFile.WriterOptions getOrcOptions() {
return orcOptions;
}
}
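// A minimal sketch of how a caller might tune the underlying ORC writer
// through OrcOptions (the SNAPPY compression choice is illustrative, not a
// default of this class):
//   OrcRecordUpdater.OrcOptions opts = new OrcRecordUpdater.OrcOptions(conf)
//       .orcOptions(OrcFile.writerOptions(conf)
//           .compress(CompressionKind.SNAPPY));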
/**
* Create an object inspector for the ACID event based on the object inspector
* for the underlying row.
* @param rowInspector the row's object inspector
* @return an object inspector for the event stream
*/
static StructObjectInspector createEventSchema(ObjectInspector rowInspector) {
List<OrcStruct.Field> fields = new ArrayList<OrcStruct.Field>();
fields.add(new OrcStruct.Field("operation",
PrimitiveObjectInspectorFactory.writableIntObjectInspector, OPERATION));
fields.add(new OrcStruct.Field("originalTransaction",
PrimitiveObjectInspectorFactory.writableLongObjectInspector,
ORIGINAL_TRANSACTION));
fields.add(new OrcStruct.Field("bucket",
PrimitiveObjectInspectorFactory.writableIntObjectInspector, BUCKET));
fields.add(new OrcStruct.Field("rowId",
PrimitiveObjectInspectorFactory.writableLongObjectInspector, ROW_ID));
fields.add(new OrcStruct.Field("currentTransaction",
PrimitiveObjectInspectorFactory.writableLongObjectInspector,
CURRENT_TRANSACTION));
fields.add(new OrcStruct.Field("row", rowInspector, ROW));
return new OrcStruct.OrcStructInspector(fields);
}
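// Illustrative shape of the event schema built above, assuming a row type of
// struct<a:int,b:string>:
//   struct<operation:int,originalTransaction:bigint,bucket:int,
//          rowId:bigint,currentTransaction:bigint,row:struct<a:int,b:string>>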
public static List<String> getAcidEventFields() {
return Lists.newArrayList("operation", "originalTransaction", "bucket", "rowId",
"currentTransaction", "row");
}
OrcRecordUpdater(Path path,
AcidOutputFormat.Options options) throws IOException {
this.options = options;
this.bucket.set(options.getBucket());
this.path = AcidUtils.createFilename(path, options);
FileSystem fs = options.getFilesystem();
if (fs == null) {
fs = path.getFileSystem(options.getConfiguration());
}
this.fs = fs;
try {
FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
strm.writeInt(ORC_ACID_VERSION);
strm.close();
} catch (IOException ioe) {
if (LOG.isDebugEnabled()) {
LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " +
ioe);
}
}
if (options.getMinimumTransactionId() != options.getMaximumTransactionId()
&& !options.isWritingBase()){
flushLengths = fs.create(getSideFile(this.path), true, 8,
options.getReporter());
} else {
flushLengths = null;
}
OrcFile.WriterOptions writerOptions = null;
if (options instanceof OrcOptions) {
writerOptions = ((OrcOptions) options).getOrcOptions();
}
if (writerOptions == null) {
writerOptions = OrcFile.writerOptions(options.getConfiguration());
}
writerOptions.fileSystem(fs).callback(indexBuilder);
if (!options.isWritingBase()) {
writerOptions.blockPadding(false);
writerOptions.bufferSize(DELTA_BUFFER_SIZE);
writerOptions.stripeSize(DELTA_STRIPE_SIZE);
}
rowInspector = (StructObjectInspector)options.getInspector();
writerOptions.inspector(createEventSchema(findRecId(options.getInspector(),
options.getRecordIdColumn())));
this.writer = OrcFile.createWriter(this.path, writerOptions);
item = new OrcStruct(FIELDS);
item.setFieldValue(OPERATION, operation);
item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
item.setFieldValue(BUCKET, bucket);
item.setFieldValue(ROW_ID, rowId);
}
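// A minimal usage sketch (illustrative; conf, rowInspector, partitionDir,
// txnId and row are assumed to be supplied by the caller):
//   AcidOutputFormat.Options opts = new OrcRecordUpdater.OrcOptions(conf)
//       .inspector(rowInspector)
//       .bucket(0)
//       .minimumTransactionId(txnId)
//       .maximumTransactionId(txnId);
//   RecordUpdater updater = new OrcRecordUpdater(partitionDir, opts);
//   updater.insert(txnId, row);
//   updater.close(false);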
// Find the record identifier column (if there) and return a possibly new ObjectInspector that
// will strain out the record id for the underlying writer.
private ObjectInspector findRecId(ObjectInspector inspector, int rowIdColNum) {
if (!(inspector instanceof StructObjectInspector)) {
throw new RuntimeException("Serious problem, expected a StructObjectInspector, but got a " +
inspector.getClass().getName());
}
if (rowIdColNum < 0) {
return inspector;
} else {
RecIdStrippingObjectInspector newInspector =
new RecIdStrippingObjectInspector(inspector, rowIdColNum);
recIdField = newInspector.getRecId();
List<? extends StructField> fields =
((StructObjectInspector) recIdField.getFieldObjectInspector()).getAllStructFieldRefs();
// Go by position, not field name, as field names aren't guaranteed. The order of fields
// in RecordIdentifier is transactionId, bucketId, rowId
originalTxnField = fields.get(0);
origTxnInspector = (LongObjectInspector)originalTxnField.getFieldObjectInspector();
rowIdField = fields.get(2);
rowIdInspector = (LongObjectInspector)rowIdField.getFieldObjectInspector();
recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
return newInspector;
}
}
private void addEvent(int operation, long currentTransaction, long rowId, Object row)
throws IOException {
this.operation.set(operation);
this.currentTransaction.set(currentTransaction);
// If this is an insert, originalTransaction should be set to this transaction. If not,
// it will be reset by the following if anyway.
long originalTransaction = currentTransaction;
if (operation == DELETE_OPERATION || operation == UPDATE_OPERATION) {
Object rowIdValue = rowInspector.getStructFieldData(row, recIdField);
originalTransaction = origTxnInspector.get(
recIdInspector.getStructFieldData(rowIdValue, originalTxnField));
rowId = rowIdInspector.get(recIdInspector.getStructFieldData(rowIdValue, rowIdField));
}
this.rowId.set(rowId);
this.originalTransaction.set(originalTransaction);
item.setFieldValue(OrcRecordUpdater.ROW, (operation == DELETE_OPERATION ? null : row));
indexBuilder.addKey(operation, originalTransaction, bucket.get(), rowId);
writer.addRow(item);
}
@Override
public void insert(long currentTransaction, Object row) throws IOException {
if (this.currentTransaction.get() != currentTransaction) {
insertedRows = 0;
}
addEvent(INSERT_OPERATION, currentTransaction, insertedRows++, row);
rowCountDelta++;
}
@Override
public void update(long currentTransaction, Object row) throws IOException {
if (this.currentTransaction.get() != currentTransaction) {
insertedRows = 0;
}
addEvent(UPDATE_OPERATION, currentTransaction, -1L, row);
}
@Override
public void delete(long currentTransaction, Object row) throws IOException {
if (this.currentTransaction.get() != currentTransaction) {
insertedRows = 0;
}
addEvent(DELETE_OPERATION, currentTransaction, -1L, row);
rowCountDelta--;
}
@Override
public void flush() throws IOException {
// We only support flushes on files with multiple transactions, because
// flushes create significant overhead in HDFS. Record updaters with a
// single transaction should be closed rather than flushed.
if (flushLengths == null) {
throw new IllegalStateException("Attempting to flush a RecordUpdater on "
+ path + " with a single transaction.");
}
long len = writer.writeIntermediateFooter();
flushLengths.writeLong(len);
OrcInputFormat.SHIMS.hflush(flushLengths);
}
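// Illustrative timeline, assuming a multi-transaction delta: each flush()
// appends one 8-byte length to the side file, so after two flushes the side
// file holds two longs; a reader can trust the delta file up to the last
// recorded length even though the file is still open.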
@Override
public void close(boolean abort) throws IOException {
if (abort) {
if (flushLengths == null) {
fs.delete(path, false);
}
} else {
if (writer != null) writer.close();
}
if (flushLengths != null) {
flushLengths.close();
fs.delete(getSideFile(path), false);
}
writer = null;
}
@Override
public SerDeStats getStats() {
SerDeStats stats = new SerDeStats();
stats.setRowCount(rowCountDelta);
// Don't worry about setting raw data size diff. I have no idea how to calculate that
// without finding the row we are updating or deleting, which would be a mess.
return stats;
}
@VisibleForTesting
Writer getWriter() {
return writer;
}
private static final Charset utf8 = Charset.forName("UTF-8");
private static final CharsetDecoder utf8Decoder = utf8.newDecoder();
static RecordIdentifier[] parseKeyIndex(Reader reader) {
String[] stripes;
try {
ByteBuffer val =
reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME)
.duplicate();
stripes = utf8Decoder.decode(val).toString().split(";");
} catch (CharacterCodingException e) {
throw new IllegalArgumentException("Bad string encoding for " +
OrcRecordUpdater.ACID_KEY_INDEX_NAME, e);
}
RecordIdentifier[] result = new RecordIdentifier[stripes.length];
for(int i=0; i < stripes.length; ++i) {
if (stripes[i].length() != 0) {
String[] parts = stripes[i].split(",");
result[i] = new RecordIdentifier();
result[i].setValues(Long.parseLong(parts[0]),
Integer.parseInt(parts[1]), Long.parseLong(parts[2]));
}
}
return result;
}
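// Illustrative: a metadata value of "5,0,2;5,0,9" parses into two
// RecordIdentifiers, (transactionId=5, bucketId=0, rowId=2) for the first
// stripe and (transactionId=5, bucketId=0, rowId=9) for the second.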
static class KeyIndexBuilder implements OrcFile.WriterCallback {
StringBuilder lastKey = new StringBuilder();
long lastTransaction;
int lastBucket;
long lastRowId;
AcidStats acidStats = new AcidStats();
@Override
public void preStripeWrite(OrcFile.WriterContext context
) throws IOException {
lastKey.append(lastTransaction);
lastKey.append(',');
lastKey.append(lastBucket);
lastKey.append(',');
lastKey.append(lastRowId);
lastKey.append(';');
}
@Override
public void preFooterWrite(OrcFile.WriterContext context
) throws IOException {
context.getWriter().addUserMetadata(ACID_KEY_INDEX_NAME,
UTF8.encode(lastKey.toString()));
context.getWriter().addUserMetadata(ACID_STATS,
UTF8.encode(acidStats.serialize()));
}
void addKey(int op, long transaction, int bucket, long rowId) {
switch (op) {
case INSERT_OPERATION:
acidStats.inserts += 1;
break;
case UPDATE_OPERATION:
acidStats.updates += 1;
break;
case DELETE_OPERATION:
acidStats.deletes += 1;
break;
default:
throw new IllegalArgumentException("Unknown operation " + op);
}
lastTransaction = transaction;
lastBucket = bucket;
lastRowId = rowId;
}
}
/**
* An ObjectInspector that will strip out the record identifier so that the underlying writer
* doesn't see it.
*/
private static class RecIdStrippingObjectInspector extends StructObjectInspector {
private StructObjectInspector wrapped;
List<StructField> fields;
StructField recId;
RecIdStrippingObjectInspector(ObjectInspector oi, int rowIdColNum) {
if (!(oi instanceof StructObjectInspector)) {
throw new RuntimeException("Serious problem, expected a StructObjectInspector, " +
"but got a " + oi.getClass().getName());
}
wrapped = (StructObjectInspector)oi;
List<? extends StructField> wrappedFields = wrapped.getAllStructFieldRefs();
fields = new ArrayList<StructField>(wrappedFields.size());
for (int i = 0; i < wrappedFields.size(); i++) {
if (i == rowIdColNum) {
recId = wrappedFields.get(i);
} else {
fields.add(wrappedFields.get(i));
}
}
}
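// Illustrative: given a row schema struct<ROW__ID:struct<transactionid:bigint,
// bucketid:int,rowid:bigint>,a:int,b:string> and rowIdColNum == 0, this
// inspector exposes only the a and b fields to the writer while keeping the
// ROW__ID field available through getRecId().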
@Override
public List<? extends StructField> getAllStructFieldRefs() {
return fields;
}
@Override
public StructField getStructFieldRef(String fieldName) {
return wrapped.getStructFieldRef(fieldName);
}
@Override
public Object getStructFieldData(Object data, StructField fieldRef) {
// For performance, don't check that the fieldRef isn't recId every time;
// just assume that the caller used getAllStructFieldRefs and thus doesn't have that fieldRef.
return wrapped.getStructFieldData(data, fieldRef);
}
@Override
public List<Object> getStructFieldsDataAsList(Object data) {
return wrapped.getStructFieldsDataAsList(data);
}
@Override
public String getTypeName() {
return wrapped.getTypeName();
}
@Override
public Category getCategory() {
return wrapped.getCategory();
}
StructField getRecId() {
return recId;
}
}
}