
org.apache.hive.streaming.HiveStreamingConnection

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.streaming;

import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StreamCapabilities;
import org.apache.hadoop.hive.common.BlobStorageUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.TxnToWriteId;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.HdfsUtils;
import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hive.common.util.ShutdownHookManager;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;

/**
 * Streaming connection implementation for Hive. To create a streaming connection, use the builder API
 * to create the record writer first, followed by the connection itself. Once the connection is created, clients can
 * begin a transaction, keep writing using the connection, commit the transaction and close the connection when done.
 * To bind to the correct metastore, the HiveConf object has to be created from hive-site.xml or HIVE_CONF_DIR.
 * If the hive conf is created manually, the metastore URI has to be set correctly. If no hive conf object is
 * specified, "thrift://localhost:9083" will be used as the default.
 * <p>
 * NOTE: The streaming connection APIs and record writer APIs are not thread-safe. Streaming connection creation,
 * begin/commit/abort transactions, write and close have to be called from the same thread. If close() or
 * abortTransaction() has to be triggered from a separate thread, it has to be co-ordinated via external variables
 * or a synchronization mechanism.
 * <p>
 * Example usage:
 * {@code
 * // create delimited record writer whose schema exactly matches table schema
 * StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
 *                                      .withFieldDelimiter(',')
 *                                      .build();
 * // create and open streaming connection (default.src table has to exist already)
 * StreamingConnection connection = HiveStreamingConnection.newBuilder()
 *                                    .withDatabase("default")
 *                                    .withTable("src")
 *                                    .withAgentInfo("nifi-agent")
 *                                    .withRecordWriter(writer)
 *                                    .withHiveConf(hiveConf)
 *                                    .connect();
 * // begin a transaction, write records and commit 1st transaction
 * connection.beginTransaction();
 * connection.write("key1,val1".getBytes());
 * connection.write("key2,val2".getBytes());
 * connection.commitTransaction();
 * // begin another transaction, write more records and commit 2nd transaction
 * connection.beginTransaction();
 * connection.write("key3,val3".getBytes());
 * connection.write("key4,val4".getBytes());
 * connection.commitTransaction();
 * // close the streaming connection
 * connection.close();
 * }
 * 
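 * <p>
 * A further illustrative sketch (not part of the original example): preparing a HiveConf by hand and aborting a
 * failed transaction before closing. The metastore host, table and agent names below are placeholders, and the
 * enclosing method is assumed to declare {@code throws StreamingException}.
 * {@code
 * HiveConf hiveConf = new HiveConf();
 * // required when hive-site.xml / HIVE_CONF_DIR is not on the classpath
 * hiveConf.set("hive.metastore.uris", "thrift://metastore-host:9083");
 * StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
 *                                      .withFieldDelimiter(',')
 *                                      .build();
 * StreamingConnection connection = HiveStreamingConnection.newBuilder()
 *                                    .withDatabase("default")
 *                                    .withTable("src")
 *                                    .withAgentInfo("example-agent")
 *                                    .withRecordWriter(writer)
 *                                    .withHiveConf(hiveConf)
 *                                    .connect();
 * try {
 *   connection.beginTransaction();
 *   connection.write("key5,val5".getBytes());
 *   connection.commitTransaction();
 * } catch (StreamingException e) {
 *   // roll back the open transaction so the connection can be closed cleanly
 *   connection.abortTransaction();
 *   throw e;
 * } finally {
 *   connection.close();
 * }
 * }
 *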
 */
public class HiveStreamingConnection implements StreamingConnection {

  private static final Logger LOG = LoggerFactory.getLogger(HiveStreamingConnection.class.getName());

  private static final String DEFAULT_METASTORE_URI = "thrift://localhost:9083";
  private static final int DEFAULT_TRANSACTION_BATCH_SIZE = 1;
  private static final boolean DEFAULT_STREAMING_OPTIMIZATIONS_ENABLED = true;

  public enum TxnState {
    INACTIVE("I"), OPEN("O"), COMMITTED("C"), ABORTED("A"), PREPARED_FOR_COMMIT("P");

    private final String code;

    TxnState(String code) {
      this.code = code;
    }

    public String toString() {
      return code;
    }
  }

  // fields populated from builder
  private String database;
  private String table;
  private List<String> staticPartitionValues;
  private String agentInfo;
  private int transactionBatchSize;
  private RecordWriter recordWriter;
  private StreamingTransaction currentTransactionBatch;
  private HiveConf conf;
  private boolean streamingOptimizations;
  private AtomicBoolean isConnectionClosed = new AtomicBoolean(false);

  // internal fields
  private boolean isPartitionedTable;
  private IMetaStoreClient msClient;
  private IMetaStoreClient heartbeatMSClient;
  private final String username;
  private final boolean secureMode;
  private Table tableObject = null;
  private String metastoreUri;
  private ConnectionStats connectionStats;
  private final Long writeId;
  private final Integer statementId;
  private boolean manageTransactions;
  private int countTransactions = 0;
  private Set<String> partitions;
  private Map<String, WriteDirInfo> writePaths;
  private Runnable onShutdownRunner;

  private HiveStreamingConnection(Builder builder) throws StreamingException {
    this.database = builder.database.toLowerCase();
    this.table = builder.table.toLowerCase();
    this.staticPartitionValues = builder.staticPartitionValues;
    this.conf = builder.hiveConf;
    this.agentInfo = builder.agentInfo;
    this.streamingOptimizations = builder.streamingOptimizations;
    this.writeId = builder.writeId;
    this.statementId = builder.statementId;
    this.tableObject = builder.tableObject;
    this.setPartitionedTable(builder.isPartitioned);
    this.manageTransactions = builder.manageTransactions;
    this.writePaths = new HashMap<>();
    UserGroupInformation loggedInUser = null;
    try {
      loggedInUser = UserGroupInformation.getLoginUser();
    } catch (IOException e) {
      LOG.warn("Unable to get logged in user via UGI. err: {}", e.getMessage());
    }
    if (loggedInUser == null) {
      this.username = System.getProperty("user.name");
      this.secureMode = false;
    } else {
      this.username = loggedInUser.getShortUserName();
      this.secureMode = loggedInUser.hasKerberosCredentials();
    }
    this.transactionBatchSize = builder.transactionBatchSize;
    this.recordWriter = builder.recordWriter;
    this.connectionStats = new ConnectionStats();
    if (agentInfo == null) {
      try {
        agentInfo = username + ":" + InetAddress.getLocalHost().getHostName() + ":" + Thread.currentThread().getName();
      } catch (UnknownHostException e) {
        // ignore and use UUID instead
        this.agentInfo = UUID.randomUUID().toString();
      }
    }
    if (conf == null) {
      conf = createHiveConf(this.getClass(), DEFAULT_METASTORE_URI);
    }
    overrideConfSettings(conf);
    if (manageTransactions) {
      this.metastoreUri = conf.get(MetastoreConf.ConfVars.THRIFT_URIS.getHiveName());
      this.msClient = getMetaStoreClient(conf, metastoreUri, secureMode, "streaming-connection");
      // We use a separate metastore client for heartbeat calls to ensure heartbeat RPC calls are
      // isolated from the other transaction related RPC calls.
      this.heartbeatMSClient = getMetaStoreClient(conf, metastoreUri, secureMode, "streaming-connection-heartbeat");
      validateTable();
    }

    LOG.info("STREAMING CONNECTION INFO: {}", toConnectionInfoString());
  }

  public static Builder newBuilder() {
    return new Builder();
  }

  public static class Builder {
    private String database;
    private String table;
    private List<String> staticPartitionValues;
    private String agentInfo;
    private HiveConf hiveConf;
    private int transactionBatchSize = DEFAULT_TRANSACTION_BATCH_SIZE;
    private boolean streamingOptimizations = DEFAULT_STREAMING_OPTIMIZATIONS_ENABLED;
    private RecordWriter recordWriter;
    private long writeId = -1;
    private int statementId = -1;
    private boolean manageTransactions = true;
    private Table tableObject;
    private boolean isPartitioned;

    /**
     * Specify database to use for streaming connection.
     *
     * @param database - db name
     * @return - builder
     */
    public Builder withDatabase(final String database) {
      this.database = database;
      return this;
    }

    /**
     * Specify table to use for streaming connection.
     *
     * @param table - table name
     * @return - builder
     */
    public Builder withTable(final String table) {
      this.table = table;
      return this;
    }

    /**
     * Specify the name of partition to use for streaming connection.
     *
     * @param staticPartitionValues - static partition values
     * @return - builder
     */
    public Builder withStaticPartitionValues(final List<String> staticPartitionValues) {
      this.staticPartitionValues = staticPartitionValues == null ? null : new ArrayList<>(staticPartitionValues);
      return this;
    }

    /**
     * Specify agent info to use for streaming connection.
     *
     * @param agentInfo - agent info
     * @return - builder
     */
    public Builder withAgentInfo(final String agentInfo) {
      this.agentInfo = agentInfo;
      return this;
    }

    /**
     * Specify hive configuration object to use for streaming connection.
     * Generate this object by pointing to an already existing hive-site.xml or HIVE_CONF_DIR.
     * Make sure the metastore URI has been set correctly, else thrift://localhost:9083 will be
     * used as default.
     *
     * @param hiveConf - hive conf object
     * @return - builder
     */
    public Builder withHiveConf(final HiveConf hiveConf) {
      this.hiveConf = hiveConf;
      return this;
    }

    /**
     * Transaction batch size to use (default value is 1). This is expert level configuration.
     * For every transaction batch a delta directory will be created, which will impact
     * when compaction will trigger.
     * NOTE: This is an evolving API and is subject to change/might not be honored in future releases.
     *
     * @param transactionBatchSize - transaction batch size
     * @return - builder
     */
    @InterfaceStability.Evolving
    public Builder withTransactionBatchSize(final int transactionBatchSize) {
      this.transactionBatchSize = transactionBatchSize;
      return this;
    }

    /**
     * Whether to enable streaming optimizations. This is expert level configuration.
     * Disabling streaming optimizations will have a significant impact on performance and memory consumption.
     *
     * @param enable - flag to enable or not
     * @return - builder
     */
    public Builder withStreamingOptimizations(final boolean enable) {
      this.streamingOptimizations = enable;
      return this;
    }

    /**
     * Record writer to use for writing records to destination table.
     *
     * @param recordWriter - record writer
     * @return - builder
     */
    public Builder withRecordWriter(final RecordWriter recordWriter) {
      this.recordWriter = recordWriter;
      return this;
    }

    /**
     * Specify this parameter if we want the current connection
     * to join an ongoing transaction without having to query
     * the metastore to create it.
     *
     * @param writeId write id
     * @return builder
     */
    public Builder withWriteId(final long writeId) {
      this.writeId = writeId;
      manageTransactions = false;
      return this;
    }

    /**
     * Specify this parameter to set a statement id in the writer.
     * This really only makes sense to be specified when a writeId is
     * provided as well.
     *
     * @param statementId statement id
     * @return builder
     */
    public Builder withStatementId(final int statementId) {
      this.statementId = statementId;
      return this;
    }

    /**
     * Specify the table object since sometimes no connections
     * to the metastore will be opened.
     *
     * @param table table object.
     * @return builder
     */
    public Builder withTableObject(Table table) {
      this.tableObject = table;
      this.isPartitioned = tableObject.getPartitionKeys() != null
          && !tableObject.getPartitionKeys().isEmpty();
      return this;
    }

    /**
     * Returns a streaming connection to hive.
     *
     * @return - hive streaming connection
     */
    public HiveStreamingConnection connect() throws StreamingException {
      if (database == null) {
        throw new StreamingException("Database cannot be null for streaming connection");
      }
      if (table == null) {
        if (tableObject == null) {
          throw new StreamingException("Table and table object cannot be "
              + "null for streaming connection");
        } else {
          table = tableObject.getTableName();
        }
      }
      if (tableObject != null && !tableObject.getTableName().equals(table)) {
        throw new StreamingException("Table must match tableObject table name");
      }
      if (recordWriter == null) {
        throw new StreamingException("Record writer cannot be null for streaming connection");
      }
      if ((writeId != -1 && tableObject == null) || (writeId == -1 && tableObject != null)) {
        throw new StreamingException("If writeId is set, tableObject "
            + "must be set as well and vice versa");
      }
      HiveStreamingConnection streamingConnection = new HiveStreamingConnection(this);
      streamingConnection.onShutdownRunner = streamingConnection::close;
      // assigning higher priority than FileSystem shutdown hook so that streaming connection gets closed first before
      // filesystem close (to avoid ClosedChannelException)
      ShutdownHookManager.addShutdownHook(streamingConnection.onShutdownRunner, FileSystem.SHUTDOWN_HOOK_PRIORITY + 1);
      Thread.setDefaultUncaughtExceptionHandler((t, e) -> streamingConnection.close());
      return streamingConnection;
    }
  }

  private void setPartitionedTable(Boolean isPartitionedTable) {
    this.isPartitionedTable = isPartitionedTable;
  }

  @Override
  public String toString() {
    return "{ metaStoreUri: " + metastoreUri + ", database: " + database + ", table: " + table + " }";
  }

  private String toConnectionInfoString() {
    return "{ metastore-uri: " + metastoreUri + ", " +
        "database: " + database + ", " +
        "table: " + table + ", " +
        "partitioned-table: " + isPartitionedTable() + ", " +
        "dynamic-partitioning: " + isDynamicPartitioning() + ", " +
        "username: " + username + ", " +
        "secure-mode: " + secureMode + ", " +
        "record-writer: " + recordWriter.getClass().getSimpleName() + ", " +
        "agent-info: " + agentInfo + ", " +
        "writeId: " + writeId + ", " +
        "statementId: " + statementId + " }";
  }

  @VisibleForTesting
  String toTransactionString() {
    return currentTransactionBatch == null ? "" : currentTransactionBatch.toString();
  }

  @Override
  public PartitionInfo createPartitionIfNotExists(final List<String> partitionValues) throws StreamingException {
    String partLocation = null;
    String partName = null;
    boolean exists = false;
    try {
      Map<String, String> partSpec =
          Warehouse.makeSpecFromValues(tableObject.getPartitionKeys(), partitionValues);
      Path location = new Path(tableObject.getDataLocation(), Warehouse.makePartPath(partSpec));
      location = new Path(Utilities.getQualifiedPath(conf, location));
      partLocation = location.toString();
      partName = Warehouse.makePartName(tableObject.getPartitionKeys(), partitionValues);
      Partition partition =
          org.apache.hadoop.hive.ql.metadata.Partition.createMetaPartitionObject(tableObject, partSpec, location);
      if (getMSC() == null) {
        // We assume it doesn't exist if we can't check it
        // so the driver will decide
        return new PartitionInfo(partName, partLocation, false);
      }
      getMSC().add_partition(partition);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Created partition {} for table {}", partName, tableObject.getFullyQualifiedName());
      }
    } catch (AlreadyExistsException e) {
      exists = true;
    } catch (HiveException | TException e) {
      throw new StreamingException("Unable to create partition for values: " + partitionValues
          + " connection: " + toConnectionInfoString(), e);
    }
    return new PartitionInfo(partName, partLocation, exists);
  }

  /**
   * Returns the file that would be used to store rows for these parameters.
   *
   * @param partitionValues partition values
   * @param bucketId bucket id
   * @param minWriteId min write Id
   * @param maxWriteId max write Id
   * @param statementId statement Id
   * @return the location of the file.
   * @throws StreamingException when the path is not found
   */
  @Override
  public Path getDeltaFileLocation(List<String> partitionValues, Integer bucketId, Long minWriteId,
      Long maxWriteId, Integer statementId) throws StreamingException {
    return recordWriter.getDeltaFileLocation(partitionValues, bucketId, minWriteId, maxWriteId,
        statementId, tableObject);
  }

  IMetaStoreClient getMSC() {
    connectionStats.incrementMetastoreCalls();
    return msClient;
  }

  IMetaStoreClient getHeatbeatMSC() {
    connectionStats.incrementMetastoreCalls();
    return heartbeatMSClient;
  }

  private void validateTable() throws InvalidTable, ConnectionError {
    try {
      tableObject = new Table(getMSC().getTable(database, table));
    } catch (Exception e) {
      LOG.warn("Unable to validate the table for connection: " + toConnectionInfoString(), e);
      throw new InvalidTable(database, table, e);
    }
    // 1 - check that the table is Acid
    if (!AcidUtils.isFullAcidTable(tableObject)) {
      LOG.error("HiveEndPoint " + this + " must use an acid table");
      throw new InvalidTable(database, table, "is not an Acid table");
    }

    if (tableObject.getPartitionKeys() != null && !tableObject.getPartitionKeys().isEmpty()) {
      setPartitionedTable(true);
    } else {
      setPartitionedTable(false);
    }

    // partition values are specified on non-partitioned table
    if (!isPartitionedTable() && (staticPartitionValues != null && !staticPartitionValues.isEmpty())) {
      // Invalid if table is not partitioned, but endPoint's partitionVals is not empty
      String errMsg = this.toString() + " specifies partitions for un-partitioned table";
      LOG.error(errMsg);
      throw new ConnectionError(errMsg);
    }

    // batch size is only used for managed transactions, not for unmanaged single transactions
    if (transactionBatchSize > 1) {
      try (FileSystem fs = tableObject.getDataLocation().getFileSystem(conf)) {
        if (BlobStorageUtils.isBlobStorageFileSystem(conf, fs)) {
          // currently not all filesystems implement StreamCapabilities, while FSDataOutputStream does
          Path path = new Path("/tmp", "_tmp_stream_verify_" + UUID.randomUUID().toString());
          try (FSDataOutputStream out = fs.create(path, false)) {
            if (!out.hasCapability(StreamCapabilities.HFLUSH)) {
              throw new ConnectionError(
                  "The backing filesystem only supports transaction batch sizes of 1, but "
                      + transactionBatchSize + " was requested.");
            }
            fs.deleteOnExit(path);
          } catch (IOException e) {
            throw new ConnectionError("Could not create path for database", e);
          }
        }
      } catch (IOException e) {
        throw new ConnectionError("Could not retrieve FileSystem of table", e);
      }
    }
  }

  private void beginNextTransaction() throws StreamingException {
    if (currentTransactionBatch == null) {
      currentTransactionBatch = createNewTransactionBatch();
      LOG.info("Opened new transaction batch {}", currentTransactionBatch);
    }

    if (currentTransactionBatch.isClosed()) {
      throw new StreamingException("Cannot begin next transaction on a closed streaming connection");
    }

    if (currentTransactionBatch.remainingTransactions() == 0) {
      LOG.info("Transaction batch {} is done. Rolling over to next transaction batch.", currentTransactionBatch);
      closeCurrentTransactionBatch();
      currentTransactionBatch = createNewTransactionBatch();
      LOG.info("Rolled over to new transaction batch {}", currentTransactionBatch);
    }
    currentTransactionBatch.beginNextTransaction();
  }

  private StreamingTransaction createNewTransactionBatch() throws StreamingException {
    countTransactions++;
    if (manageTransactions) {
      return new TransactionBatch(this);
    } else {
      if (countTransactions > 1) {
        throw new StreamingException("If a writeId is passed for the "
            + "construction of HiveStreaming only one transaction batch"
            + " can be done");
      }
      return new UnManagedSingleTransaction(this);
    }
  }

  private void checkClosedState() throws StreamingException {
    if (isConnectionClosed.get()) {
      throw new StreamingException("Streaming connection is closed already.");
    }
  }

  private void checkState() throws StreamingException {
    checkClosedState();
    if (currentTransactionBatch == null) {
      throw new StreamingException("Transaction batch is null. Missing beginTransaction?");
    }
    if (currentTransactionBatch.getCurrentTransactionState() != TxnState.OPEN) {
      throw new StreamingException("Transaction state is not OPEN. Missing beginTransaction?");
    }
  }

  private void closeCurrentTransactionBatch() throws StreamingException {
    currentTransactionBatch.close();
    writePaths.clear();
  }

  @Override
  public void beginTransaction() throws StreamingException {
    checkClosedState();
    partitions = new HashSet<>();
    beginNextTransaction();
  }

  @Override
  public void commitTransaction() throws StreamingException {
    commitTransaction(null);
  }

  @Override
  public void commitTransaction(Set<String> partitions) throws StreamingException {
    commitTransaction(partitions, null, null);
  }

  @Override
  public void commitTransaction(Set<String> partitions, String key, String value)
      throws StreamingException {
    checkState();

    Set<String> createdPartitions = new HashSet<>();
    if (partitions != null) {
      for (String partition : partitions) {
        try {
          PartitionInfo info = createPartitionIfNotExists(Warehouse.getPartValuesFromPartName(partition));
          if (!info.isExists()) {
            createdPartitions.add(partition);
          }
        } catch (MetaException e) {
          throw new StreamingException("Partition " + partition + " is invalid.", e);
        }
      }
      connectionStats.incrementTotalPartitions(partitions.size());
    }

    currentTransactionBatch.commit(createdPartitions, key, value);
    this.partitions.addAll(currentTransactionBatch.getPartitions());
    connectionStats.incrementCreatedPartitions(createdPartitions.size());
    connectionStats.incrementCommittedTransactions();
  }

  @Override
  public void abortTransaction() throws StreamingException {
    checkState();
    currentTransactionBatch.abort();
    connectionStats.incrementAbortedTransactions();
  }

  @Override
  public void write(final byte[] record) throws StreamingException {
    checkState();
    currentTransactionBatch.write(record);
  }

  @Override
  public void write(final InputStream inputStream) throws StreamingException {
    checkState();
    currentTransactionBatch.write(inputStream);
  }

  /**
   * Close connection
   */
  @Override
  public void close() {
    if (isConnectionClosed.get()) {
      return;
    }
    isConnectionClosed.set(true);
    try {
      if (currentTransactionBatch != null) {
        closeCurrentTransactionBatch();
      }
    } catch (StreamingException e) {
      LOG.warn("Unable to close current transaction batch: " + currentTransactionBatch, e);
    } finally {
      if (manageTransactions) {
        getMSC().close();
        getHeatbeatMSC().close();
        try {
          // Close the HMS that is used for addWriteNotificationLog
          Hive.get(conf).getSynchronizedMSC().close();
        } catch (Exception e) {
          LOG.warn("Error while closing HMS connection", e);
        }
      }
      // remove shutdown hook entry added while creating this connection via HiveStreamingConnection.Builder#connect()
      if (!ShutdownHookManager.isShutdownInProgress()) {
        ShutdownHookManager.removeShutdownHook(this.onShutdownRunner);
      }
    }
    LOG.info("Closed streaming connection. Agent: {} Stats: {}", getAgentInfo(), getConnectionStats());
  }

  @Override
  public ConnectionStats getConnectionStats() {
    return connectionStats;
  }

  private static IMetaStoreClient getMetaStoreClient(HiveConf conf, String metastoreUri, boolean secureMode,
      String owner) throws ConnectionError {
    if (metastoreUri != null) {
      conf.set(MetastoreConf.ConfVars.THRIFT_URIS.getHiveName(), metastoreUri);
    }
    if (secureMode) {
      conf.setBoolean(MetastoreConf.ConfVars.USE_THRIFT_SASL.getHiveName(), true);
    }
    try {
      LOG.info("Creating metastore client for {}", owner);
      return HiveMetaStoreUtils.getHiveMetastoreClient(conf);
    } catch (MetaException | IOException e) {
      throw new ConnectionError("Error connecting to Hive Metastore URI: " + metastoreUri
          + ". " + e.getMessage(), e);
    }
  }

  private static class WriteDirInfo {
    List<String> partitionVals;
    Path writeDir;

    WriteDirInfo(List<String> partitionVals, Path writeDir) {
      this.partitionVals = partitionVals;
      this.writeDir = writeDir;
    }

    List<String> getPartitionVals() {
      return this.partitionVals;
    }

    Path getWriteDir() {
      return this.writeDir;
    }
  }

  @Override
  public void addWriteDirectoryInfo(List<String> partitionValues, Path writeDir) {
    String key = (partitionValues == null)
        ? tableObject.getFullyQualifiedName() : partitionValues.toString();
    if (writePaths.containsKey(key)) {
      // This method is invoked once per bucket file within delta directory. So, same partition or
      // table entry shall exist already. But the written delta directory should remain same for all
      // bucket files.
      WriteDirInfo dirInfo = writePaths.get(key);
      assert (dirInfo.getWriteDir().equals(writeDir));
    } else {
      writePaths.put(key, new WriteDirInfo(partitionValues, writeDir));
    }
  }

  /**
   * Add write notification events if it is enabled.
   *
   * @throws StreamingException File operation errors or HMS errors.
   */
  @Override
  public void addWriteNotificationEvents() throws StreamingException {
    if (!conf.getBoolVar(HiveConf.ConfVars.FIRE_EVENTS_FOR_DML)) {
      LOG.debug("Write notification log is ignored as dml event logging is disabled.");
      return;
    }
    try {
      // Traverse the write paths for the current streaming connection and add one write notification
      // event per table or partitions.
      // For non-partitioned table, there will be only one entry in writePath and corresponding
      // partitionVals is null.
      Long currentTxnId = getCurrentTxnId();
      Long currentWriteId = getCurrentWriteId();
      for (WriteDirInfo writeInfo : writePaths.values()) {
        LOG.debug("TxnId: " + currentTxnId + ", WriteId: " + currentWriteId
            + " - Logging write event for the files in path " + writeInfo.getWriteDir());

        // List the new files added inside the write path (delta directory).
        FileSystem fs = tableObject.getDataLocation().getFileSystem(conf);
        List<FileStatus> newFiles = HdfsUtils.listLocatedFileStatus(fs, writeInfo.getWriteDir(), null, true);

        // If no files are added by this streaming write, then no need to log write notification event.
        if (newFiles.isEmpty()) {
          LOG.debug("TxnId: " + currentTxnId + ", WriteId: " + currentWriteId
              + " - Skipping empty path " + writeInfo.getWriteDir());
          continue;
        }

        // Add write notification events into HMS table.
        Hive.addWriteNotificationLog(conf, tableObject, writeInfo.getPartitionVals(),
            currentTxnId, currentWriteId, newFiles, null);
      }
    } catch (IOException | TException | HiveException e) {
      throw new StreamingException("Failed to log write notification events.", e);
    }
  }

  @VisibleForTesting
  TxnState getCurrentTransactionState() {
    return currentTransactionBatch.getCurrentTransactionState();
  }

  @VisibleForTesting
  int remainingTransactions() {
    return currentTransactionBatch.remainingTransactions();
  }

  @VisibleForTesting
  Long getCurrentTxnId() {
    return currentTransactionBatch.getCurrentTxnId();
  }

  private HiveConf createHiveConf(Class<?> clazz, String metaStoreUri) {
    HiveConf conf = new HiveConf(clazz);
    if (metaStoreUri != null) {
      conf.set(MetastoreConf.ConfVars.THRIFT_URIS.getHiveName(), metaStoreUri);
    }
    return conf;
  }

  private void overrideConfSettings(HiveConf conf) {
    setHiveConf(conf, HiveConf.ConfVars.HIVE_TXN_MANAGER, DbTxnManager.class.getName());
    setHiveConf(conf, HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);
    setHiveConf(conf, MetastoreConf.ConfVars.EXECUTE_SET_UGI.getHiveName());
    setHiveConf(conf, HiveConf.ConfVars.DYNAMIC_PARTITIONING_MODE, "nonstrict");
    if (streamingOptimizations) {
      setHiveConf(conf, HiveConf.ConfVars.HIVE_ORC_DELTA_STREAMING_OPTIMIZATIONS_ENABLED, true);
    }
    // since same thread creates metastore client for streaming connection thread and heartbeat thread we explicitly
    // disable metastore client cache
    setHiveConf(conf, HiveConf.ConfVars.METASTORE_CLIENT_CACHE_ENABLED, false);
  }

  private static void setHiveConf(HiveConf conf, HiveConf.ConfVars var, String value) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Overriding HiveConf setting : " + var + " = " + value);
    }
    conf.setVar(var, value);
  }

  private static void setHiveConf(HiveConf conf, HiveConf.ConfVars var, boolean value) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Overriding HiveConf setting : " + var + " = " + value);
    }
    conf.setBoolVar(var, value);
  }

  private static void setHiveConf(HiveConf conf, String var) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Overriding HiveConf setting : " + var + " = " + true);
    }
    conf.setBoolean(var, true);
  }

  public List<TxnToWriteId> getTxnToWriteIds() {
    if (currentTransactionBatch != null) {
      return currentTransactionBatch.getTxnToWriteIds();
    }
    return null;
  }

  @Override
  public HiveConf getHiveConf() {
    return conf;
  }

  @Override
  public String getMetastoreUri() {
    return metastoreUri;
  }

  @Override
  public Table getTable() {
    return tableObject;
  }

  @Override
  public List<String> getStaticPartitionValues() {
    return staticPartitionValues;
  }

  @Override
  public String getAgentInfo() {
    return agentInfo;
  }

  @Override
  public boolean isPartitionedTable() {
    return isPartitionedTable;
  }

  @Override
  public boolean isDynamicPartitioning() {
    return isPartitionedTable() && (staticPartitionValues == null || staticPartitionValues.isEmpty());
  }

  @Override
  public Set<String> getPartitions() {
    return partitions;
  }

  public String getUsername() {
    return username;
  }

  public String getDatabase() {
    return database;
  }

  public RecordWriter getRecordWriter() {
    return recordWriter;
  }

  public int getTransactionBatchSize() {
    return transactionBatchSize;
  }

  public HiveConf getConf() {
    return conf;
  }

  public Long getWriteId() {
    return writeId;
  }

  public Integer getStatementId() {
    return statementId;
  }

  public Long getCurrentWriteId() {
    return currentTransactionBatch.getCurrentWriteId();
  }
}
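
Usage note (not part of the source above): a minimal sketch of streaming into a single static partition, using only the builder methods defined in this class. It assumes an existing ACID table default.page_views partitioned by one column, a HiveConf prepared as in the class javadoc (the hiveConf variable below), and an enclosing method that declares throws StreamingException; table, partition value and agent names are placeholders.

// Illustrative only: write one transaction into the static partition ("2024-01-01").
StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
    .withFieldDelimiter(',')
    .build();
StreamingConnection connection = HiveStreamingConnection.newBuilder()
    .withDatabase("default")
    .withTable("page_views")
    .withStaticPartitionValues(java.util.Collections.singletonList("2024-01-01"))
    .withAgentInfo("example-agent")
    .withTransactionBatchSize(10)   // expert-level knob; the default batch size is 1
    .withRecordWriter(writer)
    .withHiveConf(hiveConf)
    .connect();
try {
  connection.beginTransaction();
  connection.write("url1,10".getBytes());
  connection.commitTransaction();
} finally {
  connection.close();
}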



