/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io;
import static org.apache.hadoop.hive.common.AcidConstants.SOFT_DELETE_PATH_SUFFIX;
import static org.apache.hadoop.hive.common.AcidConstants.SOFT_DELETE_TABLE;
import static org.apache.hadoop.hive.common.FileUtils.HIDDEN_FILES_PATH_FILTER;
import static org.apache.hadoop.hive.metastore.TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY;
import static org.apache.hadoop.hive.metastore.TransactionalValidationListener.DEFAULT_TRANSACTIONAL_PROPERTY;
import static org.apache.hadoop.hive.ql.exec.Utilities.COPY_KEYWORD;
import static org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.common.FileUtils;
import com.google.common.base.Strings;
import com.google.common.base.Preconditions;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hive.common.AcidConstants;
import org.apache.hadoop.hive.common.AcidMetaDataFile;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.LockComponentBuilder;
import org.apache.hadoop.hive.metastore.TransactionalValidationListener;
import org.apache.hadoop.hive.metastore.api.DataOperationType;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.api.LockComponent;
import org.apache.hadoop.hive.metastore.api.LockType;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.TxnType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.txn.CompactionState;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc;
import org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity.WriteType;
import org.apache.hadoop.hive.ql.io.AcidInputFormat.DeltaFileMetaData;
import org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.LoadSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hive.common.util.Ref;
import org.apache.orc.FileFormatException;
import org.apache.orc.impl.OrcAcidUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import javax.annotation.concurrent.Immutable;
import java.nio.charset.Charset;
import java.util.stream.Collectors;
import java.util.stream.LongStream;
import java.util.stream.Stream;
/**
* Utilities that are shared by all of the ACID input and output formats. They
* are used by the compactor and cleaner and thus must be format agnostic.
*/
public class AcidUtils {
// This key will be put in the conf file when planning an acid operation
public static final String CONF_ACID_KEY = "hive.doing.acid";
public static final String BASE_PREFIX = AcidConstants.BASE_PREFIX;
public static final String COMPACTOR_TABLE_PROPERTY = "compactiontable";
public static final PathFilter baseFileFilter = new PathFilter() {
@Override
public boolean accept(Path path) {
return path.getName().startsWith(BASE_PREFIX);
}
};
public static final String DELTA_PREFIX = AcidConstants.DELTA_PREFIX;
public static final String DELETE_DELTA_PREFIX = AcidConstants.DELETE_DELTA_PREFIX;
/**
* Acid Streaming Ingest writes multiple transactions to the same file. It also maintains a
* {@link org.apache.orc.impl.OrcAcidUtils#getSideFile(Path)} side file which stores the length of
* the primary file as of the last commit ({@link OrcRecordUpdater#flush()}). That is the 'logical length'.
* Once the primary is closed, the side file is deleted (logical length = actual length) but if
* the writer dies or the primary file is being read while its still being written to, anything
* past the logical length should be ignored.
*
* @see org.apache.orc.impl.OrcAcidUtils#DELTA_SIDE_FILE_SUFFIX
* @see org.apache.orc.impl.OrcAcidUtils#getLastFlushLength(FileSystem, Path)
* @see #getLogicalLength(FileSystem, FileStatus)
*/
public static final String DELTA_SIDE_FILE_SUFFIX = "_flush_length";
public static final PathFilter deltaFileFilter = new PathFilter() {
@Override
public boolean accept(Path path) {
return path.getName().startsWith(DELTA_PREFIX);
}
};
public static final PathFilter deleteEventDeltaDirFilter = new PathFilter() {
@Override
public boolean accept(Path path) {
return path.getName().startsWith(DELETE_DELTA_PREFIX);
}
};
public static final String BUCKET_PREFIX = "bucket_";
public static final PathFilter bucketFileFilter = new PathFilter() {
@Override
public boolean accept(Path path) {
return path.getName().startsWith(BUCKET_PREFIX) &&
!path.getName().endsWith(DELTA_SIDE_FILE_SUFFIX);
}
};
public static final String BUCKET_DIGITS = AcidConstants.BUCKET_DIGITS;
public static final String LEGACY_FILE_BUCKET_DIGITS = AcidConstants.LEGACY_FILE_BUCKET_DIGITS;
public static final String DELTA_DIGITS = AcidConstants.DELTA_DIGITS;
/**
* 10K statements per tx. Probably overkill ... since that many delta files
* would not be good for performance
*/
public static final String STATEMENT_DIGITS = AcidConstants.STATEMENT_DIGITS;
/**
* This must be in sync with {@link #STATEMENT_DIGITS}
*/
public static final int MAX_STATEMENTS_PER_TXN = 10000;
public static final Pattern LEGACY_BUCKET_DIGIT_PATTERN = Pattern.compile("^[0-9]{6}");
public static final Pattern BUCKET_PATTERN = Pattern.compile("bucket_([0-9]+)(_[0-9]+)?$");
private static final Set<Integer> READ_TXN_TOKENS = new HashSet<>();
private static Cache dirCache;
private static AtomicBoolean dirCacheInited = new AtomicBoolean();
static {
READ_TXN_TOKENS.addAll(Arrays.asList(
HiveParser.TOK_DESCDATABASE,
HiveParser.TOK_DESCTABLE,
HiveParser.TOK_EXPLAIN,
HiveParser.TOK_EXPLAIN_SQ_REWRITE
));
}
/**
* A write into a non-acid table produces files like 0000_0 or 0000_0_copy_1
* (Unless via Load Data statement)
*/
public static final PathFilter originalBucketFilter = new PathFilter() {
@Override
public boolean accept(Path path) {
return ORIGINAL_PATTERN.matcher(path.getName()).matches() ||
ORIGINAL_PATTERN_COPY.matcher(path.getName()).matches();
}
};
private AcidUtils() {
// NOT USED
}
private static final Logger LOG = LoggerFactory.getLogger(AcidUtils.class);
public static final Pattern ORIGINAL_PATTERN =
Pattern.compile("[0-9]+_[0-9]+");
/**
* @see org.apache.hadoop.hive.ql.exec.Utilities#COPY_KEYWORD
*/
public static final Pattern ORIGINAL_PATTERN_COPY =
Pattern.compile("[0-9]+_[0-9]+" + COPY_KEYWORD + "[0-9]+");
public static final PathFilter acidHiddenFileFilter = new PathFilter() {
@Override
public boolean accept(Path p) {
String name = p.getName();
// Don't filter out MetaDataFile.METADATA_FILE
if (name.startsWith(MetaDataFile.METADATA_FILE)) {
return true;
}
// Don't filter out OrcAcidVersion.ACID_FORMAT
if (name.startsWith(OrcAcidVersion.ACID_FORMAT)) {
return true;
}
return HIDDEN_FILES_PATH_FILTER.accept(p);
}
};
public static final PathFilter acidTempDirFilter = new PathFilter() {
@Override
public boolean accept(Path dirPath) {
String dirPathStr = dirPath.toString();
// We don't want to filter out temp tables
if (dirPathStr.contains(SessionState.TMP_PREFIX)) {
return true;
}
if ((dirPathStr.contains("/.")) || (dirPathStr.contains("/_"))) {
return false;
} else {
return true;
}
}
};
public static final String VISIBILITY_PREFIX = AcidConstants.VISIBILITY_PREFIX;
public static final Pattern VISIBILITY_PATTERN = AcidConstants.VISIBILITY_PATTERN;
private static final HadoopShims SHIMS = ShimLoader.getHadoopShims();
/**
* Create the bucket filename in Acid format
* @param subdir the subdirectory for the bucket.
* @param bucket the bucket number
* @return the filename
*/
public static Path createBucketFile(Path subdir, int bucket) {
return createBucketFile(subdir, bucket, null, true);
}
public static Path createBucketFile(Path subdir, int bucket, Integer attemptId) {
return createBucketFile(subdir, bucket, attemptId, true);
}
/**
* Create acid or original bucket name
* @param subdir the subdirectory for the bucket.
* @param bucket the bucket number
* @return the filename
*/
private static Path createBucketFile(Path subdir, int bucket, Integer attemptId, boolean isAcidSchema) {
if(isAcidSchema) {
String fileName = BUCKET_PREFIX + String.format(BUCKET_DIGITS, bucket);
if (attemptId != null) {
fileName = fileName + "_" + attemptId;
}
return new Path(subdir, fileName);
}
else {
return new Path(subdir,
String.format(BUCKET_DIGITS, bucket));
}
}
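// Illustrative results of the public createBucketFile overloads above (a sketch; the paths are
// hypothetical and assume BUCKET_DIGITS pads the bucket id to 5 digits):
//   createBucketFile(new Path("/warehouse/t/delta_0000005_0000005"), 1)
//     -> /warehouse/t/delta_0000005_0000005/bucket_00001
//   createBucketFile(new Path("/warehouse/t/delta_0000005_0000005"), 1, 3)
//     -> /warehouse/t/delta_0000005_0000005/bucket_00001_3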
/**
* This is the format of the delta dir name prior to Hive 1.3.x
*/
public static String deltaSubdir(long min, long max) {
return DELTA_PREFIX + String.format(DELTA_DIGITS, min) + "_" +
String.format(DELTA_DIGITS, max);
}
/**
* Each write statement in a transaction creates its own delta dir.
* @since 1.3.x
*/
public static String deltaSubdir(long min, long max, int statementId) {
return deltaSubdir(min, max) + "_" + String.format(STATEMENT_DIGITS, statementId);
}
/**
* This is the format of the delete delta dir name prior to Hive 2.2.x
*/
@VisibleForTesting
public static String deleteDeltaSubdir(long min, long max) {
return DELETE_DELTA_PREFIX + String.format(DELTA_DIGITS, min) + "_" +
String.format(DELTA_DIGITS, max);
}
/**
* Each write statement in a transaction creates its own delete delta dir,
* when split-update acid operational property is turned on.
* @since 2.2.x
*/
@VisibleForTesting
public static String deleteDeltaSubdir(long min, long max, int statementId) {
return deleteDeltaSubdir(min, max) + "_" + String.format(STATEMENT_DIGITS, statementId);
}
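// Illustrative directory names produced by the helpers above (a sketch; assumes DELTA_DIGITS
// pads write ids to 7 digits and STATEMENT_DIGITS pads statement ids to 4 digits):
//   deltaSubdir(5, 5)          -> "delta_0000005_0000005"
//   deltaSubdir(5, 5, 1)       -> "delta_0000005_0000005_0001"
//   deleteDeltaSubdir(5, 5, 1) -> "delete_delta_0000005_0000005_0001"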
public static String baseDir(long writeId) {
return AcidConstants.baseDir(writeId);
}
/**
* Return a base or delta directory string
* according to the given "baseDirRequired".
*/
public static String baseOrDeltaSubdir(boolean baseDirRequired, long min, long max, int statementId) {
if (!baseDirRequired) {
return deltaSubdir(min, max, statementId);
} else {
return baseDir(min);
}
}
/**
* Return a base or delta directory path according to the given "options".
*/
public static Path baseOrDeltaSubdirPath(Path directory, AcidOutputFormat.Options options) {
String subdir;
if (options.isWritingBase()) {
subdir = BASE_PREFIX + String.format(DELTA_DIGITS,
options.getMaximumWriteId());
} else if(options.getStatementId() == -1) {
//when minor compaction runs, we collapse per statement delta files inside a single
//transaction so we no longer need a statementId in the file name
subdir = options.isWritingDeleteDelta() ?
deleteDeltaSubdir(options.getMinimumWriteId(),
options.getMaximumWriteId())
: deltaSubdir(options.getMinimumWriteId(),
options.getMaximumWriteId());
} else {
subdir = options.isWritingDeleteDelta() ?
deleteDeltaSubdir(options.getMinimumWriteId(),
options.getMaximumWriteId(),
options.getStatementId())
: deltaSubdir(options.getMinimumWriteId(),
options.getMaximumWriteId(),
options.getStatementId());
}
subdir = addVisibilitySuffix(subdir, options.getVisibilityTxnId());
return new Path(directory, subdir);
}
/**
* Create a filename for a bucket file.
* @param directory the partition directory
* @param options the options for writing the bucket
* @return the filename that should store the bucket
*/
public static Path createFilename(Path directory,
AcidOutputFormat.Options options) {
if (options.getOldStyle()) {
return new Path(directory, String.format(LEGACY_FILE_BUCKET_DIGITS,
options.getBucketId()) + "_0");
} else {
return createBucketFile(baseOrDeltaSubdirPath(directory, options), options.getBucketId(), options.getAttemptId());
}
}
/**
* Since Hive 4.0, compactor produces directories with {@link #VISIBILITY_PATTERN} suffix.
* _v0 is equivalent to no suffix, for backwards compatibility.
*/
public static String addVisibilitySuffix(String baseOrDeltaDir, long visibilityTxnId) {
if(visibilityTxnId == 0) {
return baseOrDeltaDir;
}
return baseOrDeltaDir + VISIBILITY_PREFIX
+ String.format(DELTA_DIGITS, visibilityTxnId);
}
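// Illustrative examples (a sketch; assumes VISIBILITY_PREFIX is "_v" and DELTA_DIGITS pads to 7 digits):
//   addVisibilitySuffix("delta_0000005_0000005", 0)  -> "delta_0000005_0000005" (no suffix, always visible)
//   addVisibilitySuffix("delta_0000005_0000005", 17) -> "delta_0000005_0000005_v0000017"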
public static boolean isLocklessReadsEnabled(Table table, HiveConf conf) {
return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_ACID_LOCKLESS_READS_ENABLED)
&& AcidUtils.isTransactionalTable(table);
}
public static boolean isTableSoftDeleteEnabled(Table table, HiveConf conf) {
boolean isSoftDelete = HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_CREATE_TABLE_USE_SUFFIX)
|| HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_LOCKLESS_READS_ENABLED);
return isSoftDelete && AcidUtils.isTransactionalTable(table)
&& Boolean.parseBoolean(table.getProperty(SOFT_DELETE_TABLE));
}
/**
* Represents bucketId and copy_N suffix
*/
public static final class BucketMetaData {
private static final BucketMetaData INVALID = new BucketMetaData(-1, 0);
/**
* @param bucketFileName {@link #ORIGINAL_PATTERN} or {@link #ORIGINAL_PATTERN_COPY}
*/
public static BucketMetaData parse(String bucketFileName) {
if (ORIGINAL_PATTERN.matcher(bucketFileName).matches()) {
int bucketId = Integer
.parseInt(bucketFileName.substring(0, bucketFileName.indexOf('_')));
return new BucketMetaData(bucketId, 0);
}
else if(ORIGINAL_PATTERN_COPY.matcher(bucketFileName).matches()) {
int copyNumber = Integer.parseInt(
bucketFileName.substring(bucketFileName.lastIndexOf('_') + 1));
int bucketId = Integer
.parseInt(bucketFileName.substring(0, bucketFileName.indexOf('_')));
return new BucketMetaData(bucketId, copyNumber);
}
else if (bucketFileName.startsWith(BUCKET_PREFIX)) {
return new BucketMetaData(Integer
.parseInt(bucketFileName.substring(bucketFileName.indexOf('_') + 1)), 0);
}
return INVALID;
}
public static BucketMetaData parse(Path bucketFile) {
return parse(bucketFile.getName());
}
/**
* -1 if non-standard file name
*/
public final int bucketId;
/**
* 0 means no copy_N suffix
*/
public final int copyNumber;
private BucketMetaData(int bucketId, int copyNumber) {
this.bucketId = bucketId;
this.copyNumber = copyNumber;
}
}
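// Illustrative BucketMetaData.parse() results for the file-name shapes handled above (a sketch):
//   parse("000001_0")        -> bucketId=1, copyNumber=0  (ORIGINAL_PATTERN)
//   parse("000001_0_copy_2") -> bucketId=1, copyNumber=2  (ORIGINAL_PATTERN_COPY)
//   parse("bucket_00005")    -> bucketId=5, copyNumber=0  (BUCKET_PREFIX)
//   parse("foo.txt")         -> INVALID (bucketId=-1)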
/**
* Determine if a table is used during query based compaction.
* @param tblProperties table properties
* @return true, if the tblProperties contains {@link AcidUtils#COMPACTOR_TABLE_PROPERTY}
*/
public static boolean isCompactionTable(Properties tblProperties) {
return tblProperties != null && isCompactionTable(Maps.fromProperties(tblProperties));
}
/**
* Determine if a table is used during query based compaction.
* @param parameters table properties map
* @return true, if the parameters contains {@link AcidUtils#COMPACTOR_TABLE_PROPERTY}
*/
public static boolean isCompactionTable(Map<String, String> parameters) {
return StringUtils.isNotBlank(parameters.get(COMPACTOR_TABLE_PROPERTY));
}
/**
* Get the bucket id from the file path
* @param bucketFile - bucket file path
* @return - bucket id
*/
public static int parseBucketId(Path bucketFile) {
String filename = bucketFile.getName();
if (ORIGINAL_PATTERN.matcher(filename).matches() || ORIGINAL_PATTERN_COPY.matcher(filename).matches()) {
return Integer.parseInt(filename.substring(0, filename.indexOf('_')));
} else if (filename.startsWith(BUCKET_PREFIX)) {
Matcher matcher = BUCKET_PATTERN.matcher(filename);
if (matcher.matches()) {
String bucketId = matcher.group(1);
filename = filename.substring(0,matcher.end(1));
if (Utilities.FILE_OP_LOGGER.isDebugEnabled()) {
Utilities.FILE_OP_LOGGER.debug("Parsing bucket ID = " + bucketId + " from file name '" + filename + "'");
}
return Integer.parseInt(bucketId);
}
}
return -1;
}
public static Integer parseAttemptId(Path bucketFile) {
String filename = bucketFile.getName();
Matcher matcher = BUCKET_PATTERN.matcher(filename);
Integer attemptId = null;
if (matcher.matches()) {
attemptId = matcher.group(2) != null ? Integer.valueOf(matcher.group(2).substring(1)) : null;
}
if (Utilities.FILE_OP_LOGGER.isDebugEnabled()) {
Utilities.FILE_OP_LOGGER.debug("Parsing attempt ID = " + attemptId + " from file name '" + bucketFile + "'");
}
return attemptId;
}
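// Illustrative results of parseBucketId()/parseAttemptId() (a sketch; the file names are hypothetical):
//   bucket_00001   -> bucketId=1,  attemptId=null
//   bucket_00001_5 -> bucketId=1,  attemptId=5
//   000000_0       -> bucketId=0,  attemptId=null  (original, non-acid file name)
//   data.orc       -> bucketId=-1, attemptId=null  (non-standard file name)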
/**
* Parse a bucket filename back into the options that would have created
* the file.
* @param bucketFile the path to a bucket file
* @param conf the configuration
* @return the options used to create that filename
*/
public static AcidOutputFormat.Options
parseBaseOrDeltaBucketFilename(Path bucketFile,
Configuration conf) {
AcidOutputFormat.Options result = new AcidOutputFormat.Options(conf);
String filename = bucketFile.getName();
int bucket = parseBucketId(bucketFile);
Integer attemptId = parseAttemptId(bucketFile);
if (ORIGINAL_PATTERN.matcher(filename).matches() || ORIGINAL_PATTERN_COPY.matcher(filename).matches()) {
long minWriteId = 0;
long maxWriteId = 0;
int statementId = -1;
if (bucketFile.getParent().getName().startsWith(DELTA_PREFIX)) {
ParsedDeltaLight parsedDelta = ParsedDeltaLight.parse(bucketFile.getParent());
minWriteId = parsedDelta.getMinWriteId();
maxWriteId = parsedDelta.getMaxWriteId();
statementId = parsedDelta.getStatementId();
}
result
.setOldStyle(true)
.minimumWriteId(minWriteId)
.maximumWriteId(maxWriteId)
.statementId(statementId)
.bucket(bucket)
.writingBase(!bucketFile.getParent().getName().startsWith(DELTA_PREFIX));
}
else if (filename.startsWith(BUCKET_PREFIX)) {
if (bucketFile.getParent().getName().startsWith(BASE_PREFIX)) {
result
.setOldStyle(false)
.minimumWriteId(0)
.maximumWriteId(ParsedBaseLight.parseBase(bucketFile.getParent()).getWriteId())
.bucket(bucket)
.writingBase(true);
} else if (bucketFile.getParent().getName().startsWith(DELTA_PREFIX)) {
ParsedDeltaLight parsedDelta = ParsedDeltaLight.parse(bucketFile.getParent());
result
.setOldStyle(false)
.minimumWriteId(parsedDelta.minWriteId)
.maximumWriteId(parsedDelta.maxWriteId)
.statementId(parsedDelta.statementId)
.bucket(bucket)
.attemptId(attemptId);
} else if (bucketFile.getParent().getName().startsWith(DELETE_DELTA_PREFIX)) {
ParsedDeltaLight parsedDelta = ParsedDeltaLight.parse(bucketFile.getParent());
result
.setOldStyle(false)
.minimumWriteId(parsedDelta.minWriteId)
.maximumWriteId(parsedDelta.maxWriteId)
.statementId(parsedDelta.statementId)
.bucket(bucket);
}
} else {
result.setOldStyle(true).bucket(bucket).minimumWriteId(0)
.maximumWriteId(0);
}
return result;
}
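// Illustrative trace of parseBaseOrDeltaBucketFilename() (a sketch; the paths are hypothetical):
//   delta_0000005_0000005_0001/bucket_00001_3
//     -> oldStyle=false, minimumWriteId=5, maximumWriteId=5, statementId=1, bucket=1, attemptId=3
//   base_0000010/bucket_00000
//     -> oldStyle=false, minimumWriteId=0, maximumWriteId=10, bucket=0, writingBase=true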
/**
* If the direct insert is on for ACID tables, the files will contain an "_attemptID" postfix.
* In order to be able to read the files from the delete deltas, we need to know which
* attemptId belongs to which delta. To make this lookup easy, this method creates a map
* to link the deltas to the attemptId.
* @param pathToDeltaMetaData
* @param deleteDeltaDirs
* @param bucket
* @return
*/
public static Map<String, Integer> getDeltaToAttemptIdMap(
Map<String, AcidInputFormat.DeltaMetaData> pathToDeltaMetaData, Path[] deleteDeltaDirs, int bucket) {
Map<String, Integer> deltaToAttemptId = new HashMap<>();
for (Path delta : deleteDeltaDirs) {
AcidInputFormat.DeltaMetaData deltaMetaData = pathToDeltaMetaData.get(delta.getName());
for (DeltaFileMetaData files : deltaMetaData.getDeltaFiles()) {
if (bucket == files.getBucketId()) {
deltaToAttemptId.put(delta.getName(), files.getAttemptId());
break;
}
}
}
return deltaToAttemptId;
}
// This is used for (full) Acid tables. Insert-only tables use NOT_ACID.
public enum Operation implements Serializable {
NOT_ACID, INSERT, UPDATE, DELETE;
}
/**
* Logically this should have been defined in Operation but that causes a dependency
* on metastore package from exec jar (from the cluster) which is not allowed.
* This method should only be called from client side where metastore.* classes are present.
* Not following this will not be caught by unit tests since they have all the jars loaded.
*/
public static DataOperationType toDataOperationType(Operation op) {
switch (op) {
case NOT_ACID:
return DataOperationType.UNSET;
case INSERT:
return DataOperationType.INSERT;
case UPDATE:
return DataOperationType.UPDATE;
case DELETE:
return DataOperationType.DELETE;
default:
throw new IllegalArgumentException("Unexpected Operation: " + op);
}
}
public enum AcidBaseFileType {
/**
* File w/o Acid meta columns. This would be the case for files that were added to the table
* before it was converted to Acid but not yet major compacted. May also be the result of a
* Load Data statement on an acid table.
*/
ORIGINAL_BASE,
/**
* File that has Acid metadata columns embedded in it. Found in base_x/ or delta_x_y/.
*/
ACID_SCHEMA,
}
/**
* A simple wrapper class that stores the information about a base file and its type.
* Orc splits can be generated on three kinds of base files: an original file (non-acid converted
* files), a regular base file (created by major compaction) or an insert delta (which can be
* treated as a base when split-update is enabled for acid).
*/
public static class FileInfo {
final private HdfsFileStatusWithId fileId;
final private AcidBaseFileType acidBaseFileType;
public FileInfo(HdfsFileStatusWithId fileId, AcidBaseFileType acidBaseFileType) {
this.fileId = fileId;
this.acidBaseFileType = acidBaseFileType;
}
public boolean isOriginal() {
return this.acidBaseFileType == AcidBaseFileType.ORIGINAL_BASE;
}
public HdfsFileStatusWithId getHdfsFileStatusWithId() {
return this.fileId;
}
}
/**
* Current syntax for creating full acid transactional tables is any one of following 3 ways:
* create table T (a int, b int) stored as orc tblproperties('transactional'='true').
* create table T (a int, b int) stored as orc tblproperties('transactional'='true',
* 'transactional_properties'='default').
* create table T (a int, b int) stored as orc tblproperties('transactional'='true',
* 'transactional_properties'='split_update').
* These are all identical and create a table capable of insert/update/delete/merge operations
* with full ACID semantics at Snapshot Isolation. These tables require ORC input/output format.
*
* To create a 1/4 acid, aka Micro Managed table:
* create table T (a int, b int) stored as orc tblproperties('transactional'='true',
* 'transactional_properties'='insert_only').
* These tables only support insert operation (also with full ACID semantics at SI).
*
*/
public static class AcidOperationalProperties {
private int description = 0x00;
public static final int SPLIT_UPDATE_BIT = 0x01;
public static final String SPLIT_UPDATE_STRING = "split_update";
public static final int HASH_BASED_MERGE_BIT = 0x02;
public static final String HASH_BASED_MERGE_STRING = "hash_merge";
public static final int INSERT_ONLY_BIT = 0x04;
public static final int INSERT_ONLY_FETCH_BUCKET_ID_BIT = 0x08;
public static final int FETCH_DELETED_ROWS_BIT = 0x10;
public static final String INSERT_ONLY_STRING = "insert_only";
public static final String INSERT_ONLY_FETCH_BUCKET_ID_STRING = "insert_only_fetch_bucket_id";
public static final String FETCH_DELETED_ROWS_STRING = "fetch_deleted_rows";
public static final String DEFAULT_VALUE_STRING = TransactionalValidationListener.DEFAULT_TRANSACTIONAL_PROPERTY;
public static final String INSERTONLY_VALUE_STRING = TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY;
private AcidOperationalProperties() {
}
/**
* Returns an acidOperationalProperties object that represents default ACID behavior for tables
* that do not explicitly specify/override the default behavior.
* @return the acidOperationalProperties object.
*/
public static AcidOperationalProperties getDefault() {
AcidOperationalProperties obj = new AcidOperationalProperties();
obj.setSplitUpdate(true);
obj.setHashBasedMerge(false);
obj.setInsertOnly(false);
return obj;
}
/**
* Returns an acidOperationalProperties object for tables that use the ACID framework but only
* support the INSERT operation and do not require ORC or bucketing.
* @return the acidOperationalProperties object
*/
public static AcidOperationalProperties getInsertOnly() {
AcidOperationalProperties obj = new AcidOperationalProperties();
obj.setInsertOnly(true);
return obj;
}
/**
* Returns an acidOperationalProperties object that is represented by an encoded string.
* @param propertiesStr an encoded string representing the acidOperationalProperties.
* @return the acidOperationalProperties object.
*/
public static AcidOperationalProperties parseString(String propertiesStr) {
if (propertiesStr == null) {
return AcidOperationalProperties.getDefault();
}
if (propertiesStr.equalsIgnoreCase(DEFAULT_VALUE_STRING)) {
return AcidOperationalProperties.getDefault();
}
if (propertiesStr.equalsIgnoreCase(INSERTONLY_VALUE_STRING)) {
return AcidOperationalProperties.getInsertOnly();
}
AcidOperationalProperties obj = new AcidOperationalProperties();
String[] options = propertiesStr.split("\\|");
for (String option : options) {
if (option.trim().length() == 0) continue; // ignore empty strings
switch (option) {
case SPLIT_UPDATE_STRING:
obj.setSplitUpdate(true);
break;
case HASH_BASED_MERGE_STRING:
obj.setHashBasedMerge(true);
break;
default:
throw new IllegalArgumentException(
"Unexpected value " + option + " for ACID operational properties!");
}
}
return obj;
}
/**
* Returns an acidOperationalProperties object that is represented by an encoded 32-bit integer.
* @param properties an encoded 32-bit representing the acidOperationalProperties.
* @return the acidOperationalProperties object.
*/
public static AcidOperationalProperties parseInt(int properties) {
AcidOperationalProperties obj = new AcidOperationalProperties();
if ((properties & SPLIT_UPDATE_BIT) > 0) {
obj.setSplitUpdate(true);
}
if ((properties & HASH_BASED_MERGE_BIT) > 0) {
obj.setHashBasedMerge(true);
}
if ((properties & INSERT_ONLY_BIT) > 0) {
obj.setInsertOnly(true);
}
if ((properties & INSERT_ONLY_FETCH_BUCKET_ID_BIT) > 0) {
obj.setInsertOnlyFetchBucketId(true);
}
if ((properties & FETCH_DELETED_ROWS_BIT) > 0) {
obj.setFetchDeletedRows(true);
}
return obj;
}
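// Illustrative string/int encodings handled by parseString()/parseInt() above (a sketch; assumes
// DEFAULT_VALUE_STRING is "default" and INSERTONLY_VALUE_STRING is "insert_only"):
//   parseString("default").toInt()     -> 1  (SPLIT_UPDATE_BIT, same as getDefault())
//   parseString("insert_only").toInt() -> 4  (INSERT_ONLY_BIT)
//   parseInt(0x03)                     -> split update and hash-based merge both enabled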
/**
* Sets the split update property for ACID operations based on the boolean argument.
* When split update is turned on, an update ACID event is interpreted as a combination of
* delete event followed by an update event.
* @param isSplitUpdate a boolean property that turns on split update when true.
* @return the acidOperationalProperties object.
*/
public AcidOperationalProperties setSplitUpdate(boolean isSplitUpdate) {
return set(isSplitUpdate, SPLIT_UPDATE_BIT);
}
/**
* Sets the hash-based merge property for ACID operations that combines delta files using
* GRACE hash join based approach, when turned on. (Currently unimplemented!)
* @param isHashBasedMerge a boolean property that turns on hash-based merge when true.
* @return the acidOperationalProperties object.
*/
public AcidOperationalProperties setHashBasedMerge(boolean isHashBasedMerge) {
return set(isHashBasedMerge, HASH_BASED_MERGE_BIT);
}
public AcidOperationalProperties setInsertOnly(boolean isInsertOnly) {
return set(isInsertOnly, INSERT_ONLY_BIT);
}
public AcidOperationalProperties setInsertOnlyFetchBucketId(boolean fetchBucketId) {
return set(fetchBucketId, INSERT_ONLY_FETCH_BUCKET_ID_BIT);
}
public AcidOperationalProperties setFetchDeletedRows(boolean fetchDeletedRows) {
return set(fetchDeletedRows, FETCH_DELETED_ROWS_BIT);
}
private AcidOperationalProperties set(boolean value, int bit) {
description = (value ? (description | bit) : (description & ~bit));
return this;
}
public boolean isSplitUpdate() {
return (description & SPLIT_UPDATE_BIT) > 0;
}
public boolean isHashBasedMerge() {
return (description & HASH_BASED_MERGE_BIT) > 0;
}
public boolean isInsertOnly() {
return (description & INSERT_ONLY_BIT) > 0;
}
public boolean isFetchBucketId() {
return (description & INSERT_ONLY_FETCH_BUCKET_ID_BIT) > 0;
}
public boolean isFetchDeletedRows() {
return (description & FETCH_DELETED_ROWS_BIT) > 0;
}
public int toInt() {
return description;
}
@Override
public String toString() {
StringBuilder str = new StringBuilder();
if (isSplitUpdate()) {
str.append("|" + SPLIT_UPDATE_STRING);
}
if (isHashBasedMerge()) {
str.append("|" + HASH_BASED_MERGE_STRING);
}
if (isInsertOnly()) {
str.append("|" + INSERT_ONLY_STRING);
}
if (isFetchBucketId()) {
str.append("|" + INSERT_ONLY_FETCH_BUCKET_ID_STRING);
}
if (isFetchDeletedRows()) {
str.append("|" + FETCH_DELETED_ROWS_STRING);
}
return str.toString();
}
}
public interface Directory {
List getFiles() throws IOException;
FileSystem getFs();
Path getPath();
List<ParsedDelta> getDeleteDeltas();
}
public interface ParsedDirectory {
public List<HdfsFileStatusWithId> getFiles(FileSystem fs, Ref<Boolean> useFileIds) throws IOException;
}
/**
* Since version 3 but prior to version 4, the format of a base is "base_X", where X is a writeId.
* If this base was produced by a compactor, X is the highest writeId that the compactor included.
* If this base is produced by Insert Overwrite stmt, X is a writeId of the transaction that
* executed the insert.
* Since Hive Version 4.0, the format of a base produced by a compactor is
* base_X_vY. X is like before, i.e. the highest writeId compactor included and Y is the
* visibilityTxnId of the transaction in which the compactor ran.
* (v(isibility) is a literal to help parsing).
*/
public static class ParsedBaseLight {
protected final long writeId;
protected final long visibilityTxnId;
protected final Path baseDirPath;
ParsedBaseLight(long writeId, Path baseDirPath) {
this(writeId, 0, baseDirPath);
}
ParsedBaseLight(long writeId, long visibilityTxnId, Path baseDirPath) {
this.writeId = writeId;
this.visibilityTxnId = visibilityTxnId;
this.baseDirPath = baseDirPath;
}
public long getWriteId() {
return writeId;
}
public long getVisibilityTxnId() {
return visibilityTxnId;
}
public Path getBaseDirPath() {
return baseDirPath;
}
public static ParsedBaseLight parseBase(Path path) {
String filename = path.getName();
if (!filename.startsWith(BASE_PREFIX)) {
throw new IllegalArgumentException(filename + " does not start with " + BASE_PREFIX);
}
int idxOfv = filename.indexOf(VISIBILITY_PREFIX);
if (idxOfv < 0) {
return new ParsedBaseLight(Long.parseLong(filename.substring(BASE_PREFIX.length())), path);
}
return new ParsedBaseLight(Long.parseLong(filename.substring(BASE_PREFIX.length(), idxOfv)),
Long.parseLong(filename.substring(idxOfv + VISIBILITY_PREFIX.length())), path);
}
@Override
public String toString() {
return "Path: " + baseDirPath + "; writeId: " + writeId + "; visibilityTxnId: " + visibilityTxnId;
}
}
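// Illustrative parseBase() results (a sketch; assumes VISIBILITY_PREFIX is "_v"):
//   base_0000010          -> writeId=10, visibilityTxnId=0  (IOW or pre-4.0 compactor layout)
//   base_0000010_v0000025 -> writeId=10, visibilityTxnId=25 (written by a compactor since Hive 4.0)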
/**
* In addition to {@link ParsedBaseLight} this knows if the data is in raw format, i.e. doesn't
* have acid metadata columns embedded in the files. To determine this in some cases
* requires looking at the footer of the data file which can be expensive so if this info is
* not needed {@link ParsedBaseLight} should be used.
*/
public static final class ParsedBase extends ParsedBaseLight implements ParsedDirectory {
private boolean rawFormat;
private List<HdfsFileStatusWithId> files;
ParsedBase(ParsedBaseLight pb, List<HdfsFileStatusWithId> files) {
super(pb.writeId, pb.visibilityTxnId, pb.baseDirPath);
this.files = files;
}
public boolean isRawFormat() {
return rawFormat;
}
public void setRawFormat(boolean rawFormat) {
this.rawFormat = rawFormat;
}
/**
* Returns the files from the base directory.
* The list is either populated by AcidUtils or it will be listed through the provided FileSystem object.
* If the list was not filled and no FS is provided, returns null.
* @param fs FileSystem optional
* @param useFileIds to use fileId based listing or not. Optional
* @return list of files in the base directory
* @throws IOException ex
*/
public List<HdfsFileStatusWithId> getFiles(FileSystem fs, Ref<Boolean> useFileIds) throws IOException {
// If the list was not populated before, do it now
if (files == null && fs != null) {
files = HdfsUtils.listFileStatusWithId(fs, baseDirPath, useFileIds, false, HIDDEN_FILES_PATH_FILTER);
}
return files;
}
public void setFiles(List<HdfsFileStatusWithId> files) {
this.files = files;
}
@Override
public String toString() {
return super.toString() + "; rawFormat: " + rawFormat;
}
}
/**
* In addition to {@link ParsedDeltaLight} this knows if the data is in raw format, i.e. doesn't
* have acid metadata columns embedded in the files. To determine this in some cases
* requires looking at the footer of the data file which can be expensive so if this info is
* not needed {@link ParsedDeltaLight} should be used.
*/
@Immutable
public static final class ParsedDelta extends ParsedDeltaLight implements ParsedDirectory {
private final boolean isRawFormat;
private List<HdfsFileStatusWithId> files;
private ParsedDelta(ParsedDeltaLight delta, boolean isRawFormat, List<HdfsFileStatusWithId> files) {
super(delta.minWriteId, delta.maxWriteId, delta.path, delta.statementId, delta.isDeleteDelta, delta.visibilityTxnId);
this.isRawFormat = isRawFormat;
this.files = files;
}
/**
* Files w/o Acid meta columns embedded in the file. See {@link AcidBaseFileType#ORIGINAL_BASE}
*/
public boolean isRawFormat() {
return isRawFormat;
}
/**
* Returns the files from the delta directory.
* The list is either populated by AcidUtils or it will be listed through the provided FileSystem object.
* If the list was not filled and no FS is provided, returns null.
* @param fs FileSystem optional
* @param useFileIds to use fileId based listing or not. Optional
* @return list of files in the delta directory
* @throws IOException ex
*/
public List<HdfsFileStatusWithId> getFiles(FileSystem fs, Ref<Boolean> useFileIds) throws IOException {
// If the list was not populated before, do it now
if (files == null && fs != null) {
files = HdfsUtils.listFileStatusWithId(fs, path, useFileIds, false, isRawFormat() ? AcidUtils.originalBucketFilter : AcidUtils.bucketFileFilter);
}
return files;
}
}
/**
* This encapsulates info obtained from the file path.
* See also {@link ParsedDelta}.
*/
@Immutable
public static class ParsedDeltaLight implements Comparable<ParsedDeltaLight> {
final long minWriteId;
final long maxWriteId;
final Path path;
//-1 is for internal (getAcidState()) purposes and means the delta dir
//had no statement ID
final int statementId;
final boolean isDeleteDelta; // records whether delta dir is of type 'delete_delta_x_y...'
/**
* transaction Id of txn which created this delta. This dir should be considered
* invisible unless this txn is committed
*
* TODO: define TransactionallyVisible interface - add getVisibilityTxnId() etc and all comments
* use in {@link ParsedBaseLight}, {@link ParsedDelta}, {@link AcidInputFormat.Options}, AcidInputFormat.DeltaMetaData etc
*/
final long visibilityTxnId;
public static ParsedDeltaLight parse(Path deltaDir) {
String filename = deltaDir.getName();
int idxOfVis = filename.indexOf(VISIBILITY_PREFIX);
long visibilityTxnId = 0; // visibilityTxnId:0 is always visible
if (idxOfVis >= 0) {
visibilityTxnId = Long.parseLong(filename.substring(idxOfVis + VISIBILITY_PREFIX.length()));
filename = filename.substring(0, idxOfVis);
}
boolean isDeleteDelta = filename.startsWith(DELETE_DELTA_PREFIX);
String rest = filename.substring((isDeleteDelta ? DELETE_DELTA_PREFIX : DELTA_PREFIX).length());
int split = rest.indexOf('_');
// split2 may be -1 if no statementId
int split2 = rest.indexOf('_', split + 1);
long min = Long.parseLong(rest.substring(0, split));
long max =
split2 == -1 ? Long.parseLong(rest.substring(split + 1)) : Long.parseLong(rest.substring(split + 1, split2));
if (split2 == -1) {
// pre 1.3.x delta files
return new ParsedDeltaLight(min, max, deltaDir, -1, isDeleteDelta, visibilityTxnId);
}
int statementId = Integer.parseInt(rest.substring(split2 + 1));
return new ParsedDeltaLight(min, max, deltaDir, statementId, isDeleteDelta, visibilityTxnId);
}
private ParsedDeltaLight(long min, long max, Path path, int statementId,
boolean isDeleteDelta, long visibilityTxnId) {
this.minWriteId = min;
this.maxWriteId = max;
this.path = path;
this.statementId = statementId;
this.isDeleteDelta = isDeleteDelta;
this.visibilityTxnId = visibilityTxnId;
}
public long getMinWriteId() {
return minWriteId;
}
public long getMaxWriteId() {
return maxWriteId;
}
public Path getPath() {
return path;
}
public boolean hasStatementId() {
return statementId >= 0;
}
public int getStatementId() {
return hasStatementId() ? statementId : 0;
}
public boolean isDeleteDelta() {
return isDeleteDelta;
}
public long getVisibilityTxnId() {
return visibilityTxnId;
}
/**
* Only un-compacted delta_x_y (x != y) (created by streaming ingest with batch size > 1)
* may contain a {@link OrcAcidUtils#getSideFile(Path)}.
* @return
*/
boolean mayContainSideFile() {
return !isDeleteDelta() && getMinWriteId() != getMaxWriteId() && getVisibilityTxnId() <= 0;
}
/**
* Compactions (Major/Minor) merge deltas/bases, but the deletion of old files
* happens in a different process; thus it's possible to have bases/deltas with
* overlapping writeId boundaries. The sort order helps figure out the "best" set of files
* to use to get data.
* This sorts "wider" delta before "narrower" i.e. delta_5_20 sorts before delta_5_10 (and delta_11_20)
*/
@Override
public int compareTo(ParsedDeltaLight parsedDelta) {
if (minWriteId != parsedDelta.minWriteId) {
if (minWriteId < parsedDelta.minWriteId) {
return -1;
} else {
return 1;
}
} else if (maxWriteId != parsedDelta.maxWriteId) {
if (maxWriteId < parsedDelta.maxWriteId) {
return 1;
} else {
return -1;
}
}
else if(statementId != parsedDelta.statementId) {
/**
* We want deltas after minor compaction (w/o statementId) to sort
* earlier so that getAcidState() considers compacted files (into larger ones) obsolete
* Before compaction, include deltas with all statementIds for a given writeId
* in a {@link AcidDirectory}
*/
if(statementId < parsedDelta.statementId) {
return -1;
}
else {
return 1;
}
}
else {
return path.compareTo(parsedDelta.path);
}
}
}
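// Illustrative parse() results and sort order (a sketch, consistent with the javadoc above):
//   delta_0000005_0000010                      -> min=5,  max=10, statementId=-1
//   delete_delta_0000012_0000012_0001_v0000030 -> min=12, max=12, statementId=1,
//                                                 isDeleteDelta=true, visibilityTxnId=30
//   compareTo(): delta_0000005_0000020 sorts before delta_0000005_0000010 ("wider" first),
//   which in turn sorts before delta_0000011_0000020 (smaller minWriteId first).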
/**
* Convert a list of deltas to a list of delta directories.
* @param deltas the list of deltas out of a Directory object.
* @return a list of delta directory paths that need to be read
*/
public static Path[] getPaths(List<ParsedDelta> deltas) {
Path[] result = new Path[deltas.size()];
for(int i=0; i < result.length; ++i) {
result[i] = deltas.get(i).getPath();
}
return result;
}
/**
* Convert the list of deltas into an equivalent list of begin/end
* write id pairs. Assumes {@code deltas} is sorted.
* @param deltas sorted delete delta list
* @param fs FileSystem
* @return the list of write ids to serialize
* @throws IOException ex
*/
public static List<AcidInputFormat.DeltaMetaData> serializeDeleteDeltas(List<ParsedDelta> deltas, FileSystem fs) throws IOException {
List<AcidInputFormat.DeltaMetaData> result = new ArrayList<>(deltas.size());
AcidInputFormat.DeltaMetaData last = null;
for (ParsedDelta parsedDelta : deltas) {
assert parsedDelta.isDeleteDelta() : "expected delete_delta, got " + parsedDelta.getPath();
final Integer stmtId = parsedDelta.statementId >= 0 ? parsedDelta.statementId : null;
if ((last != null)
&& (last.getMinWriteId() == parsedDelta.getMinWriteId())
&& (last.getMaxWriteId() == parsedDelta.getMaxWriteId())) {
if (stmtId != null) {
last.getStmtIds().add(stmtId);
}
for (HadoopShims.HdfsFileStatusWithId fileStatus : parsedDelta.getFiles(fs, null)) {
last.getDeltaFiles().add(new AcidInputFormat.DeltaFileMetaData(fileStatus, stmtId, parseBucketId(fileStatus.getFileStatus().getPath())));
}
} else {
List<Integer> stmtIds = new ArrayList<>();
if (stmtId != null) {
stmtIds.add(stmtId);
}
last = new AcidInputFormat.DeltaMetaData(parsedDelta.getMinWriteId(), parsedDelta.getMaxWriteId(),
stmtIds, parsedDelta.getVisibilityTxnId(), parsedDelta.getFiles(fs, null).stream()
.map(file -> new AcidInputFormat.DeltaFileMetaData(file, stmtId, parseBucketId(file.getFileStatus().getPath())))
.collect(Collectors.toList()));
result.add(last);
}
}
return result;
}
/**
* Convert the list of begin/end write id pairs to a list of delete delta
* directories. Note that there may be multiple delete_delta files for the exact same txn range starting
* with 2.2.x;
* see {@link org.apache.hadoop.hive.ql.io.AcidUtils#deltaSubdir(long, long, int)}
* @param root the root directory
* @param deleteDeltas list of begin/end write id pairs
* @return the list of delta paths
*/
public static Path[] deserializeDeleteDeltas(Path root, final List<AcidInputFormat.DeltaMetaData> deleteDeltas,
Map<String, AcidInputFormat.DeltaMetaData> pathToDeltaMetaData) {
List<Path> results = new ArrayList<>(deleteDeltas.size());
for (AcidInputFormat.DeltaMetaData dmd : deleteDeltas) {
results.addAll(dmd.getPaths(root).stream().map(Pair::getLeft).collect(Collectors.toList()));
if (pathToDeltaMetaData != null) {
for (Pair<Path, Integer> pathPair : dmd.getPaths(root)) {
pathToDeltaMetaData.put(pathPair.getLeft().getName(), dmd);
}
}
}
return results.toArray(new Path[results.size()]);
}
/**
* This will look at a footer of one of the files in the delta to see if the
* file is in Acid format, i.e. has acid metadata columns. The assumption is
* that for any dir, either all files are acid or all are not.
*/
public static ParsedDelta parsedDelta(Path deltaDir, FileSystem fs) throws IOException {
return parsedDelta(deltaDir, fs, null);
}
private static ParsedDelta parsedDelta(Path deltaDir, FileSystem fs, HdfsDirSnapshot dirSnapshot)
throws IOException {
ParsedDeltaLight deltaLight = ParsedDeltaLight.parse(deltaDir);
//small optimization - delete delta can't be in raw format
boolean isRawFormat = !deltaLight.isDeleteDelta && MetaDataFile.isRawFormat(deltaDir, fs, dirSnapshot);
List files = null;
if (dirSnapshot != null) {
final PathFilter filter = isRawFormat ? AcidUtils.originalBucketFilter : AcidUtils.bucketFileFilter;
// If we already know the files, store it for future use
files = dirSnapshot.getFiles().stream()
.filter(fileStatus -> filter.accept(fileStatus.getPath()))
.map(HdfsFileStatusWithoutId::new)
.collect(Collectors.toList());
}
return new ParsedDelta(deltaLight, isRawFormat, files);
}
/**
* Is the given directory in ACID format?
* @param directory the partition directory to check
* @param conf the query configuration
* @return true, if it is an ACID directory
* @throws IOException
*/
public static boolean isAcid(Path directory, Configuration conf) throws IOException {
return isAcid(null, directory, conf);
}
public static boolean isAcid(FileSystem fileSystem, Path directory,
Configuration conf) throws IOException {
FileSystem fs = fileSystem == null ? directory.getFileSystem(conf) : fileSystem;
for(FileStatus file: fs.listStatus(directory)) {
String filename = file.getPath().getName();
if (filename.startsWith(BASE_PREFIX) ||
filename.startsWith(DELTA_PREFIX) ||
filename.startsWith(DELETE_DELTA_PREFIX)) {
if (file.isDirectory()) {
return true;
}
}
}
return false;
}
/**
* Get the ACID state of the given directory. It finds the minimal set of
* base and diff directories. Note that because major compactions don't
* preserve the history, we can't use a base directory that includes a
* write id that we must exclude.
* @param fileSystem optional; if it is not provided, it will be derived from the candidateDirectory
* @param candidateDirectory the partition directory to analyze
* @param conf the configuration
* @param writeIdList the list of write ids that we are reading
* @param useFileIds It will be set to true, if the FileSystem supports listing with fileIds
* @param ignoreEmptyFiles Ignore files with 0 length
* @return the state of the directory
* @throws IOException on filesystem errors
*/
public static AcidDirectory getAcidState(FileSystem fileSystem, Path candidateDirectory, Configuration conf,
ValidWriteIdList writeIdList, Ref<Boolean> useFileIds, boolean ignoreEmptyFiles) throws IOException {
return getAcidState(fileSystem, candidateDirectory, conf, writeIdList, useFileIds, ignoreEmptyFiles, null);
}
/**
* GetAcidState implementation which uses the provided dirSnapshot.
* Generates a new one if needed and the provided one is null.
* @param fileSystem optional; if it is not provided, it will be derived from the candidateDirectory
* @param candidateDirectory the partition directory to analyze
* @param conf the configuration
* @param writeIdList the list of write ids that we are reading
* @param useFileIds It will be set to true, if the FileSystem supports listing with fileIds
* @param ignoreEmptyFiles Ignore files with 0 length
* @param dirSnapshots The listed directory snapshot, if null new will be generated
* @return the state of the directory
* @throws IOException on filesystem errors
*/
public static AcidDirectory getAcidState(FileSystem fileSystem, Path candidateDirectory, Configuration conf,
ValidWriteIdList writeIdList, Ref<Boolean> useFileIds, boolean ignoreEmptyFiles, Map<Path, HdfsDirSnapshot> dirSnapshots) throws IOException {
ValidTxnList validTxnList = getValidTxnList(conf);
FileSystem fs = fileSystem == null ? candidateDirectory.getFileSystem(conf) : fileSystem;
AcidDirectory directory = new AcidDirectory(candidateDirectory, fs, useFileIds);
List<HdfsFileStatusWithId> childrenWithId = HdfsUtils.tryListLocatedHdfsStatus(useFileIds, fs, candidateDirectory, HIDDEN_FILES_PATH_FILTER);
if (childrenWithId != null) {
for (HdfsFileStatusWithId child : childrenWithId) {
getChildState(directory, child, writeIdList,validTxnList, ignoreEmptyFiles);
}
} else {
if (dirSnapshots == null) {
dirSnapshots = getHdfsDirSnapshots(fs, candidateDirectory);
}
getChildState(directory, dirSnapshots, writeIdList, validTxnList, ignoreEmptyFiles);
}
// If we have a base, the original files are obsolete.
if (directory.getBase() != null) {
// Add original files to obsolete list if any
for (HdfsFileStatusWithId fswid : directory.getOriginalFiles()) {
directory.getObsolete().add(fswid.getFileStatus().getPath());
}
// Add original directories to obsolete list if any
directory.getObsolete().addAll(directory.getOriginalDirectories());
// remove the entries so we don't get confused later and think we should
// use them.
directory.getOriginalFiles().clear();
directory.getOriginalDirectories().clear();
} else {
// Okay, we're going to need these originals.
// Recurse through them and figure out what we really need.
// If we already have the original list, do nothing
// If childrenWithId != null, we would have already populated "original"
if (childrenWithId != null) {
for (Path origDir : directory.getOriginalDirectories()) {
directory.getOriginalFiles().addAll(HdfsUtils.listFileStatusWithId(fs, origDir, useFileIds, true, null));
}
}
}
// Filter out all delta directories that are shadowed by others
findBestWorkingDeltas(writeIdList, directory);
if(directory.getOldestBase() != null && directory.getBase() == null &&
isCompactedBase(directory.getOldestBase(), fs, dirSnapshots)) {
/*
* If here, it means there was a base_x (> 1 perhaps) but none were suitable for given
* {@link writeIdList}. Note that 'original' files are logically a base_Long.MIN_VALUE and thus
* cannot have any data for an open txn. We could check {@link deltas} has files to cover
* [1,n] w/o gaps but this would almost never happen...
*
* We only throw for base_x produced by Compactor since that base erases all history and
* cannot be used for a client that has a snapshot in which something inside this base is
* open. (Nor can we ignore this base of course) But base_x which is a result of IOW,
* contains all history so we treat it just like delta wrt visibility. Imagine, IOW which
* aborts. It creates a base_x, which can and should just be ignored.*/
long[] exceptions = writeIdList.getInvalidWriteIds();
String minOpenWriteId = exceptions != null && exceptions.length > 0 ?
Long.toString(exceptions[0]) : "x";
throw new IOException(ErrorMsg.ACID_NOT_ENOUGH_HISTORY.format(
Long.toString(writeIdList.getHighWatermark()),
minOpenWriteId, directory.getOldestBase().toString()));
}
Path basePath = directory.getBaseDirectory();
if (basePath != null) {
boolean isBaseInRawFormat = MetaDataFile.isRawFormat(basePath, fs, dirSnapshots != null ? dirSnapshots.get(basePath) : null);
directory.getBase().setRawFormat(isBaseInRawFormat);
}
LOG.debug("in directory " + candidateDirectory.toUri().toString() + " base = " + basePath + " deltas = " +
directory.getCurrentDirectories().size());
/*
* If this sort order is changed and there are tables that have been converted to transactional
* and have had any update/delete/merge operations performed but not yet MAJOR compacted, it
* may result in data loss since it may change how
* {@link org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.OriginalReaderPair} assigns
* {@link RecordIdentifier#rowId} for read (that have happened) and compaction (yet to happen).
*/
// this does "Path.uri.compareTo(that.uri)"
directory.getOriginalFiles().sort(Comparator.comparing(HdfsFileStatusWithId::getFileStatus));
return directory;
}
private static void findBestWorkingDeltas(ValidWriteIdList writeIdList, AcidDirectory directory) {
Collections.sort(directory.getCurrentDirectories());
//so now, 'current directories' should be sorted like delta_5_20 delta_5_10 delta_11_20 delta_51_60 for example
//and we want to end up with the best set containing all relevant data: delta_5_20 delta_51_60,
//subject to the list of 'exceptions' in 'writeIdList' (not shown in the above example).
List<ParsedDelta> deltas = new ArrayList<>();
long current = directory.getBase() == null ? 0 : directory.getBase().getWriteId();
int lastStmtId = -1;
ParsedDelta prev = null;
for(ParsedDelta next: directory.getCurrentDirectories()) {
if (next.maxWriteId > current) {
// are any of the new transactions ones that we care about?
if (writeIdList.isWriteIdRangeValid(current+1, next.maxWriteId) !=
ValidWriteIdList.RangeResponse.NONE) {
deltas.add(next);
current = next.maxWriteId;
lastStmtId = next.statementId;
prev = next;
}
}
else if(next.maxWriteId == current && lastStmtId >= 0) {
//make sure to get all deltas within a single transaction; multi-statement txn
//generate multiple delta files with the same txnId range
//of course, if maxWriteId has already been minor compacted, all per statement deltas are obsolete
deltas.add(next);
prev = next;
}
else if (prev != null && next.maxWriteId == prev.maxWriteId
&& next.minWriteId == prev.minWriteId
&& next.statementId == prev.statementId) {
// The 'next' parsedDelta may have everything equal to the 'prev' parsedDelta, except
// the path. This may happen when we have split update and we have two types of delta
// directories- 'delta_x_y' and 'delete_delta_x_y' for the SAME txn range.
// Also note that any delete_deltas in between a given delta_x_y range would be made
// obsolete. For example, a delta_30_50 would make delete_delta_40_40 obsolete.
// This is valid because minor compaction always compacts the normal deltas and the delete
// deltas for the same range. That is, if we had 3 directories, delta_30_30,
// delete_delta_40_40 and delta_50_50, then running minor compaction would produce
// delta_30_50 and delete_delta_30_50.
deltas.add(next);
prev = next;
}
else {
directory.getObsolete().add(next.path);
}
}
directory.getCurrentDirectories().clear();
directory.getCurrentDirectories().addAll(deltas);
}
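// Worked example (a sketch matching the comment at the top of this method): with no base and the
// sorted current directories delta_5_20, delta_5_10, delta_11_20, delta_51_60 (all write ids valid):
//   - delta_5_20 is kept and 'current' becomes 20,
//   - delta_5_10 and delta_11_20 are shadowed by delta_5_20 and moved to the obsolete list,
//   - delta_51_60 is kept since its range lies beyond 20.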
private static ValidTxnList getValidTxnList(Configuration conf) {
ValidTxnList validTxnList = null;
String s = conf.get(ValidTxnList.VALID_TXNS_KEY);
if(!Strings.isNullOrEmpty(s)) {
/*
* getAcidState() is sometimes called on non-transactional tables, e.g.
* OrcInputFileFormat.FileGenerator.callInternal(). e.g. orc_merge3.q In that case
* writeIdList is bogus - doesn't even have a table name.
* see https://issues.apache.org/jira/browse/HIVE-20856.
*
* For now, assert that ValidTxnList.VALID_TXNS_KEY is set only if this is really a read
* of a transactional table.
*/
validTxnList = new ValidReadTxnList();
validTxnList.readFromString(s);
}
return validTxnList;
}
/**
* In the case of the cleaner, we don't need to go down to the file level; it is enough to collect the base/delta/delete_delta directories.
*
* @param fs the filesystem used for the directory lookup
* @param path the path of the table or partition needs to be cleaned
* @return The listed directory snapshot needs to be checked for cleaning
* @throws IOException on filesystem errors
*/
public static Map<Path, HdfsDirSnapshot> getHdfsDirSnapshotsForCleaner(final FileSystem fs, final Path path)
throws IOException {
Map<Path, HdfsDirSnapshot> dirToSnapshots = new HashMap<>();
Deque<RemoteIterator<FileStatus>> stack = new ArrayDeque<>();
stack.push(FileUtils.listStatusIterator(fs, path, acidHiddenFileFilter));
while (!stack.isEmpty()) {
RemoteIterator<FileStatus> itr = stack.pop();
while (itr.hasNext()) {
FileStatus fStatus = itr.next();
Path fPath = fStatus.getPath();
if (baseFileFilter.accept(fPath) ||
deltaFileFilter.accept(fPath) ||
deleteEventDeltaDirFilter.accept(fPath)) {
addToSnapshot(dirToSnapshots, fPath);
} else {
if (fStatus.isDirectory()) {
stack.push(FileUtils.listStatusIterator(fs, fPath, acidHiddenFileFilter));
} else {
// Found an original file
HdfsDirSnapshot hdfsDirSnapshot = addToSnapshot(dirToSnapshots, fPath.getParent());
hdfsDirSnapshot.addFile(fStatus);
}
}
}
}
return dirToSnapshots;
}
private static HdfsDirSnapshot addToSnapshot(Map<Path, HdfsDirSnapshot> dirToSnapshots, Path fPath) {
HdfsDirSnapshot dirSnapshot = dirToSnapshots.get(fPath);
if (dirSnapshot == null) {
dirSnapshot = new HdfsDirSnapshotImpl(fPath);
dirToSnapshots.put(fPath, dirSnapshot);
}
return dirSnapshot;
}
public static Map<Path, HdfsDirSnapshot> getHdfsDirSnapshots(final FileSystem fs, final Path path)
throws IOException {
Map<Path, HdfsDirSnapshot> dirToSnapshots = new HashMap<>();
Deque<RemoteIterator<FileStatus>> stack = new ArrayDeque<>();
stack.push(FileUtils.listStatusIterator(fs, path, acidHiddenFileFilter));
while (!stack.isEmpty()) {
RemoteIterator<FileStatus> itr = stack.pop();
while (itr.hasNext()) {
FileStatus fStatus = itr.next();
Path fPath = fStatus.getPath();
if (fStatus.isDirectory()) {
stack.push(FileUtils.listStatusIterator(fs, fPath, acidHiddenFileFilter));
} else {
Path parentDirPath = fPath.getParent();
if (acidTempDirFilter.accept(parentDirPath)) {
while (isChildOfDelta(parentDirPath, path)) {
// In some cases there are other directory layers between the delta and the data files
// (export-import of MM tables, insert with union all into MM tables, skewed tables).
// This does not matter for the AcidState; we just need the deltas and the data files,
// so build the snapshot with the files placed directly under the delta directory.
parentDirPath = parentDirPath.getParent();
}
HdfsDirSnapshot dirSnapshot = addToSnapshot(dirToSnapshots, parentDirPath);
// We're not filtering out the metadata file and acid format file,
// as they represent parts of a valid snapshot
// We're not using the cached values downstream, but we can potentially optimize more in a follow-up task
if (fStatus.getPath().toString().contains(MetaDataFile.METADATA_FILE)) {
dirSnapshot.addMetadataFile(fStatus);
} else if (fStatus.getPath().toString().contains(OrcAcidVersion.ACID_FORMAT)) {
dirSnapshot.addOrcAcidFormatFile(fStatus);
} else {
dirSnapshot.addFile(fStatus);
}
}
}
}
}
return dirToSnapshots;
}
public static boolean isChildOfDelta(Path childDir, Path rootPath) {
if (childDir.toUri().toString().length() <= rootPath.toUri().toString().length()) {
return false;
}
// We do not want to look outside the original directory
String fullName = childDir.toUri().toString().substring(rootPath.toUri().toString().length() + 1);
String dirName = childDir.getName();
return !dirName.startsWith(BASE_PREFIX) && !dirName.startsWith(DELTA_PREFIX) && !dirName.startsWith(DELETE_DELTA_PREFIX)
&& (fullName.contains(BASE_PREFIX) || fullName.contains(DELTA_PREFIX) || fullName.contains(DELETE_DELTA_PREFIX));
}
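// Illustrative sketch (hypothetical paths): with rootPath = /warehouse/t, isChildOfDelta()
// distinguishes extra layers below a delta from the delta directory itself:
//
//   isChildOfDelta(new Path("/warehouse/t/delta_0000001_0000001/HIVE_UNION_SUBDIR_1"),
//       new Path("/warehouse/t"));   // true  - a union/skew sub-directory below a delta
//   isChildOfDelta(new Path("/warehouse/t/delta_0000001_0000001"),
//       new Path("/warehouse/t"));   // false - the delta directory itself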
/**
* DFS dir listing.
* Captures a dir and the corresponding list of files it contains,
* with additional properties about the dir (like isBase etc)
*
*/
public interface HdfsDirSnapshot {
public Path getPath();
public void addOrcAcidFormatFile(FileStatus fStatus);
public FileStatus getOrcAcidFormatFile();
public void addMetadataFile(FileStatus fStatus);
public FileStatus getMetadataFile();
// Get the list of files if any within this directory
public List<FileStatus> getFiles();
public void addFile(FileStatus file);
// File id or null
public Long getFileId();
public Boolean isRawFormat();
public void setIsRawFormat(boolean isRawFormat);
public Boolean isBase();
public void setIsBase(boolean isBase);
Boolean isValidBase();
public void setIsValidBase(boolean isValidBase);
Boolean isCompactedBase();
public void setIsCompactedBase(boolean isCompactedBase);
boolean contains(Path path);
}
public static class HdfsDirSnapshotImpl implements HdfsDirSnapshot {
private Path dirPath;
private FileStatus metadataFStatus = null;
private FileStatus orcAcidFormatFStatus = null;
private List<FileStatus> files = new ArrayList<>();
private Long fileId = null;
private Boolean isRawFormat = null;
private Boolean isBase = null;
private Boolean isValidBase = null;
private Boolean isCompactedBase = null;
public HdfsDirSnapshotImpl(Path path, List<FileStatus> files) {
this.dirPath = path;
this.files = files;
}
public HdfsDirSnapshotImpl(Path path) {
this.dirPath = path;
}
@Override
public Path getPath() {
return dirPath;
}
@Override
public List<FileStatus> getFiles() {
return files;
}
@Override
public void addFile(FileStatus file) {
files.add(file);
}
@Override
public Long getFileId() {
return fileId;
}
@Override
public Boolean isRawFormat() {
return isRawFormat;
}
@Override
public void setIsRawFormat(boolean isRawFormat) {
this.isRawFormat = isRawFormat;
}
@Override
public Boolean isBase() {
return isBase;
}
@Override
public Boolean isValidBase() {
return isValidBase;
}
@Override
public Boolean isCompactedBase() {
return isCompactedBase;
}
@Override
public void setIsBase(boolean isBase) {
this.isBase = isBase;
}
@Override
public void setIsValidBase(boolean isValidBase) {
this.isValidBase = isValidBase;
}
@Override
public void setIsCompactedBase(boolean isCompactedBase) {
this.isCompactedBase = isCompactedBase;
}
@Override
public void addOrcAcidFormatFile(FileStatus fStatus) {
this.orcAcidFormatFStatus = fStatus;
}
@Override
public FileStatus getOrcAcidFormatFile() {
return orcAcidFormatFStatus;
}
@Override
public void addMetadataFile(FileStatus fStatus) {
this.metadataFStatus = fStatus;
}
@Override
public FileStatus getMetadataFile() {
return metadataFStatus;
}
@Override
public boolean contains(Path path) {
for (FileStatus fileStatus: getFiles()) {
if (fileStatus.getPath().equals(path)) {
return true;
}
}
return false;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Path: " + dirPath);
sb.append("; ");
sb.append("Files: { ");
for (FileStatus fStatus : files) {
sb.append(fStatus);
sb.append(", ");
}
sb.append(" }");
return sb.toString();
}
}
/**
* We can only use a 'base' if it doesn't have an open txn in its range (from a specific reader's
* point of view). A 'base' with an open txn in its range doesn't have enough history to produce a
* correct snapshot for this reader.
* Note that such a base is NOT obsolete. Obsolete files are those that are "covered" by other
* files within the snapshot.
* A base produced by Insert Overwrite is different. Logically it's a delta file but one that
* causes anything written previously to be ignored (hence the overwrite). In this case, base_x
* is visible if writeid:x is committed for the current reader.
*/
private static boolean isValidBase(ParsedBaseLight parsedBase, ValidWriteIdList writeIdList, FileSystem fs,
HdfsDirSnapshot dirSnapshot) throws IOException {
boolean isValidBase;
if (dirSnapshot != null && dirSnapshot.isValidBase() != null) {
isValidBase = dirSnapshot.isValidBase();
} else {
if (parsedBase.getWriteId() == Long.MIN_VALUE) {
//such base is created by 1st compaction in case of non-acid to acid table conversion
//By definition there are no open txns with id < 1.
isValidBase = true;
} else if (writeIdList.getMinOpenWriteId() != null && parsedBase.getWriteId() <= writeIdList
.getMinOpenWriteId()) {
isValidBase = true;
} else if (isCompactedBase(parsedBase, fs, dirSnapshot)) {
isValidBase = writeIdList.isValidBase(parsedBase.getWriteId());
} else {
// if here, it's a result of IOW
isValidBase = writeIdList.isWriteIdValid(parsedBase.getWriteId());
}
if (dirSnapshot != null) {
dirSnapshot.setIsValidBase(isValidBase);
}
}
return isValidBase;
}
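// Illustrative sketch (hypothetical write IDs) of the decision above:
//   reader snapshot: minOpenWriteId = 17
//   base_0000010           -> valid   (10 <= 17, it hides no open write)
//   base_0000020 (IOW)     -> valid only if writeIdList.isWriteIdValid(20)
//   base_0000020_v0000123  -> compacted; valid if writeIdList.isValidBase(20)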
/**
* Returns {@code true} if {@code parsedBase} was created by compaction.
* As of Hive 4.0 we can tell if a directory is the result of compaction based on the presence of
* the {@link AcidUtils#VISIBILITY_PATTERN} suffix. Base directories written prior to that have to
* rely on the {@link MetaDataFile} in the directory, so look at the file name first since that is
* the cheaper test.
*/
private static boolean isCompactedBase(ParsedBaseLight parsedBase, FileSystem fs,
Map<Path, HdfsDirSnapshot> snapshotMap) throws IOException {
return isCompactedBase(parsedBase, fs, snapshotMap != null ? snapshotMap.get(parsedBase.getBaseDirPath()) : null);
}
private static boolean isCompactedBase(ParsedBaseLight parsedBase, FileSystem fs,
HdfsDirSnapshot snapshot) throws IOException {
return parsedBase.getVisibilityTxnId() > 0 || MetaDataFile.isCompacted(parsedBase.getBaseDirPath(), fs, snapshot);
}
private static void getChildState(AcidDirectory directory, HdfsFileStatusWithId childWithId, ValidWriteIdList writeIdList,
ValidTxnList validTxnList, boolean ignoreEmptyFiles) throws IOException {
Path childPath = childWithId.getFileStatus().getPath();
String fn = childPath.getName();
if (!childWithId.getFileStatus().isDirectory()) {
if (!ignoreEmptyFiles || childWithId.getFileStatus().getLen() != 0) {
directory.getOriginalFiles().add(childWithId);
}
} else if (fn.startsWith(BASE_PREFIX)) {
processBaseDir(childPath, writeIdList, validTxnList, directory, null);
} else if (fn.startsWith(DELTA_PREFIX) || fn.startsWith(DELETE_DELTA_PREFIX)) {
processDeltaDir(childPath, writeIdList, validTxnList, directory, null);
} else {
// This is just the directory. We need to recurse and find the actual files. But don't
// do this until we have determined there is no base. This saves time. Plus,
// it is possible that the cleaner is running and removing these original files,
// in which case recursing through them could cause us to get an error.
directory.getOriginalDirectories().add(childPath);
}
}
private static void getChildState(AcidDirectory directory, Map<Path, HdfsDirSnapshot> dirSnapshots,
ValidWriteIdList writeIdList, ValidTxnList validTxnList, boolean ignoreEmptyFiles) throws IOException {
for (HdfsDirSnapshot dirSnapshot : dirSnapshots.values()) {
Path dirPath = dirSnapshot.getPath();
String dirName = dirPath.getName();
// dirPath may contain the filesystem prefix
if (dirPath.toString().endsWith(directory.getPath().toString())) {
// if the candidateDirectory is itself a delta directory, we need to add originals in that directory
// and return. This is the case when compaction thread calls getChildState.
for (FileStatus fileStatus : dirSnapshot.getFiles()) {
if (!ignoreEmptyFiles || fileStatus.getLen() != 0) {
directory.getOriginalFiles().add(new HdfsFileStatusWithoutId(fileStatus));
}
}
} else if (dirName.startsWith(BASE_PREFIX)) {
processBaseDir(dirPath, writeIdList, validTxnList, directory, dirSnapshot);
} else if (dirName.startsWith(DELTA_PREFIX) || dirName.startsWith(DELETE_DELTA_PREFIX)) {
processDeltaDir(dirPath, writeIdList, validTxnList, directory, dirSnapshot);
} else {
directory.getOriginalDirectories().add(dirPath);
for (FileStatus stat : dirSnapshot.getFiles()) {
if ((!ignoreEmptyFiles) || (stat.getLen() != 0)) {
directory.getOriginalFiles().add(new HdfsFileStatusWithoutId(stat));
}
}
}
}
}
private static void processBaseDir(Path baseDir, ValidWriteIdList writeIdList, ValidTxnList validTxnList,
AcidDirectory directory, AcidUtils.HdfsDirSnapshot dirSnapshot) throws IOException {
ParsedBaseLight parsedBase = ParsedBaseLight.parseBase(baseDir);
if (!isDirUsable(baseDir, parsedBase.getVisibilityTxnId(), directory.getAbortedDirectories(), validTxnList)) {
return;
}
final long writeId = parsedBase.getWriteId();
if (directory.getOldestBase() == null || directory.getOldestBase().writeId > writeId) {
// keep track for error reporting
directory.setOldestBase(parsedBase);
}
boolean isCompactedBase = isCompactedBase(parsedBase, directory.getFs(), dirSnapshot);
// Handle aborted IOW base.
if (writeIdList.isWriteIdAborted(writeId) && !isCompactedBase) {
directory.getAbortedDirectories().add(baseDir);
directory.getAbortedWriteIds().add(parsedBase.writeId);
return;
}
if (directory.getBase() == null || directory.getBase().getWriteId() < writeId
// If there are two competing versions of a particular write-id, one from the compactor and another from IOW,
// always pick the compactor one once it is committed.
|| directory.getBase().getWriteId() == writeId &&
isCompactedBase && validTxnList.isTxnValid(parsedBase.getVisibilityTxnId())) {
if (isValidBase(parsedBase, writeIdList, directory.getFs(), dirSnapshot)) {
List<HdfsFileStatusWithId> files = null;
if (dirSnapshot != null) {
files = dirSnapshot.getFiles().stream().map(HdfsFileStatusWithoutId::new).collect(Collectors.toList());
}
if (directory.getBase() != null) {
directory.getObsolete().add(directory.getBase().getBaseDirPath());
}
directory.setBase(new ParsedBase(parsedBase, files));
}
} else {
directory.getObsolete().add(baseDir);
}
}
private static void processDeltaDir(Path deltadir, ValidWriteIdList writeIdList, ValidTxnList validTxnList, AcidDirectory directory, AcidUtils.HdfsDirSnapshot dirSnapshot)
throws IOException {
ParsedDelta delta = parsedDelta(deltadir, directory.getFs(), dirSnapshot);
if (!isDirUsable(deltadir, delta.getVisibilityTxnId(), directory.getAbortedDirectories(), validTxnList)) {
return;
}
ValidWriteIdList.RangeResponse abortRange = writeIdList.isWriteIdRangeAborted(delta.minWriteId, delta.maxWriteId);
if (ValidWriteIdList.RangeResponse.ALL == abortRange) {
directory.getAbortedDirectories().add(deltadir);
directory.getAbortedWriteIds().addAll(LongStream.rangeClosed(delta.minWriteId, delta.maxWriteId)
.boxed().collect(Collectors.toList()));
} else {
if (ValidWriteIdList.RangeResponse.SOME == abortRange) {
// This means the delta contains aborted writes but cannot be cleaned yet.
// This is important for the Cleaner not to remove metadata belonging to this transaction.
directory.setUnCompactedAborts(true);
}
if (writeIdList.isWriteIdRangeValid(delta.minWriteId, delta.maxWriteId) != ValidWriteIdList.RangeResponse.NONE) {
directory.getCurrentDirectories().add(delta);
}
}
}
/**
* checks {@code visibilityTxnId} to see if {@code child} is committed in current snapshot
*/
private static boolean isDirUsable(Path child, long visibilityTxnId, List<Path> aborted, ValidTxnList validTxnList) {
if (validTxnList == null) {
throw new IllegalArgumentException("No ValidTxnList for " + child);
}
if (!validTxnList.isTxnValid(visibilityTxnId)) {
boolean isAborted = validTxnList.isTxnAborted(visibilityTxnId);
if (isAborted) {
aborted.add(child);// so we can clean it up
}
LOG.debug("getChildState() ignoring(" + aborted + ") " + child);
return false;
}
return true;
}
public static boolean isTablePropertyTransactional(Properties props) {
return isTablePropertyTransactional(Maps.fromProperties(props));
}
public static boolean isTablePropertyTransactional(Map<String, String> parameters) {
String resultStr = parameters.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
if (resultStr == null) {
resultStr = parameters.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.toUpperCase());
}
return Boolean.parseBoolean(resultStr);
}
/**
* @param p - not null
*/
public static boolean isDeleteDelta(Path p) {
return p.getName().startsWith(DELETE_DELTA_PREFIX);
}
public static boolean isInsertDelta(Path p) {
return p.getName().startsWith(DELTA_PREFIX);
}
public static boolean isTransactionalTable(CreateTableDesc table) {
if (table == null || table.getTblProps() == null) {
return false;
}
return isTransactionalTable(table.getTblProps());
}
public static boolean isTransactionalTable(Table table) {
return table != null && isTransactionalTable(table.getTTable());
}
public static boolean isTransactionalTable(org.apache.hadoop.hive.metastore.api.Table table) {
return table != null && isTransactionalTable(table.getParameters());
}
public static boolean isTransactionalTable(Map<String, String> props) {
return props != null && isTablePropertyTransactional(props);
}
public static boolean isTransactionalView(CreateMaterializedViewDesc view) {
if (view == null || view.getTblProps() == null) {
return false;
}
return isTransactionalTable(view.getTblProps());
}
public static boolean isFullAcidTable(CreateTableDesc td) {
if (td == null || td.getTblProps() == null) {
return false;
}
return isFullAcidTable(td.getTblProps());
}
/**
* Should produce the same result as
* {@link org.apache.hadoop.hive.metastore.txn.TxnUtils#isAcidTable(org.apache.hadoop.hive.metastore.api.Table)}
*/
public static boolean isFullAcidTable(Table table) {
return table != null && isFullAcidTable(table.getTTable());
}
/**
* Should produce the same result as
* {@link org.apache.hadoop.hive.metastore.txn.TxnUtils#isAcidTable(org.apache.hadoop.hive.metastore.api.Table)}
*/
public static boolean isFullAcidTable(org.apache.hadoop.hive.metastore.api.Table table) {
return table != null && isFullAcidTable(table.getParameters());
}
public static boolean isFullAcidTable(Map<String, String> params) {
return isTransactionalTable(params) && !isInsertOnlyTable(params);
}
public static boolean isFullAcidScan(Configuration conf) {
if (!HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN)) {
return false;
}
int propInt = conf.getInt(ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, -1);
if (propInt == -1) {
return true;
}
AcidOperationalProperties props = AcidOperationalProperties.parseInt(propInt);
return !props.isInsertOnly();
}
public static boolean isInsertOnlyFetchBucketId(Configuration conf) {
if (!HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN)) {
return false;
}
int propInt = conf.getInt(ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, -1);
if (propInt == -1) {
return false;
}
AcidOperationalProperties props = AcidOperationalProperties.parseInt(propInt);
if (!props.isInsertOnly()) {
return false;
}
return props.isFetchBucketId();
}
/**
* Sets the acidOperationalProperties in the configuration object argument.
* @param conf Mutable configuration object
* @param properties An acidOperationalProperties object to initialize from. If this is null,
* we assume this is a full transactional table.
*/
public static void setAcidOperationalProperties(
Configuration conf, boolean isTxnTable, AcidOperationalProperties properties) {
if (isTxnTable) {
HiveConf.setBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, isTxnTable);
if (properties != null) {
HiveConf.setIntVar(conf, ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES, properties.toInt());
}
} else {
conf.unset(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname);
conf.unset(ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname);
}
}
/**
* Sets the acidOperationalProperties in the map object argument.
* @param parameters Mutable map object
* @param properties An acidOperationalProperties object to initialize from.
*/
public static void setAcidOperationalProperties(Map<String, String> parameters,
boolean isTxnTable, AcidOperationalProperties properties) {
parameters.put(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, Boolean.toString(isTxnTable));
if (properties != null) {
parameters.put(ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, properties.toString());
}
}
/**
* Returns the acidOperationalProperties for a given table.
* @param table A table object
* @return the acidOperationalProperties object for the corresponding table.
*/
public static AcidOperationalProperties getAcidOperationalProperties(Table table) {
String transactionalProperties = table.getProperty(
hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
if (transactionalProperties == null) {
// If the table does not define any transactional properties, we return a default type.
return AcidOperationalProperties.getDefault();
}
return AcidOperationalProperties.parseString(transactionalProperties);
}
/**
* Returns the acidOperationalProperties for a given configuration.
* @param conf A configuration object
* @return the acidOperationalProperties object for the corresponding configuration.
*/
public static AcidOperationalProperties getAcidOperationalProperties(Configuration conf) {
// If the conf does not define any transactional properties, the parseInt() should receive
// a value of 1, which will set AcidOperationalProperties to a default type and return that.
return AcidOperationalProperties.parseInt(
HiveConf.getIntVar(conf, ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES));
}
/**
* Returns the acidOperationalProperties for a given set of properties.
* @param props A properties object
* @return the acidOperationalProperties object for the corresponding properties.
*/
public static AcidOperationalProperties getAcidOperationalProperties(Properties props) {
String resultStr = props.getProperty(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
if (resultStr == null) {
// If the properties do not define any transactional properties, we return a default type.
return AcidOperationalProperties.getDefault();
}
return AcidOperationalProperties.parseString(resultStr);
}
/**
* Returns the acidOperationalProperties for a given map.
* @param parameters A parameters object
* @return the acidOperationalProperties object for the corresponding map.
*/
public static AcidOperationalProperties getAcidOperationalProperties(
Map<String, String> parameters) {
String resultStr = parameters.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
if (resultStr == null) {
// If the parameters do not define any transactional properties, we return a default type.
return AcidOperationalProperties.getDefault();
}
return AcidOperationalProperties.parseString(resultStr);
}
/**
* See comments at {@link AcidUtils#DELTA_SIDE_FILE_SUFFIX}.
*
* Returns the logical end of file for an acid data file.
*
* This relies on the fact that if delta_x_y has no committed transactions it will be filtered out
* by {@link #getAcidState(FileSystem, Path, Configuration, ValidWriteIdList, Ref, boolean)}
* and so won't be read at all.
* @param file - data file to read/compute splits on
*/
public static long getLogicalLength(FileSystem fs, FileStatus file) throws IOException {
Path acidDir = file.getPath().getParent(); //should be base_x or delta_x_y_
if (AcidUtils.isInsertDelta(acidDir)) {
ParsedDeltaLight pd = ParsedDeltaLight.parse(acidDir);
if(!pd.mayContainSideFile()) {
return file.getLen();
}
}
else {
return file.getLen();
}
return getLastFlushLength(fs, file);
}
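// Illustrative sketch (hypothetical paths; assumes a FileSystem `fs` is available): for a bucket
// file in a multi-write-id insert delta that may still be open for streaming ingest, the readable
// length comes from the side file rather than from FileStatus.getLen():
//
//   FileStatus stat = fs.getFileStatus(
//       new Path("/warehouse/t/delta_0000005_0000014/bucket_00000"));
//   long readable = AcidUtils.getLogicalLength(fs, stat);  // side-file length if present, else getLen()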
/**
* Read the side file to get the last flush length, or file length if no side file found.
* @param fs the file system to use
* @param deltaFile the delta file
* @return length as stored in the side file, or file length if no side file found
* @throws IOException if problems reading the side file
*/
private static long getLastFlushLength(FileSystem fs, FileStatus deltaFile) throws IOException {
Path sideFile = OrcAcidUtils.getSideFile(deltaFile.getPath());
try (FSDataInputStream stream = fs.open(sideFile)) {
long result = -1;
while (stream.available() > 0) {
result = stream.readLong();
}
if (result < 0) {
/* side file is there but we couldn't read it. We want to avoid a read where
* (file.getLen() < 'value from side file' which may happen if file is not closed)
* because this means some committed data from 'file' would be skipped. This should be very
* unusual.
*/
throw new IOException(sideFile + " found but is not readable. Consider waiting or "
+ "orcfiledump --recover");
}
return result;
} catch (FileNotFoundException e) {
return deltaFile.getLen();
}
}
/**
* Checks if a table is a transactional table that only supports INSERT, but not UPDATE/DELETE
* @param params table properties
* @return true if table is an INSERT_ONLY table, false otherwise
*/
public static boolean isInsertOnlyTable(Map<String, String> params) {
String transactionalProp = params.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
return INSERTONLY_TRANSACTIONAL_PROPERTY.equalsIgnoreCase(transactionalProp);
}
public static boolean isInsertOnlyTable(Table table) {
return isTransactionalTable(table) && getAcidOperationalProperties(table).isInsertOnly();
}
public static boolean isInsertOnlyTable(Properties params) {
return isInsertOnlyTable(Maps.fromProperties(params));
}
/**
* The method for altering table props; may set the table to MM, non-MM, or not affect MM.
* todo: all such validation logic should live in TransactionalValidationListener
* @param tbl object image before alter table command (or null if not retrieved yet).
* @param props prop values set in this alter table command
*/
public static Boolean isToInsertOnlyTable(Table tbl, Map<String, String> props) {
// Note: Setting these separately is a very hairy issue in certain combinations, since we
// cannot decide what type of table this becomes without taking both into account, and
// in many cases the conversion might be illegal.
// The only thing we allow is tx = true w/o tx-props, for backward compat.
String transactional = props.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
String transactionalProp = props.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
if (transactional == null && transactionalProp == null) {
// Not affected or the op is not about transactional.
return null;
}
if (transactional == null && tbl != null) {
transactional = tbl.getParameters().get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
}
boolean isSetToTxn = Boolean.parseBoolean(transactional);
if (transactionalProp == null) {
if (isSetToTxn || tbl == null) return false; // Assume the full ACID table.
throw new RuntimeException("Cannot change '" + hive_metastoreConstants.TABLE_IS_TRANSACTIONAL
+ "' without '" + hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES + "'");
}
if (!INSERTONLY_TRANSACTIONAL_PROPERTY.equalsIgnoreCase(transactionalProp)) {
return false; // Not MM.
}
if (!isSetToTxn) {
if (tbl == null) return true; // No table information yet; looks like it could be valid.
throw new RuntimeException("Cannot set '"
+ hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES + "' to 'insert_only' without "
+ "setting '" + hive_metastoreConstants.TABLE_IS_TRANSACTIONAL + "' to 'true'");
}
return true;
}
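// Illustrative sketch of the alter-table property combinations handled above, assuming `tbl` is
// the pre-alter image of a previously non-transactional table (map-literal notation is informal):
//
//   isToInsertOnlyTable(tbl, {"transactional"="true"})                     -> false (full ACID)
//   isToInsertOnlyTable(tbl, {"transactional"="true",
//                             "transactional_properties"="insert_only"})   -> true  (MM table)
//   isToInsertOnlyTable(tbl, {"transactional_properties"="insert_only"})   -> RuntimeException,
//       since 'transactional'='true' is set neither on the table nor in the new props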
public static Boolean isToFullAcid(Table table, Map<String, String> props) {
if (AcidUtils.isTransactionalTable(table)) {
String transactionalProp = props.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
if (DEFAULT_TRANSACTIONAL_PROPERTY.equalsIgnoreCase(transactionalProp)) {
return canBeMadeAcid(table.getTableName(), table.getSd());
}
}
return false;
}
public static boolean canBeMadeAcid(String fullTableName, StorageDescriptor sd) {
return isAcidInputOutputFormat(fullTableName, sd) && sd.getSortColsSize() <= 0;
}
private static boolean isAcidInputOutputFormat(String fullTableName, StorageDescriptor sd) {
if (sd.getInputFormat() == null || sd.getOutputFormat() == null) {
return false;
}
try {
return Class.forName(Constants.ORC_INPUT_FORMAT)
.isAssignableFrom(Class.forName(sd.getInputFormat()))
&& Class.forName(Constants.ORC_OUTPUT_FORMAT)
.isAssignableFrom(Class.forName(sd.getOutputFormat()));
} catch (ClassNotFoundException e) {
//if a table is using some custom I/O format and it's not in the classpath, we won't mark
//the table for Acid, but today OrcInput/OutputFormat is the only Acid format
LOG.error("Could not determine if " + fullTableName + " can be made Acid due to: " + e.getMessage(), e);
return false;
}
}
public static boolean isRemovedInsertOnlyTable(Set<String> removedSet) {
boolean hasTxn = removedSet.contains(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL),
hasProps = removedSet.contains(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
return hasTxn || hasProps;
}
/**
* Get the ValidTxnWriteIdList saved in the configuration.
*/
public static ValidTxnWriteIdList getValidTxnWriteIdList(Configuration conf) {
String txnString = conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
ValidTxnWriteIdList validTxnList = new ValidTxnWriteIdList(txnString);
return validTxnList;
}
/**
* Extract the ValidWriteIdList for the given table from the list of tables' ValidWriteIdList.
*/
public static ValidWriteIdList getTableValidWriteIdList(Configuration conf, String fullTableName) {
ValidTxnWriteIdList validTxnList = getValidTxnWriteIdList(conf);
return validTxnList.getTableValidWriteIdList(fullTableName);
}
/**
* Set the valid write id list for the current table scan.
*/
public static void setValidWriteIdList(Configuration conf, ValidWriteIdList validWriteIds) {
conf.set(ValidWriteIdList.VALID_WRITEIDS_KEY, validWriteIds.toString());
LOG.debug("Setting ValidWriteIdList: " + validWriteIds.toString()
+ " isAcidTable: " + HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, false)
+ " acidProperty: " + getAcidOperationalProperties(conf));
}
/**
* Set the valid write id list for the current table scan.
*/
public static void setValidWriteIdList(Configuration conf, TableScanDesc tsDesc) {
if (tsDesc.isTranscationalTable()) {
String dbName = tsDesc.getDatabaseName();
String tableName = tsDesc.getTableName();
ValidWriteIdList validWriteIdList = getTableValidWriteIdList(conf,
AcidUtils.getFullTableName(dbName, tableName));
if (validWriteIdList != null) {
setValidWriteIdList(conf, validWriteIdList);
} else {
// Log error if the acid table is missing from the ValidWriteIdList conf
LOG.error("setValidWriteIdList on table: " + AcidUtils.getFullTableName(dbName, tableName)
+ " isAcidTable: " + true
+ " acidProperty: " + getAcidOperationalProperties(conf)
+ " couldn't find the ValidWriteId list from ValidTxnWriteIdList: "
+ conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
throw new IllegalStateException("ACID table: " + AcidUtils.getFullTableName(dbName, tableName)
+ " is missing from the ValidWriteIdList config: "
+ conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
}
}
}
public static class TableSnapshot {
private long writeId;
private String validWriteIdList;
public TableSnapshot() {
}
public TableSnapshot(long writeId, String validWriteIdList) {
this.writeId = writeId;
this.validWriteIdList = validWriteIdList;
}
public String getValidWriteIdList() {
return validWriteIdList;
}
public long getWriteId() {
return writeId;
}
public void setWriteId(long writeId) {
this.writeId = writeId;
}
public void setValidWriteIdList(String validWriteIdList) {
this.validWriteIdList = validWriteIdList;
}
@Override
public String toString() {
return "[validWriteIdList=" + validWriteIdList + ", writeId=" + writeId + "]";
}
}
public static TableSnapshot getTableSnapshot(
Configuration conf,
Table tbl) throws LockException {
return getTableSnapshot(conf, tbl, false);
}
/** Note: this is generally called in Hive.java; so, the callers of Hive.java make sure
* to set up the acid state during compile, and Hive.java retrieves it if needed. */
public static TableSnapshot getTableSnapshot(
Configuration conf, Table tbl, boolean isStatsUpdater) throws LockException {
return getTableSnapshot(conf, tbl, tbl.getDbName(), tbl.getTableName(), isStatsUpdater);
}
/** Note: this is generally called in Hive.java; so, the callers of Hive.java make sure
* to set up the acid state during compile, and Hive.java retrieves it if needed. */
public static TableSnapshot getTableSnapshot(Configuration conf,
Table tbl, String dbName, String tblName, boolean isStatsUpdater)
throws LockException, AssertionError {
if (!isTransactionalTable(tbl)) {
return null;
}
if (dbName == null) {
dbName = tbl.getDbName();
}
if (tblName == null) {
tblName = tbl.getTableName();
}
long writeId = -1;
ValidWriteIdList validWriteIdList = null;
if (SessionState.get() != null) {
HiveTxnManager sessionTxnMgr = SessionState.get().getTxnMgr();
String fullTableName = getFullTableName(dbName, tblName);
if (sessionTxnMgr != null && sessionTxnMgr.getCurrentTxnId() > 0) {
validWriteIdList = getTableValidWriteIdList(conf, fullTableName);
if (isStatsUpdater) {
writeId = sessionTxnMgr.getAllocatedTableWriteId(dbName, tblName);
if (writeId < 1) {
// TODO: this is not ideal... stats updater that doesn't have write ID is currently
// "create table"; writeId would be 0/-1 here. No need to call this w/true.
LOG.debug("Stats updater for {}.{} doesn't have a write ID ({})", dbName, tblName, writeId);
}
}
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST) && conf.get(ValidTxnList.VALID_TXNS_KEY) == null) {
return null;
}
if (validWriteIdList == null) {
validWriteIdList = getTableValidWriteIdListWithTxnList(conf, dbName, tblName);
}
if (validWriteIdList == null) {
throw new AssertionError("Cannot find valid write ID list for " + tblName);
}
}
}
return new TableSnapshot(writeId,
validWriteIdList != null ? validWriteIdList.toString() : null);
}
/**
* Returns ValidWriteIdList for the table with the given "dbName" and "tableName".
* This is called when HiveConf has no list for the table.
* Otherwise use getTableSnapshot().
* @param conf Configuration
* @param dbName
* @param tableName
* @return ValidWriteIdList on success, null on failure to get a list.
* @throws LockException
*/
public static ValidWriteIdList getTableValidWriteIdListWithTxnList(
Configuration conf, String dbName, String tableName) throws LockException {
HiveTxnManager sessionTxnMgr = SessionState.get().getTxnMgr();
if (sessionTxnMgr == null) {
return null;
}
ValidWriteIdList validWriteIdList = null;
ValidTxnWriteIdList validTxnWriteIdList = null;
String validTxnList = conf.get(ValidTxnList.VALID_TXNS_KEY);
List<String> tablesInput = new ArrayList<>();
String fullTableName = getFullTableName(dbName, tableName);
tablesInput.add(fullTableName);
validTxnWriteIdList = sessionTxnMgr.getValidWriteIds(tablesInput, validTxnList);
return validTxnWriteIdList != null ?
validTxnWriteIdList.getTableValidWriteIdList(fullTableName) : null;
}
public static String getFullTableName(String dbName, String tableName) {
return TableName.fromString(tableName, null, dbName).getNotEmptyDbTable().toLowerCase();
}
/**
* General facility to place a metadata file into a dir created by acid/compactor write.
*
* Load Data commands against Acid tables write {@link AcidBaseFileType#ORIGINAL_BASE} type files
* into delta_x_x/ (or base_x in case there is Overwrite clause). {@link MetaDataFile} is a
* small JSON file in this directory that indicates that these files don't have Acid metadata
* columns and so the values for these columns need to be assigned at read time/compaction.
*/
public static class MetaDataFile extends AcidMetaDataFile {
static boolean isCompacted(Path baseOrDeltaDir, FileSystem fs, HdfsDirSnapshot dirSnapshot) throws IOException {
/**
* this file was written by Hive versions before 4.0 into a base_x/ dir
* created by compactor so that it can be distinguished from the one
* created by Insert Overwrite
*/
if (dirSnapshot != null && dirSnapshot.getMetadataFile() == null) {
return false;
}
Path formatFile = new Path(baseOrDeltaDir, METADATA_FILE);
try (FSDataInputStream strm = fs.open(formatFile)) {
Map<String, String> metaData = new ObjectMapper().readValue((InputStream) strm, Map.class);
if (!CURRENT_VERSION.equalsIgnoreCase(metaData.get(Field.VERSION.toString()))) {
throw new IllegalStateException("Unexpected Meta Data version: " + metaData.get(Field.VERSION));
}
String dataFormat = metaData.getOrDefault(Field.DATA_FORMAT.toString(), "null");
DataFormat format = DataFormat.valueOf(dataFormat.toUpperCase());
return DataFormat.COMPACTED == format;
} catch (FileNotFoundException e) {
return false;
} catch (IOException e) {
String msg = "Failed to read " + baseOrDeltaDir + "/" + METADATA_FILE + ": " + e.getMessage();
LOG.error(msg, e);
throw e;
}
}
/**
* Chooses 1 representative file from {@code baseOrDeltaDir}
* This assumes that all files in the dir are of the same type: either written by an acid
* write or Load Data. This should always be the case for an Acid table.
*/
private static Path chooseFile(Path baseOrDeltaDir, FileSystem fs) throws IOException {
if(!(baseOrDeltaDir.getName().startsWith(BASE_PREFIX) ||
baseOrDeltaDir.getName().startsWith(DELTA_PREFIX))) {
throw new IllegalArgumentException(baseOrDeltaDir + " is not a base/delta");
}
FileStatus[] dataFiles;
try {
dataFiles = fs.listStatus(baseOrDeltaDir, originalBucketFilter);
} catch (FileNotFoundException e) {
// HIVE-22001: If the file was not found, this means that baseOrDeltaDir (which was listed
// earlier during AcidUtils.getAcidState()) was removed sometime between the FS list call
// and now. In the case of ACID tables the file would only have been removed by the transactional
// cleaner thread, in which case this is currently an old base/delta which has already been
// compacted. So a new set of base files from the compaction should exist which
// the current call to AcidUtils.getAcidState() would use rather than this old base/delta.
// It should be ok to ignore this FileNotFound error and skip processing of this file - the list
// of files for this old base/delta will be incomplete, but it will not matter since this base/delta
// would be ignored (in favor of the new base files) by the selection logic in AcidUtils.getAcidState().
dataFiles = null;
}
return dataFiles != null && dataFiles.length > 0 ? dataFiles[0].getPath() : null;
}
/**
* Checks if the files in base/delta dir are a result of Load Data/Add Partition statement
* and thus do not have ROW_IDs embedded in the data.
* This is only meaningful for full CRUD tables - Insert-only tables have all their data
* in raw format by definition.
* @param baseOrDeltaDir base or delta file.
* @param dirSnapshot
*/
public static boolean isRawFormat(Path baseOrDeltaDir, FileSystem fs, HdfsDirSnapshot dirSnapshot) throws IOException {
//todo: this could be optimized - for a full CRUD table only base_x and delta_x_x could have
// files in raw format; delta_x_y (x != y), whether from streaming ingest or compaction,
// must be in native Acid format by definition
if(isDeleteDelta(baseOrDeltaDir)) {
return false;
}
if(isInsertDelta(baseOrDeltaDir)) {
ParsedDeltaLight pd = ParsedDeltaLight.parse(baseOrDeltaDir);
if(pd.getMinWriteId() != pd.getMaxWriteId()) {
//must be either result of streaming or compaction
return false;
}
}
else {
//must be base_x
if(isCompactedBase(ParsedBaseLight.parseBase(baseOrDeltaDir), fs, dirSnapshot)) {
return false;
}
}
//if here, have to check the files
Path dataFile = null;
if ((dirSnapshot != null) && (dirSnapshot.getFiles() != null) && (dirSnapshot.getFiles().size() > 0)) {
for (FileStatus fileStatus: dirSnapshot.getFiles()) {
if (originalBucketFilter.accept(fileStatus.getPath())) {
dataFile = fileStatus.getPath();
}
}
} else {
dataFile = chooseFile(baseOrDeltaDir, fs);
}
if (dataFile == null) {
//directory is empty or doesn't have any files that could have been produced by Load Data
return false;
}
return isRawFormatFile(dataFile, fs);
}
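// Illustrative sketch (hypothetical path; assumes a FileSystem `fs`): a delta_x_x written by
// LOAD DATA contains plain ORC files without ROW__IDs, so the check above is expected to report
// raw format for it, while a delete delta, a compacted base or a multi-write-id delta
// short-circuits to false:
//
//   boolean raw = MetaDataFile.isRawFormat(
//       new Path("/warehouse/t/delta_0000007_0000007"), fs, null /* no cached snapshot */);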
public static boolean isRawFormatFile(Path dataFile, FileSystem fs) throws IOException {
try (Reader reader = OrcFile.createReader(dataFile, OrcFile.readerOptions(fs.getConf()))) {
/*
acid files have a schema like <operation, originalTransaction, bucket, rowId, currentTransaction,
<f1, ... fn>>, so we could check it this way once/if OrcRecordUpdater.ACID_KEY_INDEX_NAME is removed:
TypeDescription schema = reader.getSchema();
List<String> columns = schema.getFieldNames();
*/
return OrcInputFormat.isOriginal(reader);
} catch (FileFormatException | InvalidProtocolBufferException ex) {
//We may be parsing a delta for Insert-only table which may not even be an ORC file so
//cannot have ROW_IDs in it.
LOG.debug("isRawFormat() called on " + dataFile + " which is not an ORC file: " +
ex.getMessage());
return true;
} catch (FileNotFoundException ex) {
//Fallback in case file was already removed and used Snapshot is outdated
return false;
}
}
}
/**
* Logic related to versioning acid data format. An {@code ACID_FORMAT} file is written to each
* base/delta/delete_delta dir written by a full acid write or compaction. This is the primary
* mechanism for versioning acid data.
*
* Each individual ORC file written stores the current version as a user property in ORC footer.
* All data files produced by Acid write should have this (starting with Hive 3.0), including
* those written by compactor. This is more for sanity checking in case someone moved the files
* around or something like that.
*
* Methods for getting/reading the version from files were moved to test class TestTxnCommands
* which is the only place they are used, in order to keep devs out of temptation, since they
* access the FileSystem which is expensive.
*/
public static final class OrcAcidVersion {
public static final String ACID_VERSION_KEY = "hive.acid.version";
public static final String ACID_FORMAT = "_orc_acid_version";
private static final Charset UTF8 = Charset.forName("UTF-8");
/**
* 2 is the version of Acid released in Hive 3.0.
*/
public static final int ORC_ACID_VERSION = 2;
/**
* Include current acid version in file footer.
* @param writer - file written
*/
public static void setAcidVersionInDataFile(Writer writer) {
//so that we know which version wrote the file
writer.addUserMetadata(ACID_VERSION_KEY, UTF8.encode(String.valueOf(ORC_ACID_VERSION)));
}
/**
* This creates a version file in {@code deltaOrBaseDir}
* @param deltaOrBaseDir - where to create the version file
*/
public static void writeVersionFile(Path deltaOrBaseDir, FileSystem fs) throws IOException {
Path formatFile = getVersionFilePath(deltaOrBaseDir);
if(!fs.isFile(formatFile)) {
try (FSDataOutputStream strm = fs.create(formatFile, false)) {
strm.write(UTF8.encode(String.valueOf(ORC_ACID_VERSION)).array());
} catch (IOException ioe) {
LOG.error("Failed to create " + formatFile + " due to: " + ioe.getMessage(), ioe);
throw ioe;
}
}
}
public static Path getVersionFilePath(Path deltaOrBase) {
return new Path(deltaOrBase, ACID_FORMAT);
}
}
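// Illustrative sketch of the two versioning mechanisms above from the writer side, assuming an
// ORC Writer `orcWriter`, a freshly created delta dir `deltaDir` and a FileSystem `fs`:
//
//   OrcAcidVersion.setAcidVersionInDataFile(orcWriter);   // version stored in the ORC footer
//   OrcAcidVersion.writeVersionFile(deltaDir, fs);        // creates deltaDir/_orc_acid_version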
public static List<FileStatus> getAcidFilesForStats(
Table table, Path dir, Configuration jc, FileSystem fs) throws IOException {
List<FileStatus> fileList = new ArrayList<>();
ValidWriteIdList idList = AcidUtils.getTableValidWriteIdList(jc,
AcidUtils.getFullTableName(table.getDbName(), table.getTableName()));
if (idList == null) {
LOG.warn("Cannot get ACID state for " + table.getDbName() + "." + table.getTableName()
+ " from " + jc.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
return null;
}
if (fs == null) {
fs = dir.getFileSystem(jc);
}
// Collect all of the files/dirs
Map<Path, HdfsDirSnapshot> hdfsDirSnapshots = AcidUtils.getHdfsDirSnapshots(fs, dir);
AcidDirectory acidInfo = AcidUtils.getAcidState(fs, dir, jc, idList, null, false, hdfsDirSnapshots);
// Assume that for an MM table, or if there's only the base directory, we are good.
if (!acidInfo.getCurrentDirectories().isEmpty() && AcidUtils.isFullAcidTable(table)) {
Utilities.FILE_OP_LOGGER.warn(
"Computing stats for an ACID table; stats may be inaccurate");
}
for (HdfsFileStatusWithId hfs : acidInfo.getOriginalFiles()) {
fileList.add(hfs.getFileStatus());
}
for (ParsedDelta delta : acidInfo.getCurrentDirectories()) {
fileList.addAll(hdfsDirSnapshots.get(delta.getPath()).getFiles());
}
if (acidInfo.getBaseDirectory() != null) {
fileList.addAll(hdfsDirSnapshots.get(acidInfo.getBaseDirectory()).getFiles());
}
return fileList;
}
public static List<Path> getValidDataPaths(Path dataPath, Configuration conf, String validWriteIdStr)
throws IOException {
List<Path> pathList = new ArrayList<>();
if ((validWriteIdStr == null) || validWriteIdStr.isEmpty()) {
// In the non-ACID case all files are in the base data path, so just return it.
pathList.add(dataPath);
return pathList;
}
// For ACID/MM tables we need to find the valid state w.r.t. the given ValidWriteIdList.
ValidWriteIdList validWriteIdList = new ValidReaderWriteIdList(validWriteIdStr);
AcidDirectory acidInfo = AcidUtils.getAcidState(dataPath.getFileSystem(conf), dataPath, conf, validWriteIdList, null,
false);
for (HdfsFileStatusWithId hfs : acidInfo.getOriginalFiles()) {
pathList.add(hfs.getFileStatus().getPath());
}
for (ParsedDelta delta : acidInfo.getCurrentDirectories()) {
pathList.add(delta.getPath());
}
if (acidInfo.getBaseDirectory() != null) {
pathList.add(acidInfo.getBaseDirectory());
}
return pathList;
}
public static String getAcidSubDir(Path dataPath) {
String dataDir = dataPath.getName();
if (dataDir.startsWith(AcidUtils.BASE_PREFIX)
|| dataDir.startsWith(AcidUtils.DELTA_PREFIX)
|| dataDir.startsWith(AcidUtils.DELETE_DELTA_PREFIX)) {
return dataDir;
}
return null;
}
//Get the first level acid directory (if any) from a given path
public static String getFirstLevelAcidDirPath(Path dataPath, FileSystem fileSystem) throws IOException {
if (dataPath == null) {
return null;
}
String firstLevelAcidDir = getAcidSubDir(dataPath);
if (firstLevelAcidDir != null) {
return firstLevelAcidDir;
}
String acidDirPath = getFirstLevelAcidDirPath(dataPath.getParent(), fileSystem);
if (acidDirPath == null) {
return null;
}
// Only directory names are appended to the result; a file name is not part of the acid dir path
if (fileSystem.isDirectory(dataPath)) {
return acidDirPath + Path.SEPARATOR + dataPath.getName();
}
return acidDirPath;
}
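// Illustrative sketch (hypothetical path): for a bucket file such as
// /warehouse/t/p=1/delta_0000003_0000003_0000/bucket_00000, getAcidSubDir() on the file itself
// returns null, and getFirstLevelAcidDirPath() walks up the parents to return
// "delta_0000003_0000003_0000" - the first acid directory above the leaf.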
public static boolean isAcidEnabled(HiveConf hiveConf) {
String txnMgr = hiveConf.getVar(ConfVars.HIVE_TXN_MANAGER);
boolean concurrency = hiveConf.getBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY);
String dbTxnMgr = "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager";
if (txnMgr.equals(dbTxnMgr) && concurrency) {
return true;
}
return false;
}
public static class AnyIdDirFilter implements PathFilter {
@Override
public boolean accept(Path path) {
return extractWriteId(path) != null;
}
}
public static class IdPathFilter implements PathFilter {
private String baseDirName, deltaDirName, deleteDeltaDirName;
private final boolean isDeltaPrefix;
private final Set<String> dpSpecs;
private final int dpLevel;
public IdPathFilter(long writeId, int stmtId) {
this(writeId, stmtId, null, 0);
}
public IdPathFilter(long writeId, int stmtId, Set<String> dpSpecs, int dpLevel) {
String deltaDirName = DELTA_PREFIX + String.format(DELTA_DIGITS, writeId) + "_" +
String.format(DELTA_DIGITS, writeId);
String deleteDeltaDirName = DELETE_DELTA_PREFIX + String.format(DELTA_DIGITS, writeId) + "_" +
String.format(DELTA_DIGITS, writeId);
isDeltaPrefix = (stmtId < 0);
if (!isDeltaPrefix) {
deltaDirName += "_" + String.format(STATEMENT_DIGITS, stmtId);
deleteDeltaDirName += "_" + String.format(STATEMENT_DIGITS, stmtId);
}
this.baseDirName = BASE_PREFIX + String.format(DELTA_DIGITS, writeId);
this.deltaDirName = deltaDirName;
this.deleteDeltaDirName = deleteDeltaDirName;
this.dpSpecs = dpSpecs;
this.dpLevel = dpLevel;
}
@Override
public boolean accept(Path path) {
String name = path.getName();
// Extending the path filter with optional dynamic partition specifications.
// This is needed for the use case of multi-statement insert overwrite with dynamic
// partitioning, with direct insert or with insert-only tables.
// In this use case each FileSinkOperator should only clean up the directories written
// by itself and must not clean up the partition directories written by the other
// FileSinkOperators. (For further details please see HIVE-23114.)
if (dpLevel > 0 && dpSpecs != null && !dpSpecs.isEmpty()) {
Path parent = path.getParent();
String partitionSpec = parent.getName();
for (int i = 1; i < dpLevel; i++) {
parent = parent.getParent();
partitionSpec = parent.getName() + "/" + partitionSpec;
}
return (name.equals(baseDirName) && dpSpecs.contains(partitionSpec));
}
else {
return name.equals(baseDirName)
|| (isDeltaPrefix && (name.startsWith(deltaDirName) || name.startsWith(deleteDeltaDirName)))
|| (!isDeltaPrefix && (name.equals(deltaDirName) || name.equals(deleteDeltaDirName)));
}
}
}
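// Illustrative sketch (assuming the usual zero-padded dir naming): with writeId=7 and stmtId=2 the
// filter accepts exactly base_0000007, delta_0000007_0000007_0002 and
// delete_delta_0000007_0000007_0002; with stmtId < 0 it switches to prefix matching on the deltas:
//
//   PathFilter f = new IdPathFilter(7, -1);
//   f.accept(new Path("/warehouse/t/delta_0000007_0000007_0001"));  // true  (prefix match)
//   f.accept(new Path("/warehouse/t/delta_0000008_0000008"));       // false (different writeId)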
/**
* Full recursive PathFilter version of IdPathFilter (filtering files for a given writeId and stmtId).
* This can be used by recursive file listing, when we want to match the delta/base pattern on the
* bucket files.
*/
public static class IdFullPathFiler extends IdPathFilter {
private final Path basePath;
public IdFullPathFiler(long writeId, int stmtId, Path basePath) {
super(writeId, stmtId);
this.basePath = basePath;
}
@Override
public boolean accept(Path path) {
do {
if (super.accept(path)) {
return true;
}
path = path.getParent();
} while (path != null && !path.equals(basePath));
return false;
}
}
public static Long extractWriteId(Path file) {
String fileName = file.getName();
if (!fileName.startsWith(DELTA_PREFIX) && !fileName.startsWith(BASE_PREFIX)) {
LOG.trace("Cannot extract write ID for a MM table: {}", file);
return null;
}
String[] parts = fileName.split("_", 4); // e.g. delta_0000001_0000001_0000 or base_0000022
if (parts.length < 2) {
LOG.debug("Cannot extract write ID for a MM table: " + file
+ " (" + Arrays.toString(parts) + ")");
return null;
}
long writeId = -1;
try {
writeId = Long.parseLong(parts[1]);
} catch (NumberFormatException ex) {
LOG.debug("Cannot extract write ID for a MM table: " + file
+ "; parsing " + parts[1] + " got " + ex.getMessage());
return null;
}
return writeId;
}
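// Illustrative sketch of the parsing above:
//   extractWriteId(new Path(".../delta_0000001_0000001_0000"))  -> 1L
//   extractWriteId(new Path(".../base_0000022"))                -> 22L
//   extractWriteId(new Path(".../bucket_00000"))                -> null (not a base/delta dir)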
public static void setNonTransactional(Map<String, String> tblProps) {
tblProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "false");
tblProps.remove(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
}
private static boolean needsLock(Entity entity, boolean isExternalEnabled) {
return needsLock(entity, isExternalEnabled, false);
}
private static boolean needsLock(Entity entity, boolean isExternalEnabled, boolean isLocklessReads) {
switch (entity.getType()) {
case TABLE:
return isLockableTable(entity.getTable(), isExternalEnabled, isLocklessReads);
case PARTITION:
return isLockableTable(entity.getPartition().getTable(), isExternalEnabled, isLocklessReads);
default:
return true;
}
}
private static boolean isLockableTable(Table t, boolean isExternalEnabled, boolean isLocklessReads) {
if (t.isTemporary()) {
return false;
}
switch (t.getTableType()) {
case MANAGED_TABLE:
case MATERIALIZED_VIEW:
return !(isLocklessReads && isTransactionalTable(t));
case EXTERNAL_TABLE:
return isExternalEnabled;
default:
return false;
}
}
/**
* Create lock components from write/read entities.
* @param outputs write entities
* @param inputs read entities
* @param conf
* @return list with lock components
*/
public static List<LockComponent> makeLockComponents(Set<WriteEntity> outputs, Set<ReadEntity> inputs,
Context.Operation operation, HiveConf conf) {
List<LockComponent> lockComponents = new ArrayList<>();
boolean isLocklessReadsEnabled = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_ACID_LOCKLESS_READS_ENABLED);
boolean skipReadLock = !conf.getBoolVar(ConfVars.HIVE_TXN_READ_LOCKS);
boolean skipNonAcidReadLock = !conf.getBoolVar(ConfVars.HIVE_TXN_NONACID_READ_LOCKS);
boolean sharedWrite = !conf.getBoolVar(HiveConf.ConfVars.TXN_WRITE_X_LOCK);
boolean isExternalEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_EXT_LOCKING_ENABLED);
boolean isMerge = operation == Context.Operation.MERGE;
// We don't want to acquire read locks during update or delete as we'll be acquiring write
// locks instead. Also, there's no need to lock temp tables since they're session wide
List<ReadEntity> readEntities = inputs.stream()
.filter(input -> !input.isDummy()
&& input.needsLock()
&& !input.isUpdateOrDelete()
&& AcidUtils.needsLock(input, isExternalEnabled, isLocklessReadsEnabled)
&& !skipReadLock)
.collect(Collectors.toList());
Set<Table> fullTableLock = getFullTableLock(readEntities, conf);
// For each source to read, get a shared_read lock
for (ReadEntity input : readEntities) {
LockComponentBuilder compBuilder = new LockComponentBuilder();
compBuilder.setSharedRead();
compBuilder.setOperationType(DataOperationType.SELECT);
Table t = null;
switch (input.getType()) {
case DATABASE:
compBuilder.setDbName(input.getDatabase().getName());
break;
case TABLE:
t = input.getTable();
if (!fullTableLock.contains(t)) {
continue;
}
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
case PARTITION:
case DUMMYPARTITION:
compBuilder.setPartitionName(input.getPartition().getName());
t = input.getPartition().getTable();
if (fullTableLock.contains(t)) {
continue;
}
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
default:
// This is a file or something we don't hold locks for.
continue;
}
if (skipNonAcidReadLock && !AcidUtils.isTransactionalTable(t)) {
// skip read-locks for non-transactional tables
// read-locks don't protect non-transactional tables data consistency
continue;
}
if (t != null) {
compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
}
LockComponent comp = compBuilder.build();
LOG.debug("Adding lock component to lock request {} ", comp);
lockComponents.add(comp);
}
// For each source to write to, get the appropriate lock type. If it's
// an OVERWRITE, we need to get an exclusive lock. If it's an insert (no
// overwrite) then we need a shared lock. If it's an update or delete then we
// need a SHARED_WRITE.
for (WriteEntity output : outputs) {
if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR
|| !AcidUtils.needsLock(output, isExternalEnabled)) {
// We don't lock files or directories. We also skip locking temp tables.
continue;
}
LockComponentBuilder compBuilder = new LockComponentBuilder();
Table t = null;
/**
* For any insert/updates set dir cache to read-only mode, where it wouldn't
* add any new entry to cache.
* When updates are executed, delta folders are created only at the end of the statement
* and at the time of acquiring locks, there would not be any delta folders. This can cause wrong data to be reported
* when "insert" followed by "update" statements are executed. In such cases, use the cache as read only mode.
*/
HiveConf.setIntVar(conf, ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, 0);
switch (output.getType()) {
case DATABASE:
compBuilder.setDbName(output.getDatabase().getName());
break;
case TABLE:
case DUMMYPARTITION: // in case of dynamic partitioning lock the table
t = output.getTable();
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
case PARTITION:
compBuilder.setPartitionName(output.getPartition().getName());
t = output.getPartition().getTable();
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
default:
// This is a file or something we don't hold locks for.
continue;
}
switch (output.getWriteType()) {
/* base this on HiveOperation instead? this and DDL_NO_LOCK is peppered all over the code...
Seems much cleaner if each stmt is identified as a particular HiveOperation (which I'd think
makes sense everywhere). This however would be problematic for merge...*/
case DDL_EXCLUSIVE:
compBuilder.setExclusive();
compBuilder.setOperationType(DataOperationType.NO_TXN);
break;
case DDL_EXCL_WRITE:
compBuilder.setExclWrite();
compBuilder.setOperationType(DataOperationType.NO_TXN);
break;
case CTAS:
assert t != null;
if (AcidUtils.isTransactionalTable(t)) {
compBuilder.setExclWrite();
compBuilder.setOperationType(DataOperationType.INSERT);
} else {
compBuilder.setExclusive();
compBuilder.setOperationType(DataOperationType.NO_TXN);
}
break;
case INSERT_OVERWRITE:
assert t != null;
if (AcidUtils.isTransactionalTable(t)) {
if (conf.getBoolVar(HiveConf.ConfVars.TXN_OVERWRITE_X_LOCK) && !sharedWrite
&& !isLocklessReadsEnabled) {
compBuilder.setExclusive();
} else {
compBuilder.setExclWrite();
}
compBuilder.setOperationType(DataOperationType.UPDATE);
} else if (MetaStoreUtils.isNonNativeTable(t.getTTable())) {
compBuilder.setLock(getLockTypeFromStorageHandler(output, t));
compBuilder.setOperationType(DataOperationType.UPDATE);
} else {
compBuilder.setExclusive();
compBuilder.setOperationType(DataOperationType.NO_TXN);
}
break;
case INSERT:
assert t != null;
if (AcidUtils.isTransactionalTable(t)) {
boolean isExclMergeInsert = conf.getBoolVar(ConfVars.TXN_MERGE_INSERT_X_LOCK) && isMerge;
compBuilder.setSharedRead();
if (sharedWrite) {
compBuilder.setSharedWrite();
} else {
if (isExclMergeInsert) {
compBuilder.setExclWrite();
} else if (isLocklessReadsEnabled) {
compBuilder.setSharedWrite();
}
}
if (isExclMergeInsert) {
compBuilder.setOperationType(DataOperationType.UPDATE);
break;
}
} else if (MetaStoreUtils.isNonNativeTable(t.getTTable())) {
compBuilder.setLock(getLockTypeFromStorageHandler(output, t));
} else {
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE)) {
compBuilder.setExclusive();
} else { // this is backward compatible for non-ACID resources, w/o ACID semantics
compBuilder.setSharedRead();
}
}
compBuilder.setOperationType(DataOperationType.INSERT);
break;
case DDL_SHARED:
compBuilder.setSharedRead();
if (output.isTxnAnalyze()) {
// Analyze needs txn components to be present, otherwise an aborted analyze write ID
// might be rolled under the watermark by compactor while stats written by it are
// still present.
continue;
}
compBuilder.setOperationType(DataOperationType.NO_TXN);
break;
case UPDATE:
case DELETE:
assert t != null;
if (AcidUtils.isTransactionalTable(t) && sharedWrite) {
compBuilder.setSharedWrite();
} else if (MetaStoreUtils.isNonNativeTable(t.getTTable())) {
compBuilder.setLock(getLockTypeFromStorageHandler(output, t));
} else {
compBuilder.setExclWrite();
}
compBuilder.setOperationType(DataOperationType.valueOf(
output.getWriteType().name()));
break;
case DDL_NO_LOCK:
continue; // No lock required here
default:
throw new RuntimeException("Unknown write type " + output.getWriteType().toString());
}
if (t != null) {
compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
}
compBuilder.setIsDynamicPartitionWrite(output.isDynamicPartitionWrite());
LockComponent comp = compBuilder.build();
LOG.debug("Adding lock component to lock request " + comp.toString());
lockComponents.add(comp);
}
return lockComponents;
}
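// Illustrative sketch of how the list built above could be consumed; `outputs`, `inputs`,
// `operation` and `conf` are assumed to come from the compiled query plan:
//
//   List<LockComponent> comps = AcidUtils.makeLockComponents(outputs, inputs, operation, conf);
//   // each component carries db/table/partition plus the lock level (SHARED_READ, SHARED_WRITE,
//   // EXCL_WRITE or EXCLUSIVE) derived from the write type and the table's transactional properties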
private static LockType getLockTypeFromStorageHandler(WriteEntity output, Table t) {
final HiveStorageHandler storageHandler = Preconditions.checkNotNull(t.getStorageHandler(),
"Non-native tables must have an instance of storage handler.");
LockType lockType = storageHandler.getLockType(output);
if (null == lockType) {
throw new IllegalArgumentException(
String.format("Lock type for Database.Table [%s.%s] is null", t.getDbName(), t.getTableName()));
}
return lockType;
}
public static boolean isExclusiveCTASEnabled(Configuration conf) {
return HiveConf.getBoolVar(conf, ConfVars.TXN_CTAS_X_LOCK);
}
public static boolean isExclusiveCTAS(Set<WriteEntity> outputs, HiveConf conf) {
return outputs.stream().anyMatch(we -> we.getWriteType().equals(WriteType.CTAS) && isExclusiveCTASEnabled(conf));
}
private static Set<Table> getFullTableLock(List<ReadEntity> readEntities, HiveConf conf) {
int partLocksThreshold = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCKS_PARTITION_THRESHOLD);
Map<Table, Long> partLocksPerTable = readEntities.stream()
.filter(input -> input.getType() == Entity.Type.PARTITION)
.map(Entity::getPartition)
.collect(Collectors.groupingBy(Partition::getTable, Collectors.counting()));
return readEntities.stream()
.filter(input -> input.getType() == Entity.Type.TABLE)
.map(Entity::getTable)
.filter(t -> !partLocksPerTable.containsKey(t)
|| (partLocksThreshold > 0 && partLocksThreshold <= partLocksPerTable.get(t)))
.collect(Collectors.toSet());
}
/**
* Safety check to make sure a file taken from one acid table is not added into another acid table,
* since the ROW__IDs embedded as part of a write to one table won't make sense in a different
* table/cluster.
*/
public static void validateAcidFiles(Table table, FileStatus[] srcs, FileSystem fs) throws SemanticException {
if (!AcidUtils.isFullAcidTable(table)) {
return;
}
validateAcidFiles(srcs, fs);
}
private static void validateAcidFiles(FileStatus[] srcs, FileSystem fs) throws SemanticException {
try {
if (srcs == null) {
return;
}
for (FileStatus oneSrc : srcs) {
if (!AcidUtils.MetaDataFile.isRawFormatFile(oneSrc.getPath(), fs)) {
throw new SemanticException(ErrorMsg.LOAD_DATA_ACID_FILE, oneSrc.getPath().toString());
}
}
} catch (IOException ex) {
throw new SemanticException(ex);
}
}
/**
* Safety check to make sure the given location is not the location of an acid table and
* that its files will not be added into another acid table.
*/
public static void validateAcidPartitionLocation(String location, Configuration conf) throws SemanticException {
try {
URI uri = new URI(location);
FileSystem fs = FileSystem.get(uri, conf);
FileStatus[] fileStatuses = LoadSemanticAnalyzer.matchFilesOrDir(fs, new Path(uri));
validateAcidFiles(fileStatuses, fs);
} catch (IOException | URISyntaxException ex) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ex.getMessage()), ex);
}
}
/**
* Determines transaction type based on query AST.
* @param tree AST
*/
public static TxnType getTxnType(Configuration conf, ASTNode tree) {
// check if read-only txn
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_TXN_READONLY_ENABLED) && isReadOnlyTxn(tree)) {
return TxnType.READ_ONLY;
}
// check if txn has a materialized view rebuild
if (tree.getToken().getType() == HiveParser.TOK_ALTER_MATERIALIZED_VIEW_REBUILD) {
return TxnType.MATER_VIEW_REBUILD;
}
// check if compaction request
if (tree.getFirstChildWithType(HiveParser.TOK_ALTERTABLE_COMPACT) != null){
return TxnType.COMPACTION;
}
// check if soft delete txn
if (isSoftDeleteTxn(conf, tree)) {
return TxnType.SOFT_DELETE;
}
return TxnType.DEFAULT;
}
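/**
* A statement is considered read-only if its root token is one of the known read-only tokens,
* or if it is a TOK_QUERY without any INSERT INTO / INSERT ... TABLE branch in its AST.
*/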
private static boolean isReadOnlyTxn(ASTNode tree) {
final ASTSearcher astSearcher = new ASTSearcher();
return READ_TXN_TOKENS.contains(tree.getToken().getType())
|| (tree.getToken().getType() == HiveParser.TOK_QUERY && Stream.of(
new int[]{HiveParser.TOK_INSERT_INTO},
new int[]{HiveParser.TOK_INSERT, HiveParser.TOK_TAB})
.noneMatch(pattern -> astSearcher.simpleBreadthFirstSearch(tree, pattern) != null));
}
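/**
* Checks whether the statement should run as a soft-delete transaction: DROP DATABASE/TABLE/
* MATERIALIZED VIEW, ALTER TABLE DROP PARTITION and ALTER TABLE RENAME PARTITION qualify when
* lockless reads or the corresponding suffix/base/copy config flag is enabled.
*/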
private static boolean isSoftDeleteTxn(Configuration conf, ASTNode tree) {
boolean locklessReadsEnabled = HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_LOCKLESS_READS_ENABLED);
switch (tree.getToken().getType()) {
case HiveParser.TOK_DROPDATABASE:
case HiveParser.TOK_DROPTABLE:
case HiveParser.TOK_DROP_MATERIALIZED_VIEW:
return locklessReadsEnabled
|| HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_CREATE_TABLE_USE_SUFFIX);
case HiveParser.TOK_ALTERTABLE: {
boolean isDropParts = tree.getFirstChildWithType(HiveParser.TOK_ALTERTABLE_DROPPARTS) != null;
if (isDropParts) {
return locklessReadsEnabled
|| HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_DROP_PARTITION_USE_BASE);
}
boolean isRenamePart = tree.getFirstChildWithType(HiveParser.TOK_ALTERTABLE_RENAMEPART) != null;
if (isRenamePart) {
return locklessReadsEnabled
|| HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_RENAME_PARTITION_MAKE_COPY);
}
}
}
return false;
}
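/**
* Builds the soft-delete path suffix for the given transaction id by appending the
* zero-padded txn id (DELTA_DIGITS format) to SOFT_DELETE_PATH_SUFFIX.
*/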
public static String getPathSuffix(long txnId) {
return (SOFT_DELETE_PATH_SUFFIX + String.format(DELTA_DIGITS, txnId));
}
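/**
* Lazily initializes the directory cache used by getAcidStateFromCache; entries expire
* after the given number of minutes and are held via soft values. Initialization happens
* only once, subsequent calls are no-ops.
*/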
@VisibleForTesting
public static void initDirCache(int durationInMts) {
if (dirCacheInited.get()) {
LOG.debug("DirCache got initialized already");
return;
}
dirCache = CacheBuilder.newBuilder()
.expireAfterWrite(durationInMts, TimeUnit.MINUTES)
.softValues()
.build();
dirCacheInited.set(true);
}
private static void printDirCacheEntries() {
if (dirCache != null) {
LOG.debug("Cache entries: {}", Arrays.toString(dirCache.asMap().keySet().toArray()));
}
}
/**
* Tries to get directory details from cache. For now, cache is valid only
* when base directory is available and no deltas are present. This should
* be used only in BI strategy and for ACID tables.
*
* @param fileSystem file system supplier
* @param candidateDirectory the partition directory to analyze
* @param conf the configuration
* @param writeIdList the list of write ids that we are reading
* @param useFileIds will be set to true if the FileSystem supports listing with fileIds
* @param ignoreEmptyFiles Ignore files with 0 length
* @return directory state
* @throws IOException on errors
*/
public static AcidDirectory getAcidStateFromCache(Supplier<FileSystem> fileSystem,
Path candidateDirectory, Configuration conf,
ValidWriteIdList writeIdList, Ref<Boolean> useFileIds, boolean ignoreEmptyFiles) throws IOException {
int dirCacheDuration = HiveConf.getIntVar(conf,
ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION);
if (dirCacheDuration < 0) {
LOG.debug("dirCache is not enabled");
return getAcidState(fileSystem.get(), candidateDirectory, conf, writeIdList,
useFileIds, ignoreEmptyFiles);
} else {
initDirCache(dirCacheDuration);
}
/*
* Cache only the simple case, where a base directory exists without deltas.
* In case of changes, the cache entry gets invalidated based on the
* open/aborted txn list.
*/
// cache key: dbName.tableName + "_" + directory
String key = writeIdList.getTableName() + "_" + candidateDirectory.toString();
DirInfoValue value = dirCache.getIfPresent(key);
// in case of open/aborted txns, recompute dirInfo
long[] exceptions = writeIdList.getInvalidWriteIds();
boolean recompute = (exceptions != null && exceptions.length > 0);
if (recompute) {
LOG.info("invalidating cache entry for key: {}", key);
dirCache.invalidate(key);
value = null;
}
if (value != null) {
// double check writeIds
if (!value.getTxnString().equalsIgnoreCase(writeIdList.writeToString())) {
if (LOG.isDebugEnabled()) {
LOG.debug("writeIdList: {} from cache: {} is not matching for key: {}",
writeIdList.writeToString(), value.getTxnString(), key);
}
recompute = true;
}
}
// compute and add to cache
if (recompute || (value == null)) {
AcidDirectory dirInfo = getAcidState(fileSystem.get(), candidateDirectory, conf,
writeIdList, useFileIds, ignoreEmptyFiles);
value = new DirInfoValue(writeIdList.writeToString(), dirInfo);
if (value.dirInfo != null && value.dirInfo.getBaseDirectory() != null
&& value.dirInfo.getCurrentDirectories().isEmpty()) {
if (dirCacheDuration > 0) {
dirCache.put(key, value);
} else {
LOG.info("Not populating cache for {}, as duration is set to 0", key);
}
}
} else {
LOG.info("Got {} from cache, cache size: {}", key, dirCache.size());
}
if (LOG.isDebugEnabled()) {
printDirCacheEntries();
}
return value.getDirInfo();
}
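/**
* Invalidates directory cache entries for the given table: the single entry for
* unpartitioned tables, or every entry whose key starts with the table location prefix
* for partitioned tables. No-op if the cache has not been initialized.
*/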
public static void tryInvalidateDirCache(org.apache.hadoop.hive.metastore.api.Table table) {
if (dirCacheInited.get()) {
String key = getFullTableName(table.getDbName(), table.getTableName()) + "_" + table.getSd().getLocation();
boolean partitioned = table.getPartitionKeys() != null && !table.getPartitionKeys().isEmpty();
if (!partitioned) {
dirCache.invalidate(key);
} else {
// Invalidate all partitions as the difference in the key is only the partition part at the end of the path.
dirCache.invalidateAll(
dirCache.asMap().keySet().stream().filter(k -> k.startsWith(key)).collect(Collectors.toSet()));
}
}
}
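/**
* Returns true if the table is backed by a storage handler whose ACID support type for the
* given operation (read or write) is anything other than NONE.
*/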
public static boolean isNonNativeAcidTable(Table table, boolean isWriteOperation) {
return table != null && table.getStorageHandler() != null &&
table.getStorageHandler().supportsAcidOperations(table, isWriteOperation) != HiveStorageHandler.AcidSupportType.NONE;
}
/**
* Returns the virtual columns needed for update queries. For ACID queries it is a single ROW__ID, for non-native
* tables the list is provided by the {@link HiveStorageHandler#acidVirtualColumns()}.
* @param table The table for which we run the query
* @return The list of virtual columns used
*/
public static List<VirtualColumn> getAcidVirtualColumns(Table table) {
if (isTransactionalTable(table)) {
return Lists.newArrayList(VirtualColumn.ROWID);
} else {
if (isNonNativeAcidTable(table, false)) {
return table.getStorageHandler().acidVirtualColumns();
}
}
return Collections.emptyList();
}
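/**
* Returns true for non-native tables whose storage handler supports ACID write operations
* without relying on Hive transactions (AcidSupportType.WITHOUT_TRANSACTIONS).
*/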
public static boolean acidTableWithoutTransactions(Table table) {
return table != null && table.getStorageHandler() != null &&
table.getStorageHandler().supportsAcidOperations(table, true) ==
HiveStorageHandler.AcidSupportType.WITHOUT_TRANSACTIONS;
}
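/**
* Cache value for the directory cache: the serialized write id list used to validate the
* entry plus the resolved AcidDirectory.
*/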
static class DirInfoValue {
private String txnString;
private AcidDirectory dirInfo;
DirInfoValue(String txnString, AcidDirectory dirInfo) {
this.txnString = txnString;
this.dirInfo = dirInfo;
}
String getTxnString() {
return txnString;
}
AcidDirectory getDirInfo() {
return dirInfo;
}
}
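/**
* Converts a partition spec into a partition name via Warehouse.makePartName, e.g. a spec of
* {ds=2024-01-01, hr=10} is expected to yield "ds=2024-01-01/hr=10". Returns null for a null spec.
*/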
public static String getPartitionName(Map<String, String> partitionSpec) throws SemanticException {
String partitionName = null;
if (partitionSpec != null) {
try {
partitionName = Warehouse.makePartName(partitionSpec, false);
} catch (MetaException e) {
throw new SemanticException("partition " + partitionSpec.toString() + " not found");
}
}
return partitionName;
}
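/**
* Maps a user-supplied compaction type string (e.g. "major"/"minor", case-insensitive) to the
* Thrift CompactionType enum, translating unknown values into a SemanticException.
*/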
public static CompactionType compactionTypeStr2ThriftType(String inputValue) throws SemanticException {
try {
return CompactionType.valueOf(inputValue.toUpperCase());
} catch (IllegalArgumentException e) {
throw new SemanticException("Unexpected compaction type " + inputValue);
}
}
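/**
* Maps a user-supplied compaction state string to the CompactionState enum, translating
* unknown values into a SemanticException.
*/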
public static CompactionState compactionStateStr2Enum(String inputValue) throws SemanticException {
try {
return CompactionState.fromString(inputValue);
} catch (IllegalArgumentException e) {
throw new SemanticException("Unexpected compaction state " + inputValue);
}
}
}