org.apache.hadoop.hive.ql.io.merge.MergeFileWork
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.io.merge;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcFileStripeMergeInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat;
import org.apache.hadoop.hive.ql.plan.Explain;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.mapred.InputFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;

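/**
 * MapWork for a file-merge task: rewrites the many small files of a table or
 * partition into fewer, larger ones. ORC sources are merged at stripe level
 * and RCFile sources at block level, so the merge can copy stripes/blocks
 * rather than decode and re-encode individual rows.
 */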
@Explain(displayName = "Merge File Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
public class MergeFileWork extends MapWork {

  private static final Log LOG = LogFactory.getLog(MergeFileWork.class);
  private List<Path> inputPaths;
  private Path outputDir;
  private boolean hasDynamicPartitions;
  private boolean isListBucketingAlterTableConcatenate;
  private ListBucketingCtx listBucketingCtx;

  // source table input format
  private String srcTblInputFormat;

  // internal input format used by CombineHiveInputFormat
  private Class<? extends InputFormat> internalInputFormat;

  public MergeFileWork(List<Path> inputPaths, Path outputDir,
      String srcTblInputFormat) {
    this(inputPaths, outputDir, false, srcTblInputFormat);
  }

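  /**
   * The source table's input format picks the internal input format used to
   * read the files being merged: ORC goes through
   * OrcFileStripeMergeInputFormat, RCFile through RCFileBlockMergeInputFormat.
   * Each input path is then registered with a PartitionDesc carrying that
   * internal format.
   */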
  public MergeFileWork(List<Path> inputPaths, Path outputDir,
      boolean hasDynamicPartitions,
      String srcTblInputFormat) {
    this.inputPaths = inputPaths;
    this.outputDir = outputDir;
    this.hasDynamicPartitions = hasDynamicPartitions;
    this.srcTblInputFormat = srcTblInputFormat;
    PartitionDesc partDesc = new PartitionDesc();
    if (srcTblInputFormat.equals(OrcInputFormat.class.getName())) {
      this.internalInputFormat = OrcFileStripeMergeInputFormat.class;
    } else if (srcTblInputFormat.equals(RCFileInputFormat.class.getName())) {
      this.internalInputFormat = RCFileBlockMergeInputFormat.class;
    }
    partDesc.setInputFileFormatClass(internalInputFormat);
    if (this.getPathToPartitionInfo() == null) {
      this.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
    }
    for (Path path : this.inputPaths) {
      this.getPathToPartitionInfo().put(path.toString(), partDesc);
    }
    this.isListBucketingAlterTableConcatenate = false;
  }

  public List<Path> getInputPaths() {
    return inputPaths;
  }

  public void setInputPaths(List<Path> inputPaths) {
    this.inputPaths = inputPaths;
  }

  public Path getOutputDir() {
    return outputDir;
  }

  public void setOutputDir(Path outputDir) {
    this.outputDir = outputDir;
  }

  @Override
  public Long getMinSplitSize() {
    return null;
  }

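  // The merge task always reads through CombineHiveInputFormat, which wraps
  // the internal stripe/block input format and packs many small files into
  // each split.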
  @Override
  public String getInputformat() {
    return getInputformatClass().getName();
  }

  public Class<? extends InputFormat> getInputformatClass() {
    return CombineHiveInputFormat.class;
  }

  @Override
  public boolean isGatheringStats() {
    return false;
  }

  public boolean hasDynamicPartitions() {
    return this.hasDynamicPartitions;
  }

  public void setHasDynamicPartitions(boolean hasDynamicPartitions) {
    this.hasDynamicPartitions = hasDynamicPartitions;
  }

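  /**
   * For dynamic-partition merges, register each resolved partition directory:
   * let the parent class record it, then force the internal stripe/block
   * input format on its descriptor and add the directory as an input path.
   */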
  @Override
  public void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf,
      Path path,
      TableDesc tblDesc,
      ArrayList<String> aliases,
      PartitionDesc partDesc) {
    super.resolveDynamicPartitionStoredAsSubDirsMerge(conf, path, tblDesc,
        aliases, partDesc);
    // set internal input format for all partition descriptors
    partDesc.setInputFileFormatClass(internalInputFormat);
    // Add the DP path to the list of input paths
    inputPaths.add(path);
  }

  /**
   * alter table ... concatenate
   *
   * If it is a skewed table, use subdirectories in inputpaths.
   */
  public void resolveConcatenateMerge(HiveConf conf) {
    isListBucketingAlterTableConcatenate =
        ((listBucketingCtx == null) ? false :
            listBucketingCtx.isSkewedStoredAsDir());
    LOG.info("isListBucketingAlterTableConcatenate : " +
        isListBucketingAlterTableConcatenate);
    if (isListBucketingAlterTableConcatenate) {
      // use sub-dir as inputpath.
      assert ((this.inputPaths != null) && (this.inputPaths.size() == 1)) :
          "alter table ... concatenate should only have one" +
              " directory inside inputpaths";
      Path dirPath = inputPaths.get(0);
      try {
        FileSystem inpFs = dirPath.getFileSystem(conf);
        FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(dirPath,
            listBucketingCtx.getSkewedColNames().size(), inpFs);
        List<Path> newInputPath = new ArrayList<Path>();
        boolean succeed = true;
        for (int i = 0; i < status.length; ++i) {
          if (status[i].isDir()) {
            // add the list-bucketing path to the list of input paths
            newInputPath.add(status[i].getPath());
          } else {
            // found a file instead of a dir; don't change inputpaths
            succeed = false;
          }
        }
        assert (succeed || newInputPath.isEmpty()) :
            "This partition has an inconsistent file structure: " +
                "it is stored-as-subdir but not all files are at the same" +
                " depth of subdirectories.";
        if (succeed) {
          inputPaths.clear();
          inputPaths.addAll(newInputPath);
        }
      } catch (IOException e) {
        String msg = "Failed to get filesystem for directory name : " +
            dirPath.toUri();
        throw new RuntimeException(msg, e);
      }
    }
  }

  /**
   * @return the listBucketingCtx
   */
  public ListBucketingCtx getListBucketingCtx() {
    return listBucketingCtx;
  }

  /**
   * @param listBucketingCtx the listBucketingCtx to set
   */
  public void setListBucketingCtx(ListBucketingCtx listBucketingCtx) {
    this.listBucketingCtx = listBucketingCtx;
  }

  /**
   * @return the isListBucketingAlterTableConcatenate
   */
  public boolean isListBucketingAlterTableConcatenate() {
    return isListBucketingAlterTableConcatenate;
  }

  @Explain(displayName = "input format")
  public String getSourceTableInputFormat() {
    return srcTblInputFormat;
  }

  public void setSourceTableInputFormat(String srcTblInputFormat) {
    this.srcTblInputFormat = srcTblInputFormat;
  }

  @Explain(displayName = "merge level")
  public String getMergeLevel() {
    if (srcTblInputFormat != null) {
      if (srcTblInputFormat.equals(OrcInputFormat.class.getName())) {
        return "stripe";
      } else if (srcTblInputFormat.equals(RCFileInputFormat.class.getName())) {
        return "block";
      }
    }
    return null;
  }
}
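As a rough usage sketch (not part of the Hive source; the driver class, paths, and table layout below are hypothetical), a caller could assemble a stripe-level merge for an ORC table like this:

import java.util.ArrayList;
import java.util.Arrays;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;

public class MergeFileWorkSketch {
  public static void main(String[] args) {
    // Hypothetical source partition and staging directory.
    Path input = new Path("/warehouse/db/tbl/ds=2015-01-01");
    Path output = new Path("/warehouse/db/tbl/.merge-staging");

    MergeFileWork work = new MergeFileWork(
        new ArrayList<Path>(Arrays.asList(input)), output,
        OrcInputFormat.class.getName());

    System.out.println(work.getMergeLevel());   // "stripe" for ORC sources
    System.out.println(work.getInputformat());  // CombineHiveInputFormat
  }
}

In Hive itself this work object is normally produced by the merge-task planner (for example during alter table ... concatenate) rather than constructed by hand.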