All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.io.rcfile.merge;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.Explain;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.Mapper;

@Explain(displayName = "Block level merge")
public class MergeWork extends MapWork implements Serializable {

  private static final long serialVersionUID = 1L;

  private transient List inputPaths;
  private transient Path outputDir;
  private boolean hasDynamicPartitions;
  private DynamicPartitionCtx dynPartCtx;
  private boolean isListBucketingAlterTableConcatenate;
  private ListBucketingCtx listBucketingCtx;

  public MergeWork() {
  }

  public MergeWork(List inputPaths, Path outputDir) {
    this(inputPaths, outputDir, false, null);
  }

  public MergeWork(List inputPaths, Path outputDir,
      boolean hasDynamicPartitions, DynamicPartitionCtx dynPartCtx) {
    super();
    this.inputPaths = inputPaths;
    this.outputDir = outputDir;
    this.hasDynamicPartitions = hasDynamicPartitions;
    this.dynPartCtx = dynPartCtx;
    PartitionDesc partDesc = new PartitionDesc();
    partDesc.setInputFileFormatClass(RCFileBlockMergeInputFormat.class);
    if(this.getPathToPartitionInfo() == null) {
      this.setPathToPartitionInfo(new LinkedHashMap());
    }
    for(Path path: this.inputPaths) {
      this.getPathToPartitionInfo().put(path.toString(), partDesc);
    }
  }

  public List getInputPaths() {
    return inputPaths;
  }

  public void setInputPaths(List inputPaths) {
    this.inputPaths = inputPaths;
  }

  public Path getOutputDir() {
    return outputDir;
  }

  public void setOutputDir(Path outputDir) {
    this.outputDir = outputDir;
  }

  public Class getMapperClass() {
    return RCFileMergeMapper.class;
  }

  @Override
  public Long getMinSplitSize() {
    return null;
  }

  @Override
  public String getInputformat() {
    return CombineHiveInputFormat.class.getName();
  }

  @Override
  public boolean isGatheringStats() {
    return false;
  }

  public boolean hasDynamicPartitions() {
    return this.hasDynamicPartitions;
  }

  public void setHasDynamicPartitions(boolean hasDynamicPartitions) {
    this.hasDynamicPartitions = hasDynamicPartitions;
  }

  @Override
  public void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf, Path path,
      TableDesc tblDesc, ArrayList aliases, PartitionDesc partDesc) {

    String inputFormatClass = conf.getVar(HiveConf.ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL);
    try {
      partDesc.setInputFileFormatClass((Class )
          Class.forName(inputFormatClass));
    } catch (ClassNotFoundException e) {
      String msg = "Merge input format class not found";
      throw new RuntimeException(msg);
    }
    super.resolveDynamicPartitionStoredAsSubDirsMerge(conf, path, tblDesc, aliases, partDesc);

    // Add the DP path to the list of input paths
    inputPaths.add(path);
  }

  /**
   * alter table ... concatenate
   *
   * If it is skewed table, use subdirectories in inputpaths.
   */
  public void resolveConcatenateMerge(HiveConf conf) {
    isListBucketingAlterTableConcatenate = ((listBucketingCtx == null) ? false : listBucketingCtx
        .isSkewedStoredAsDir());
    if (isListBucketingAlterTableConcatenate) {
      // use sub-dir as inputpath.
      assert ((this.inputPaths != null) && (this.inputPaths.size() == 1)) :
        "alter table ... concatenate should only have one directory inside inputpaths";
      Path dirPath = inputPaths.get(0);
      try {
        FileSystem inpFs = dirPath.getFileSystem(conf);
        FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(dirPath, listBucketingCtx
            .getSkewedColNames().size(), inpFs);
        List newInputPath = new ArrayList();
        boolean succeed = true;
        for (int i = 0; i < status.length; ++i) {
           if (status[i].isDir()) {
             // Add the lb path to the list of input paths
             newInputPath.add(status[i].getPath());
           } else {
             // find file instead of dir. dont change inputpath
             succeed = false;
           }
        }
        assert (succeed || ((!succeed) && newInputPath.isEmpty())) : "This partition has "
            + " inconsistent file structure: "
            + "it is stored-as-subdir and expected all files in the same depth of subdirectories.";
        if (succeed) {
          inputPaths.clear();
          inputPaths.addAll(newInputPath);
        }
      } catch (IOException e) {
        String msg = "Fail to get filesystem for directory name : " + dirPath.toUri();
        throw new RuntimeException(msg, e);
      }

    }
  }

  public DynamicPartitionCtx getDynPartCtx() {
    return dynPartCtx;
  }

  public void setDynPartCtx(DynamicPartitionCtx dynPartCtx) {
    this.dynPartCtx = dynPartCtx;
  }

  /**
   * @return the listBucketingCtx
   */
  public ListBucketingCtx getListBucketingCtx() {
    return listBucketingCtx;
  }

  /**
   * @param listBucketingCtx the listBucketingCtx to set
   */
  public void setListBucketingCtx(ListBucketingCtx listBucketingCtx) {
    this.listBucketingCtx = listBucketingCtx;
  }

  /**
   * @return the isListBucketingAlterTableConcatenate
   */
  public boolean isListBucketingAlterTableConcatenate() {
    return isListBucketingAlterTableConcatenate;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy