org.apache.hive.hcatalog.har.HarOutputCommitterPostProcessor

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hive.hcatalog.har;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatException;

/**
 * Post-processes a committed partition by packing its directory into a Hadoop
 * Archive (HAR) and flagging the partition as archived in the Hive metastore.
 */
public class HarOutputCommitterPostProcessor {

  // Archiving is off by default; callers enable it when HAR archiving is requested.
  boolean isEnabled = false;

  public boolean isEnabled() {
    return isEnabled;
  }

  public void setEnabled(boolean enabled) {
    this.isEnabled = enabled;
  }


  /**
   * Archives the partition directory into a HAR file alongside it, then sets
   * the partition's is_archived parameter so Hive treats it as archived.
   */
  public void exec(JobContext context, Partition partition, Path partPath) throws IOException {
    makeHar(context, partPath.toUri().toString(), harFile(partPath));
    partition.getParameters().put(hive_metastoreConstants.IS_ARCHIVED, "true");
  }

  /**
   * Returns the HAR file path for a partition directory, e.g.
   * /warehouse/tbl/ds=1 becomes /warehouse/tbl/ds=1.har.
   */
  public String harFile(Path ptnPath) throws IOException {
    return ptnPath.toString().replaceFirst("/+$", "") + ".har";
  }

  /**
   * Returns the partition's filesystem path with any trailing slashes removed.
   */
  public String getParentFSPath(Path ptnPath) throws IOException {
    return ptnPath.toUri().getPath().replaceFirst("/+$", "");
  }

  /**
   * Returns the har:// location under which the archived contents are read,
   * e.g. /warehouse/tbl/ds=1 becomes har:///warehouse/tbl/ds=1.har/.
   */
  public String getProcessedLocation(Path ptnPath) throws IOException {
    return ("har://" + ptnPath.toUri().getPath()).replaceFirst("/+$", "") + ".har" + Path.SEPARATOR;
  }


  /**
   * Creates a HAR file from the contents of a given directory, using that
   * directory as the archive root, and deletes the source directory once the
   * archive has been created.
   * @param context the job context (currently unused)
   * @param dir the directory to archive
   * @param harFile the HAR file to create
   */
  public static void makeHar(JobContext context, String dir, String harFile) throws IOException {

    int lastSep = harFile.lastIndexOf(Path.SEPARATOR_CHAR);
    Path archivePath = new Path(harFile.substring(0, lastSep));
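    // These are the same arguments the "hadoop archive" command-line tool
    // takes: hadoop archive -archiveName <name>.har -p <parent> <glob> <dest>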
    final String[] args = {
      "-archiveName",
      harFile.substring(lastSep + 1),
      "-p",
      dir,
      "*",
      archivePath.toString()
    };
    try {
      Configuration newConf = new Configuration();
      FileSystem fs = archivePath.getFileSystem(newConf);

      // On a secure cluster, hand the delegation-token file from the
      // environment to the archiving job via the job configuration.
      String hadoopTokenFileLocationEnvSetting = System.getenv(HCatConstants.SYSENV_HADOOP_TOKEN_FILE_LOCATION);
      if ((hadoopTokenFileLocationEnvSetting != null) && (!hadoopTokenFileLocationEnvSetting.isEmpty())) {
        newConf.set(HCatConstants.CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocationEnvSetting);
      }

      // Run the archiver in-process and fail fast on a non-zero exit code.
      final HadoopArchives har = new HadoopArchives(newConf);
      int rc = ToolRunner.run(har, args);
      if (rc != 0) {
        throw new Exception("Har returned error code " + rc);
      }

      // The archive now holds the data; remove the original partition directory.
      fs.delete(new Path(dir), true);
    } catch (Exception e) {
      throw new HCatException("Error creating Har [" + harFile + "] from [" + dir + "]", e);
    }
  }

}
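
Below is a minimal caller sketch, not part of the source above: the method name
and wiring are assumptions about how a committer might drive this class, with
the partition path taken from the partition's storage descriptor.

  // Hypothetical caller, for illustration only; not HCatalog API.
  void archivePartition(JobContext context, Partition partition) throws IOException {
    HarOutputCommitterPostProcessor harProcessor = new HarOutputCommitterPostProcessor();
    harProcessor.setEnabled(true);
    if (harProcessor.isEnabled()) {
      // e.g. /warehouse/tbl/ds=1 is packed into /warehouse/tbl/ds=1.har
      // and the original directory is removed on success
      harProcessor.exec(context, partition, new Path(partition.getSd().getLocation()));
    }
  }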



