org.apache.hadoop.hive.shims.HadoopShimsSecure Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.shims;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.net.URI;
import java.security.AccessControlException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.lang.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DefaultFileAccess;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.CombineFileInputFormat;
import org.apache.hadoop.mapred.lib.CombineFileSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.util.Progressable;

/**
 * Base implemention for shims against secure Hadoop 0.20.3/0.23.
 */
public abstract class HadoopShimsSecure implements HadoopShims {

  static final Logger LOG = LoggerFactory.getLogger(HadoopShimsSecure.class);

  public static class InputSplitShim extends CombineFileSplit {
    long shrinkedLength;
    boolean _isShrinked;
    public InputSplitShim() {
      super();
      _isShrinked = false;
    }

    public InputSplitShim(JobConf conf, Path[] paths, long[] startOffsets,
      long[] lengths, String[] locations) throws IOException {
      super(conf, paths, startOffsets, lengths, dedup(locations));
      _isShrinked = false;
    }

    public void shrinkSplit(long length) {
      _isShrinked = true;
      shrinkedLength = length;
    }

    public boolean isShrinked() {
      return _isShrinked;
    }

    public long getShrinkedLength() {
      return shrinkedLength;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
      super.readFields(in);
      _isShrinked = in.readBoolean();
      if (_isShrinked) {
        shrinkedLength = in.readLong();
      }
    }

    @Override
    public void write(DataOutput out) throws IOException {
      super.write(out);
      out.writeBoolean(_isShrinked);
      if (_isShrinked) {
        out.writeLong(shrinkedLength);
      }
    }
  }

  /* This class should be replaced with org.apache.hadoop.mapred.lib.CombineFileRecordReader class, once
   * https://issues.apache.org/jira/browse/MAPREDUCE-955 is fixed. This code should be removed - it is a copy
   * of org.apache.hadoop.mapred.lib.CombineFileRecordReader
   */
  public static class CombineFileRecordReader implements RecordReader {

    static final Class[] constructorSignature = new Class[] {
        InputSplit.class,
        Configuration.class,
        Reporter.class,
        Integer.class
        };

    protected CombineFileSplit split;
    protected JobConf jc;
    protected Reporter reporter;
    protected Class> rrClass;
    protected Constructor> rrConstructor;
    protected FileSystem fs;

    protected int idx;
    protected long progress;
    protected RecordReader curReader;
    protected boolean isShrinked;
    protected long shrinkedLength;

    @Override
    public boolean next(K key, V value) throws IOException {

      while ((curReader == null)
          || !doNextWithExceptionHandler((K) ((CombineHiveKey) key).getKey(),
              value)) {
        if (!initNextRecordReader(key)) {
          return false;
        }
      }
      return true;
    }

    @Override
    public K createKey() {
      K newKey = curReader.createKey();
      return (K)(new CombineHiveKey(newKey));
    }

    @Override
    public V createValue() {
      return curReader.createValue();
    }

    /**
     * Return the amount of data processed.
     */
    @Override
    public long getPos() throws IOException {
      return progress;
    }

    @Override
    public void close() throws IOException {
      if (curReader != null) {
        curReader.close();
        curReader = null;
      }
    }

    /**
     * Return progress based on the amount of data processed so far.
     */
    @Override
    public float getProgress() throws IOException {
      return Math.min(1.0f, progress / (float) (split.getLength()));
    }

    /**
     * A generic RecordReader that can hand out different recordReaders
     * for each chunk in the CombineFileSplit.
     */
    public CombineFileRecordReader(JobConf job, CombineFileSplit split,
        Reporter reporter,
        Class> rrClass)
        throws IOException {
      this.split = split;
      this.jc = job;
      this.rrClass = rrClass;
      this.reporter = reporter;
      this.idx = 0;
      this.curReader = null;
      this.progress = 0;

      isShrinked = false;

      assert (split instanceof InputSplitShim);
      if (((InputSplitShim) split).isShrinked()) {
        isShrinked = true;
        shrinkedLength = ((InputSplitShim) split).getShrinkedLength();
      }

      try {
        rrConstructor = rrClass.getDeclaredConstructor(constructorSignature);
        rrConstructor.setAccessible(true);
      } catch (Exception e) {
        throw new RuntimeException(rrClass.getName() +
            " does not have valid constructor", e);
      }
      initNextRecordReader(null);
    }

    /**
     * do next and handle exception inside it.
     * @param key
     * @param value
     * @return
     * @throws IOException
     */
    private boolean doNextWithExceptionHandler(K key, V value) throws IOException {
      try {
        return curReader.next(key, value);
      } catch (Exception e) {
        return HiveIOExceptionHandlerUtil
            .handleRecordReaderNextException(e, jc);
      }
    }

    /**
     * Get the record reader for the next chunk in this CombineFileSplit.
     */
    protected boolean initNextRecordReader(K key) throws IOException {

      if (curReader != null) {
        curReader.close();
        curReader = null;
        if (idx > 0) {
          progress += split.getLength(idx - 1); // done processing so far
        }
      }

      // if all chunks have been processed, nothing more to do.
      if (idx == split.getNumPaths() || (isShrinked && progress > shrinkedLength)) {
        return false;
      }

      // get a record reader for the idx-th chunk
      try {
        curReader = rrConstructor.newInstance(new Object[]
            {split, jc, reporter, Integer.valueOf(idx)});

        // change the key if need be
        if (key != null) {
          K newKey = curReader.createKey();
          ((CombineHiveKey)key).setKey(newKey);
        }

        // setup some helper config variables.
        jc.set("map.input.file", split.getPath(idx).toString());
        jc.setLong("map.input.start", split.getOffset(idx));
        jc.setLong("map.input.length", split.getLength(idx));
      } catch (Exception e) {
        curReader = HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(
            e, jc);
      }
      idx++;
      return true;
    }
  }

  public abstract static class CombineFileInputFormatShim extends
      CombineFileInputFormat
      implements HadoopShims.CombineFileInputFormatShim {

    @Override
    public Path[] getInputPathsShim(JobConf conf) {
      try {
        return FileInputFormat.getInputPaths(conf);
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }

    @Override
    public void createPool(JobConf conf, PathFilter... filters) {
      super.createPool(conf, filters);
    }

    @Override
    public CombineFileSplit[] getSplits(JobConf job, int numSplits) throws IOException {

      long minSize =
          job.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 0);

      // For backward compatibility, let the above parameter be used
      if (job.getLong(
          org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.SPLIT_MINSIZE_PERNODE,
          0) == 0) {
        super.setMinSplitSizeNode(minSize);
      }

      if (job.getLong(
          org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.SPLIT_MINSIZE_PERRACK,
          0) == 0) {
        super.setMinSplitSizeRack(minSize);
      }

      if (job.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MAXSIZE,
          0) == 0) {
        super.setMaxSplitSize(minSize);
      }

      InputSplit[] splits = super.getSplits(job, numSplits);

      ArrayList inputSplitShims = new ArrayList();
      for (int pos = 0; pos < splits.length; pos++) {
        CombineFileSplit split = (CombineFileSplit) splits[pos];
        if (split.getPaths().length > 0) {
          inputSplitShims.add(new InputSplitShim(job, split.getPaths(),
            split.getStartOffsets(), split.getLengths(), split.getLocations()));
        }
      }
      return inputSplitShims.toArray(new InputSplitShim[inputSplitShims.size()]);
    }

    @Override
    public InputSplitShim getInputSplitShim() throws IOException {
      return new InputSplitShim();
    }

    @Override
    public RecordReader getRecordReader(JobConf job, CombineFileSplit split,
        Reporter reporter,
        Class> rrClass)
        throws IOException {
      CombineFileSplit cfSplit = split;
      return new CombineFileRecordReader(job, cfSplit, reporter, rrClass);
    }
  }

  @Override
  abstract public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception;

  @Override
  abstract public org.apache.hadoop.mapreduce.TaskAttemptContext newTaskAttemptContext(
      Configuration conf, final Progressable progressable);

  @Override
  abstract public org.apache.hadoop.mapreduce.JobContext newJobContext(Job job);

  @Override
  abstract public boolean isLocalMode(Configuration conf);

  @Override
  abstract public void setJobLauncherRpcAddress(Configuration conf, String val);

  @Override
  abstract public String getJobLauncherHttpAddress(Configuration conf);

  @Override
  abstract public String getJobLauncherRpcAddress(Configuration conf);

  @Override
  abstract public short getDefaultReplication(FileSystem fs, Path path);

  @Override
  abstract public long getDefaultBlockSize(FileSystem fs, Path path);

  @Override
  abstract public FileSystem createProxyFileSystem(FileSystem fs, URI uri);

  @Override
  abstract public FileSystem getNonCachedFileSystem(URI uri, Configuration conf) throws IOException;

  private static String[] dedup(String[] locations) throws IOException {
    Set dedup = new HashSet();
    Collections.addAll(dedup, locations);
    return dedup.toArray(new String[dedup.size()]);
  }

  @Override
  public void checkFileAccess(FileSystem fs, FileStatus stat, FsAction action)
      throws IOException, AccessControlException, Exception {
    DefaultFileAccess.checkFileAccess(fs, stat, action);
  }

  @Override
  abstract public void addDelegationTokens(FileSystem fs, Credentials cred, String uname) throws IOException;
}