All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.mapred.pipes.Application Maven / Gradle / Ivy

There is a newer version: 3.4.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.shaded.org.apache.hadoop.mapred.pipes;

import java.org.apache.hadoop.shaded.io.File;
import java.org.apache.hadoop.shaded.io.IOException;
import java.org.apache.hadoop.shaded.net.ServerSocket;
import java.org.apache.hadoop.shaded.net.Socket;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import javax.crypto.SecretKey;

import org.apache.hadoop.shaded.org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.Path;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.FloatWritable;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.IOUtils;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.NullWritable;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.Writable;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.WritableComparable;
import org.apache.hadoop.shaded.org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.shaded.org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.shaded.org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.shaded.org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.shaded.org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.shaded.org.apache.hadoop.mapred.TaskLog;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.security.SecureShuffleUtils;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
import org.apache.hadoop.shaded.org.apache.hadoop.security.token.Token;
import org.apache.hadoop.shaded.org.apache.hadoop.thirdparty.org.apache.hadoop.shaded.com.google.org.apache.hadoop.shaded.com.on.annotations.VisibleForTesting;
import org.apache.hadoop.shaded.org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.shaded.org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.shaded.org.slf4j.Logger;
import org.apache.hadoop.shaded.org.slf4j.LoggerFactory;

/**
 * This class is responsible for launching and communicating with the child
 * process.
 */
class Application {
  private static final Logger LOG =
      LoggerFactory.getLogger(Application.class.getName());
  private ServerSocket serverSocket;
  private PingSocketCleaner socketCleaner;
  private Process process;
  private Socket clientSocket;
  private OutputHandler handler;
  private DownwardProtocol downlink;
  static final boolean WINDOWS
  = System.getProperty("os.name").startsWith("Windows");

  /**
   * Start the child process to handle the task for us.
   * @param conf the task's configuration
   * @param recordReader the fake record reader to update progress with
   * @param output the collector to send output to
   * @param reporter the reporter for the task
   * @param outputKeyClass the class of the output keys
   * @param outputValueClass the class of the output values
   * @throws IOException
   * @throws InterruptedException
   */
  Application(JobConf conf, 
              RecordReader recordReader, 
              OutputCollector output, Reporter reporter,
              Class outputKeyClass,
              Class outputValueClass
              ) throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map env = new HashMap();
    // add TMPDIR environment variable with the value of java.org.apache.hadoop.shaded.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.org.apache.hadoop.shaded.io.tmpdir"));
    env.put(Submitter.PORT, 
            Integer.toString(serverSocket.getLocalPort()));
    
    //Add token to the environment if security is enabled
    Token jobToken = TokenCache.getJobToken(conf
        .getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[]  password = jobToken.getPassword();
    String localPasswordFile = new File(".") + Path.SEPARATOR
        + "jobTokenPassword";
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);
 
    List cmd = new ArrayList();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
      cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!FileUtil.canExecute(new File(executable))) {
      // LinuxTaskController sets +x permissions on all distcache files already.
      // In case of DefaultTaskController, set permissions here.
      FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);
    // wrap the org.apache.hadoop.shaded.com.and in a stdout/stderr capture
    // we are starting map/reduce task of the pipes job. this is not a cleanup
    // attempt. 
    TaskAttemptID taskid = 
      TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength,
                                     false);
    
    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();
    // start ping socket cleaner
    int soTimeout = conf.getInt(CommonConfigurationKeys.IPC_PING_INTERVAL_KEY,
        CommonConfigurationKeys.IPC_PING_INTERVAL_DEFAULT);
    socketCleaner = new PingSocketCleaner("ping-socket-cleaner", serverSocket,
                                          soTimeout);
    socketCleaner.setDaemon(true);
    socketCleaner.start();
    
    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);
    
    handler = new OutputHandler(output, reporter, recordReader, 
        digestExpected);
    K2 outputKey = (K2)
      ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) 
      ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol(clientSocket, handler, 
                                  outputKey, outputValue, conf);
    
    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
  }

  private String getSecurityChallenge() {
    Random rand = new Random(System.currentTimeMillis());
    //Use 4 random integers so as to have 16 random bytes.
    StringBuilder strBuilder = new StringBuilder();
    strBuilder.append(rand.nextInt(0x7fffffff));
    strBuilder.append(rand.nextInt(0x7fffffff));
    strBuilder.append(rand.nextInt(0x7fffffff));
    strBuilder.append(rand.nextInt(0x7fffffff));
    return strBuilder.toString();
  }

  private void writePasswordToLocalFile(String localPasswordFile,
      byte[] password, JobConf conf) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Path localPath = new Path(localPasswordFile);
    FSDataOutputStream out = FileSystem.create(localFs, localPath,
        new FsPermission("400"));
    out.write(password);
    out.close();
  }

  /**
   * Get the downward protocol object that can send org.apache.hadoop.shaded.com.ands down to the
   * application.
   * @return the downlink proxy
   */
  DownwardProtocol getDownlink() {
    return downlink;
  }
  
  /**
   * Wait for authentication response.
   * @throws IOException
   * @throws InterruptedException
   */
  void waitForAuthentication() throws IOException,
      InterruptedException {
    downlink.flush();
    LOG.debug("Waiting for authentication response");
    handler.waitForAuthentication();
  }
  
  /**
   * Wait for the application to finish
   * @return did the application finish correctly?
   * @throws Throwable
   */
  boolean waitForFinish() throws Throwable {
    downlink.flush();
    return handler.waitForFinish();
  }

  /**
   * Abort the application and wait for it to finish.
   * @param t the exception that signalled the problem
   * @throws IOException A wrapper around the exception that was passed in
   */
  void abort(Throwable t) throws IOException {
    LOG.info("Aborting because of " + StringUtils.stringifyException(t));
    try {
      downlink.abort();
      downlink.flush();
    } catch (IOException e) {
      // IGNORE cleanup problems
    }
    try {
      handler.waitForFinish();
    } catch (Throwable ignored) {
      process.destroy();
    }
    IOException wrapper = new IOException("pipe child exception");
    wrapper.initCause(t);
    throw wrapper;      
  }
  
  /**
   * Clean up the child procress and socket.
   * @throws IOException
   */
  void cleanup() throws IOException {
    serverSocket.close();
    try {
      downlink.close();
      socketCleaner.interrupt();
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
    }      
  }

  /**
   * Run a given org.apache.hadoop.shaded.com.and in a subprocess, including threads to copy its stdout
   * and stderr to our stdout and stderr.
   * @param org.apache.hadoop.shaded.com.and the org.apache.hadoop.shaded.com.and and its arguments
   * @param env the environment to run the process in
   * @return a handle on the process
   * @throws IOException
   */
  static Process runClient(List org.apache.hadoop.shaded.com.and, 
                           Map env) throws IOException {
    ProcessBuilder builder = new ProcessBuilder(org.apache.hadoop.shaded.com.and);
    if (env != null) {
      builder.environment().putAll(env);
    }
    Process result = builder.start();
    return result;
  }
  
  public static String createDigest(byte[] password, String data)
      throws IOException {
    SecretKey key = JobTokenSecretManager.createSecretKey(password);
    return SecureShuffleUtils.hashFromString(data, key);
  }

  @VisibleForTesting
  public static class PingSocketCleaner extends Thread {
    private final ServerSocket serverSocket;
    private final int soTimeout;

    PingSocketCleaner(String name, ServerSocket serverSocket, int soTimeout) {
      super(name);
      this.serverSocket = serverSocket;
      this.soTimeout = soTimeout;
    }

    @Override
    public void run() {
      LOG.info("PingSocketCleaner started...");
      while (!Thread.currentThread().isInterrupted()) {
        Socket clientSocket = null;
        try {
          clientSocket = serverSocket.accept();
          clientSocket.setSoTimeout(soTimeout);
          LOG.debug("Connection received from {}",
                    clientSocket.getInetAddress());
          int readData = 0;
          while (readData != -1) {
            readData = clientSocket.getInputStream().read();
          }
          LOG.debug("close socket cause client has closed.");
          closeSocketInternal(clientSocket);
        } catch (IOException exception) {
          LOG.error("PingSocketCleaner exception", exception);
        } finally {
          closeSocketInternal(clientSocket);
        }
      }
    }

    @VisibleForTesting
    protected void closeSocketInternal(Socket clientSocket) {
      IOUtils.closeSocket(clientSocket);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy