All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tez.common.TezCommonUtils Maven / Gradle / Ivy

There is a newer version: 0.10.4
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.common;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.tez.client.TezClient;
import org.apache.tez.common.security.JobTokenIdentifier;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezUncheckedException;

import com.google.protobuf.ByteString;

@Private
public class TezCommonUtils {
  public static final FsPermission TEZ_AM_DIR_PERMISSION = FsPermission
      .createImmutable((short) 0700); // rwx--------
  public static final FsPermission TEZ_AM_FILE_PERMISSION = FsPermission
      .createImmutable((short) 0644); // rw-r--r--
  private static final Logger LOG = LoggerFactory.getLogger(TezClient.class);

  public static final String TEZ_SYSTEM_SUB_DIR = ".tez";

  /**
   * 

* This function returns the staging directory defined in the config with * property name TezConfiguration.TEZ_AM_STAGING_DIR. If the * property is not defined in the conf, Tez uses the value defined as * TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT. In addition, the * function makes sure if the staging directory exists. If not, it creates the * directory with permission TEZ_AM_DIR_PERMISSION. *

* * @param conf * TEZ configuration * @return Fully qualified staging directory */ public static Path getTezBaseStagingPath(Configuration conf) { String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT); Path baseStagingDir; try { Path p = new Path(stagingDirStr); FileSystem fs = p.getFileSystem(conf); if (!fs.exists(p)) { mkDirForAM(fs, p); LOG.info("Stage directory " + p + " doesn't exist and is created"); } baseStagingDir = fs.resolvePath(p); } catch (IOException e) { throw new TezUncheckedException(e); } return baseStagingDir; } /** *

* This function returns the staging directory for TEZ system. Tez creates all * its temporary files under this sub-directory. The function creates a * sub-directory (TEZ_SYSTEM_SUB_DIR/) under the base * staging directory, often provided by user. *

* * @param conf * Tez configuration * @param strAppId * Application ID as string * @return TEZ system level staging directory used for Tez internals */ @Private public static Path createTezSystemStagingPath(Configuration conf, String strAppId) { Path baseStagingPath = getTezBaseStagingPath(conf); Path tezStagingDir; try { tezStagingDir = new Path(baseStagingPath, TEZ_SYSTEM_SUB_DIR); FileSystem fs = tezStagingDir.getFileSystem(conf); tezStagingDir = new Path(tezStagingDir, strAppId); if (!fs.exists(tezStagingDir)) { mkDirForAM(fs, tezStagingDir); LOG.info("Tez system stage directory " + tezStagingDir + " doesn't exist and is created"); } } catch (IOException e) { throw new TezUncheckedException(e); } return tezStagingDir; } /** *

* This function returns the staging directory for TEZ system. Tez creates all * its temporary files under this sub-directory. The function normally doesn't * creates any sub-directory under the base staging directory. *

* * @param conf * Tez configuration * @param strAppId * Application ID as string * @return TEZ system level staging directory used for Tez internals */ @Private public static Path getTezSystemStagingPath(Configuration conf, String strAppId) { Path baseStagingPath = getTezBaseStagingPath(conf); Path tezStagingDir; tezStagingDir = new Path(baseStagingPath, TEZ_SYSTEM_SUB_DIR); tezStagingDir = new Path(tezStagingDir, strAppId); return tezStagingDir; } /** *

* Returns a path to store binary configuration *

* * @param tezSysStagingPath * TEZ system level staging directory used for Tez internals * @return path to configuration */ @Private public static Path getTezConfStagingPath(Path tezSysStagingPath) { return new Path(tezSysStagingPath, TezConstants.TEZ_PB_BINARY_CONF_NAME); } /** *

* Returns a path to store local resources/session jars *

* * @param tezSysStagingPath * TEZ system level staging directory used for Tez internals * @return path to store the session jars */ @Private public static Path getTezAMJarStagingPath(Path tezSysStagingPath) { return new Path(tezSysStagingPath, TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME); } /** *

* Returns a path to store binary plan *

* * @param tezSysStagingPath * TEZ system level staging directory used for Tez internals * @return path to store the plan in binary */ @Private public static Path getTezBinPlanStagingPath(Path tezSysStagingPath) { return new Path(tezSysStagingPath, TezConstants.TEZ_PB_PLAN_BINARY_NAME); } /** *

* Returns a path to store text plan *

* * @param tezSysStagingPath * TEZ system level staging directory used for Tez internals * @param strAppId * Application ID * @param dagPBName * DAG PB Name * @return path to store the plan in text */ @Private public static Path getTezTextPlanStagingPath(Path tezSysStagingPath, String strAppId, String dagPBName) { String fileName = strAppId + "-" + dagPBName + "-" + TezConstants.TEZ_PB_PLAN_TEXT_NAME; return new Path(tezSysStagingPath, fileName); } /** *

* Returns a path to store recovery information *

* * @param tezSysStagingPath * TEZ system level staging directory used for Tez internals * @param conf * Tez configuration * @return App recovery path * @throws IOException */ @Private public static Path getRecoveryPath(Path tezSysStagingPath, Configuration conf) throws IOException { Path baseReecoveryPath = new Path(tezSysStagingPath, TezConstants.DAG_RECOVERY_DATA_DIR_NAME); FileSystem recoveryFS = baseReecoveryPath.getFileSystem(conf); return recoveryFS.makeQualified(baseReecoveryPath); } /** *

* Returns a path to store app attempt specific recovery details *

* * @param recoveryPath * TEZ recovery directory used for Tez internals * @param attemptID * Application Attempt Id * @return App attempt specific recovery path */ @Private public static Path getAttemptRecoveryPath(Path recoveryPath, int attemptID) { return new Path(recoveryPath, Integer.toString(attemptID)); } /** *

* Returns a path to store DAG specific recovery info *

* * @param attemptRecoverPath * :TEZ system level staging directory used for Tez internals * @param dagID * DagID as string * @return DAG specific recovery path */ @Private public static Path getDAGRecoveryPath(Path attemptRecoverPath, String dagID) { return new Path(attemptRecoverPath, dagID + TezConstants.DAG_RECOVERY_RECOVER_FILE_SUFFIX); } /** *

* Returns a path to store summary info for recovery *

* * @param attemptRecoverPath * TEZ system level staging directory used for Tez internals * @return Summary event path used in recovery */ @Private public static Path getSummaryRecoveryPath(Path attemptRecoverPath) { return new Path(attemptRecoverPath, TezConstants.DAG_RECOVERY_SUMMARY_FILE_SUFFIX); } /** *

* Create a directory with predefined directory permission *

* * @param fs * Filesystem * @param dir * directory to be created * @throws IOException */ public static void mkDirForAM(FileSystem fs, Path dir) throws IOException { fs.mkdirs(dir, new FsPermission(TEZ_AM_DIR_PERMISSION)); } /** *

* Create a file with TEZ_AM_FILE_PERMISSION permission and * returns OutputStream *

* * @param fs * Filesystem * @param filePath * file path to create the file * @return FSDataOutputStream * @throws IOException */ public static FSDataOutputStream createFileForAM(FileSystem fs, Path filePath) throws IOException { return FileSystem.create(fs, filePath, new FsPermission(TEZ_AM_FILE_PERMISSION)); } public static void addAdditionalLocalResources(Map additionalLrs, Map originalLRs, String logContext) { // TODO TEZ-1798. Handle contents of Tez archives for duplicate LocalResource checks if (additionalLrs != null && !additionalLrs.isEmpty()) { StringBuilder sb = new StringBuilder(); for (Map.Entry lrEntry : additionalLrs.entrySet()) { LocalResource originalLr = originalLRs.get(lrEntry.getKey()); if (originalLr != null) { LocalResource additionalLr = lrEntry.getValue(); if (originalLr.getSize() != additionalLr.getSize()) { throw new TezUncheckedException( "Duplicate Resources found with different size for [" + logContext + "]: " + lrEntry.getKey() + " : " + "[" + additionalLr.getResource() + "=" + additionalLr.getSize() + "],[" + originalLr.getResource() + "=" + originalLr.getSize()); } else { if (originalLr.getResource().equals(additionalLr.getResource())) { sb.append("[").append(lrEntry.getKey()).append(" : Duplicate]"); } else { sb.append("[").append(lrEntry.getKey()).append(" : DuplicateDifferentPath]"); } } } // The LR either does not exist, or is an 'equivalent' dupe. // Prefer the tez specified LR instead of the equivalent user specified LR for container reuse matching originalLRs.put(lrEntry.getKey(), lrEntry.getValue()); } String logString = sb.toString(); if (!logString.isEmpty()) { LOG.warn("Found Resources Duplication in " + logContext + " after including resources from " + TezConfiguration.TEZ_LIB_URIS + " and " + TezConfiguration.TEZ_AUX_URIS + ": " + logString); } } } private static final boolean NO_WRAP = true; @Private public static Deflater newBestCompressionDeflater() { return new Deflater(Deflater.BEST_COMPRESSION, NO_WRAP); } @Private public static Deflater newBestSpeedDeflater() { return new Deflater(Deflater.BEST_SPEED, NO_WRAP); } @Private public static Inflater newInflater() { return new Inflater(NO_WRAP); } @Private public static ByteString compressByteArrayToByteString(byte[] inBytes) throws IOException { return compressByteArrayToByteString(inBytes, newBestCompressionDeflater()); } @Private public static ByteString compressByteArrayToByteString(byte[] inBytes, Deflater deflater) throws IOException { deflater.reset(); ByteString.Output os = ByteString.newOutput(); DeflaterOutputStream compressOs = null; try { compressOs = new DeflaterOutputStream(os, deflater); compressOs.write(inBytes); compressOs.finish(); ByteString byteString = os.toByteString(); return byteString; } finally { if (compressOs != null) { compressOs.close(); } } } @Private public static byte[] decompressByteStringToByteArray(ByteString byteString) throws IOException { return decompressByteStringToByteArray(byteString, newInflater()); } @Private public static byte[] decompressByteStringToByteArray(ByteString byteString, Inflater inflater) throws IOException { inflater.reset(); return IOUtils.toByteArray(new InflaterInputStream(byteString.newInput(), inflater)); } public static String getCredentialsInfo(Credentials credentials, String identifier) { StringBuilder sb = new StringBuilder(); sb.append("Credentials: #" + identifier + "Tokens=").append(credentials.numberOfTokens()); if (credentials.numberOfTokens() > 0) { sb.append(", Services="); Iterator> tokenItr = credentials.getAllTokens().iterator(); if (tokenItr.hasNext()) { Token token = tokenItr.next(); sb.append(token.getService()).append("(").append(token.getKind()).append(")"); } while(tokenItr.hasNext()) { Token token = tokenItr.next(); sb.append(",").append(token.getService()).append("(").append(token.getKind()).append(")"); } } return sb.toString(); } public static ByteBuffer convertJobTokenToBytes( Token jobToken) throws IOException { DataOutputBuffer dob = new DataOutputBuffer(); jobToken.write(dob); ByteBuffer bb = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); return bb; } public static Credentials parseCredentialsBytes(byte[] credentialsBytes) throws IOException { Credentials credentials = new Credentials(); DataInputBuffer dib = new DataInputBuffer(); try { byte[] tokenBytes = credentialsBytes; dib.reset(tokenBytes, tokenBytes.length); credentials.readTokenStorageStream(dib); return credentials; } finally { dib.close(); } } public static void logCredentials(Logger log, Credentials credentials, String identifier) { if (log.isDebugEnabled()) { log.debug(getCredentialsInfo(credentials, identifier)); } } public static Collection tokenizeString(String str, String delim) { List values = new ArrayList(); if (str == null || str.isEmpty()) return values; StringTokenizer tokenizer = new StringTokenizer(str, delim); while (tokenizer.hasMoreTokens()) { values.add(tokenizer.nextToken()); } return values; } /** * Splits a comma separated value String, trimming leading and trailing whitespace on each value. * @param str a comma separated with values * @return an array of String values */ public static String[] getTrimmedStrings(String str) { if (null == str || (str = str.trim()).isEmpty()) { return ArrayUtils.EMPTY_STRING_ARRAY; } return str.split("\\s*,\\s*"); } /** * A helper function to serialize the JobTokenIdentifier to be sent to the * ShuffleHandler as ServiceData. * * *NOTE* This is a copy of what is done by the MapReduce ShuffleHandler. Not using that directly * to avoid a dependency on mapreduce. * * @param jobToken * the job token to be used for authentication of shuffle data * requests. * @return the serialized version of the jobToken. */ public static ByteBuffer serializeServiceData(Token jobToken) throws IOException { // TODO these bytes should be versioned DataOutputBuffer jobToken_dob = new DataOutputBuffer(); jobToken.write(jobToken_dob); return ByteBuffer.wrap(jobToken_dob.getData(), 0, jobToken_dob.getLength()); } public static String getSystemPropertiesToLog(Configuration conf) { Collection keys = conf.getTrimmedStringCollection( TezConfiguration.TEZ_JVM_SYSTEM_PROPERTIES_TO_LOG); if (keys.isEmpty()) { keys = TezConfiguration.TEZ_JVM_SYSTEM_PROPERTIES_TO_LOG_DEFAULT; } StringBuilder sb = new StringBuilder(); sb.append("\n/************************************************************\n"); sb.append("[system properties]\n"); for (String key : keys) { sb.append(key).append(": ").append(System.getProperty(key)).append('\n'); } sb.append("************************************************************/"); return sb.toString(); } /** * Helper function to get the heartbeat interval for client-AM heartbeats * See {@link TezConfiguration#TEZ_AM_CLIENT_HEARTBEAT_TIMEOUT_SECS} for more details. * @param conf Configuration object * @return heartbeat interval in milliseconds. -1 implies disabled. */ public static long getAMClientHeartBeatTimeoutMillis(Configuration conf) { int val = conf.getInt(TezConfiguration.TEZ_AM_CLIENT_HEARTBEAT_TIMEOUT_SECS, TezConfiguration.TEZ_AM_CLIENT_HEARTBEAT_TIMEOUT_SECS_DEFAULT); if (val < 0) { return -1; } if (val > 0 && val < TezConstants.TEZ_AM_CLIENT_HEARTBEAT_TIMEOUT_SECS_MINIMUM) { return TezConstants.TEZ_AM_CLIENT_HEARTBEAT_TIMEOUT_SECS_MINIMUM * 1000; } return val * 1000; } /** * Helper function to get the poll interval for client-AM heartbeats. * @param conf Configuration object * @param heartbeatIntervalMillis Heartbeat interval in milliseconds * @param buckets How many times to poll within the provided heartbeat interval * @return poll interval in milliseconds */ public static long getAMClientHeartBeatPollIntervalMillis(Configuration conf, long heartbeatIntervalMillis, int buckets) { if (heartbeatIntervalMillis <= 0) { return -1; } int pollInterval = conf.getInt(TezConfiguration.TEZ_AM_CLIENT_HEARTBEAT_POLL_INTERVAL_MILLIS, TezConfiguration.TEZ_AM_CLIENT_HEARTBEAT_POLL_INTERVAL_MILLIS_DEFAULT); if (pollInterval > 0) { return Math.max(TezConstants.TEZ_AM_CLIENT_HEARTBEAT_POLL_INTERVAL_MILLIS_MINIMUM, pollInterval); } return Math.max(TezConstants.TEZ_AM_CLIENT_HEARTBEAT_POLL_INTERVAL_MILLIS_MINIMUM, heartbeatIntervalMillis/buckets); } public static long getDAGSessionTimeout(Configuration conf) { int timeoutSecs = conf.getInt( TezConfiguration.TEZ_SESSION_AM_DAG_SUBMIT_TIMEOUT_SECS, TezConfiguration.TEZ_SESSION_AM_DAG_SUBMIT_TIMEOUT_SECS_DEFAULT); if (timeoutSecs < 0) { return -1; } // Handle badly configured value to minimize impact of a spinning thread if (timeoutSecs == 0) { timeoutSecs = 1; } return 1000l * timeoutSecs; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy