/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.common;
import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.security.AccessControlException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import com.facebook.presto.hive.$internal.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.GlobFilter;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.conf.HiveConfUtil;
import org.apache.hadoop.hive.io.HdfsUtils;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.ShutdownHookManager;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;
/**
* Collection of file manipulation utilities common across Hive.
*/
public final class FileUtils {
private static final Logger LOG = LoggerFactory.getLogger(FileUtils.class.getName());
private static final Random random = new Random();
public static final PathFilter HIDDEN_FILES_PATH_FILTER = new PathFilter() {
@Override
public boolean accept(Path p) {
String name = p.getName();
return !name.startsWith("_") && !name.startsWith(".");
}
};
public static final PathFilter STAGING_DIR_PATH_FILTER = new PathFilter() {
@Override
public boolean accept(Path p) {
String name = p.getName();
return !name.startsWith(".");
}
};
/**
* Variant of Path.makeQualified that qualifies the input path against the default file system
* indicated by the configuration
*
* This does not require a FileSystem handle in most cases - only requires the Filesystem URI.
* This saves the cost of opening the Filesystem - which can involve RPCs - and avoids the
* errors that opening it can cause.
*
* @param path
* path to be fully qualified
* @param conf
* Configuration providing the default file system URI
* @return path qualified relative to default file system
*/
public static Path makeQualified(Path path, Configuration conf) throws IOException {
if (!path.isAbsolute()) {
// in this case we need to get the working directory
// and this requires a FileSystem handle. So revert to
// original method.
return path.makeQualified(FileSystem.get(conf));
}
URI fsUri = FileSystem.getDefaultUri(conf);
URI pathUri = path.toUri();
String scheme = pathUri.getScheme();
String authority = pathUri.getAuthority();
// validate/fill-in scheme and authority. this follows logic
// identical to FileSystem.get(URI, conf) - but doesn't actually
// obtain a file system handle
if (scheme == null) {
// no scheme - use default file system uri
scheme = fsUri.getScheme();
authority = fsUri.getAuthority();
if (authority == null) {
authority = "";
}
} else {
if (authority == null) {
// no authority - use default one if it applies
if (scheme.equals(fsUri.getScheme()) && fsUri.getAuthority() != null) {
authority = fsUri.getAuthority();
} else {
authority = "";
}
}
}
return new Path(scheme, authority, pathUri.getPath());
}
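// Illustrative usage sketch (not part of the original source; the defaultFS value is an assumed example):
//
//   Configuration conf = new Configuration();   // with fs.defaultFS = hdfs://nn:8020
//   Path q = FileUtils.makeQualified(new Path("/warehouse/t1"), conf);
//   // q -> hdfs://nn:8020/warehouse/t1, resolved without opening a FileSystem handle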
private FileUtils() {
// prevent instantiation
}
public static String makePartName(List<String> partCols, List<String> vals) {
return makePartName(partCols, vals, null);
}
/**
* Makes a valid partition name.
* @param partCols The partition keys' names
* @param vals The partition values
* @param defaultStr
* The default name given to a partition value if the respective value is empty or null.
* @return An escaped, valid partition name.
*/
public static String makePartName(List<String> partCols, List<String> vals,
String defaultStr) {
StringBuilder name = new StringBuilder();
for (int i = 0; i < partCols.size(); i++) {
if (i > 0) {
name.append(Path.SEPARATOR);
}
name.append(escapePathName((partCols.get(i)).toLowerCase(), defaultStr));
name.append('=');
name.append(escapePathName(vals.get(i), defaultStr));
}
return name.toString();
}
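// Illustrative usage sketch (column names and values are examples, not from the original source):
//
//   FileUtils.makePartName(Arrays.asList("ds", "Country"), Arrays.asList("2024-01-01", "US"));
//   // -> "ds=2024-01-01/country=US"  (key names are lower-cased, values are escaped)
//   FileUtils.makePartName(Arrays.asList("ds"), Collections.singletonList((String) null));
//   // -> "ds=__HIVE_DEFAULT_PARTITION__"  (null/empty values fall back to the default name)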
/**
* The default directory will have the same depth as the number of skewed columns;
* this makes future operations such as DML merge and concatenate merge easy.
* @param skewedCols The skewed columns' names
* @param name The directory name to repeat once per skewed column
* @return The default list bucketing directory name
*/
public static String makeDefaultListBucketingDirName(List<String> skewedCols,
String name) {
String lbDirName;
String defaultDir = FileUtils.escapePathName(name);
StringBuilder defaultDirPath = new StringBuilder();
for (int i = 0; i < skewedCols.size(); i++) {
if (i > 0) {
defaultDirPath.append(Path.SEPARATOR);
}
defaultDirPath.append(defaultDir);
}
lbDirName = defaultDirPath.toString();
return lbDirName;
}
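// Illustrative usage sketch (the directory name literal is an example, not from the original source):
//
//   FileUtils.makeDefaultListBucketingDirName(Arrays.asList("c1", "c2"), "HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME");
//   // -> "HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME"
//   // i.e. the escaped name repeated once per skewed column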
/**
* Makes a valid list bucketing directory name.
* @param lbCols The skewed keys' names
* @param vals The skewed values
* @return An escaped, valid list bucketing directory name.
*/
public static String makeListBucketingDirName(List<String> lbCols, List<String> vals) {
StringBuilder name = new StringBuilder();
for (int i = 0; i < lbCols.size(); i++) {
if (i > 0) {
name.append(Path.SEPARATOR);
}
name.append(escapePathName((lbCols.get(i)).toLowerCase()));
name.append('=');
name.append(escapePathName(vals.get(i)));
}
return name.toString();
}
// NOTE: This is for generating the internal path name for partitions. Users
// should always use the MetaStore API to get the path name for a partition.
// Users should not directly take partition values and turn it into a path
// name by themselves, because the logic below may change in the future.
//
// In the future, it's OK to add new chars to the escape list, and old data
// won't be corrupted, because the full path name is stored in the metastore.
// In that case, Hive will continue to read the old data, but when it creates
// new partitions, it will use new names.
// edit : There are some use cases for which adding new chars does not seem
// to be backward compatible - Eg. if partition was created with name having
// a special char that you want to start escaping, and then you try dropping
// the partition with a hive version that now escapes the special char using
// the list below, then the drop partition fails to work.
static BitSet charToEscape = new BitSet(128);
static {
for (char c = 0; c < ' '; c++) {
charToEscape.set(c);
}
/**
* ASCII 01-1F are HTTP control characters that need to be escaped.
* \u000A and \u000D are \n and \r, respectively.
*/
char[] clist = new char[] {'\u0001', '\u0002', '\u0003', '\u0004',
'\u0005', '\u0006', '\u0007', '\u0008', '\u0009', '\n', '\u000B',
'\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012',
'\u0013', '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019',
'\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F',
'"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', '{',
'[', ']', '^'};
for (char c : clist) {
charToEscape.set(c);
}
}
static boolean needsEscaping(char c) {
return c >= 0 && c < charToEscape.size() && charToEscape.get(c);
}
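// For reference (derived from the escape list above): needsEscaping('/'), needsEscaping('=') and
// needsEscaping(':') are true, while plain alphanumerics such as needsEscaping('a') are false.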
public static String escapePathName(String path) {
return escapePathName(path, null);
}
/**
* Escapes a path name.
* @param path The path to escape.
* @param defaultPath
* The default name for the path, if the given path is empty or null.
* @return An escaped path name.
*/
public static String escapePathName(String path, String defaultPath) {
// __HIVE_DEFAULT_NULL__ is the system default value for null and empty string.
// TODO: we should allow user to specify default partition or HDFS file location.
if (path == null || path.length() == 0) {
if (defaultPath == null) {
//previously, when path is empty or null and no default path is specified,
// __HIVE_DEFAULT_PARTITION__ was the return value for escapePathName
return "__HIVE_DEFAULT_PARTITION__";
} else {
return defaultPath;
}
}
StringBuilder sb = new StringBuilder();
for (int i = 0; i < path.length(); i++) {
char c = path.charAt(i);
if (needsEscaping(c)) {
sb.append('%');
sb.append(String.format("%1$02X", (int) c));
} else {
sb.append(c);
}
}
return sb.toString();
}
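// Illustrative usage sketch (input strings are examples):
//
//   FileUtils.escapePathName("2024/01=a");        // -> "2024%2F01%3Da"
//   FileUtils.escapePathName(null);               // -> "__HIVE_DEFAULT_PARTITION__"
//   FileUtils.escapePathName("", "my_default");   // -> "my_default"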
public static String unescapePathName(String path) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < path.length(); i++) {
char c = path.charAt(i);
if (c == '%' && i + 2 < path.length()) {
int code = -1;
try {
code = Integer.parseInt(path.substring(i + 1, i + 3), 16);
} catch (Exception e) {
code = -1;
}
if (code >= 0) {
sb.append((char) code);
i += 2;
continue;
}
}
sb.append(c);
}
return sb.toString();
}
/**
* Recursively lists status for all files starting from a particular directory (or individual file
* as base case).
*
* @param fs
* file system
*
* @param fileStatus
* starting point in file system
*
* @param results
* receives enumeration of all files found
*/
public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus,
List<FileStatus> results) throws IOException {
listStatusRecursively(fs, fileStatus, HIDDEN_FILES_PATH_FILTER, results);
}
public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus,
PathFilter filter, List<FileStatus> results) throws IOException {
if (fileStatus.isDir()) {
for (FileStatus stat : fs.listStatus(fileStatus.getPath(), filter)) {
listStatusRecursively(fs, stat, results);
}
} else {
results.add(fileStatus);
}
}
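// Illustrative usage sketch (the path is an example):
//
//   List<FileStatus> files = new ArrayList<>();
//   FileUtils.listStatusRecursively(fs, fs.getFileStatus(new Path("/warehouse/t1")), files);
//   // files now holds every non-hidden file (per HIDDEN_FILES_PATH_FILTER) under the directory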
/**
* Find the given path if it exists; otherwise, the closest ancestor of the path that does exist
*
* @param fs
* file system
* @param path
* @return FileStatus for argument path if it exists or the first ancestor in the path that exists
* @throws IOException
*/
public static FileStatus getPathOrParentThatExists(FileSystem fs, Path path) throws IOException {
FileStatus stat = FileUtils.getFileStatusOrNull(fs, path);
if (stat != null) {
return stat;
}
Path parentPath = path.getParent();
return getPathOrParentThatExists(fs, parentPath);
}
public static void checkFileAccessWithImpersonation(final FileSystem fs, final FileStatus stat,
final FsAction action, final String user)
throws IOException, AccessControlException, InterruptedException, Exception {
checkFileAccessWithImpersonation(fs, stat, action, user, null);
}
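// Illustrative usage sketch (the user name is an example; proxy-user configuration is assumed):
//
//   FileUtils.checkFileAccessWithImpersonation(fs, fs.getFileStatus(p), FsAction.WRITE, "bob");
//   // throws AccessControlException if "bob" cannot write p; the current user must be
//   // configured as a Hadoop proxy user for the impersonated check to succeed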
/**
* Perform a check to determine if the user is able to access the file passed in.
* If the user name passed in is different from the current user, this method will
* attempt to impersonate the user to do the check; the current user should be
* able to create proxy users in this case.
* @param fs FileSystem of the path to check
* @param stat FileStatus representing the file
* @param action FsAction that will be checked
* @param user User name of the user that will be checked for access. If the user name
* is null or the same as the current user, no user impersonation will be done
* and the check will be done as the current user. Otherwise the file access
* check will be performed within a doAs() block to use the access privileges
* of this user. In this case the user must be configured to impersonate other
* users, otherwise this check will fail with an error.
* @param children List of children to be collected. If this is null, no children are collected.
* To be set only if this is a directory
* @throws IOException
* @throws AccessControlException
* @throws InterruptedException
* @throws Exception
*/
public static void checkFileAccessWithImpersonation(final FileSystem fs,
final FileStatus stat, final FsAction action, final String user, final List<FileStatus> children)
throws IOException, AccessControlException, InterruptedException, Exception {
UserGroupInformation ugi = Utils.getUGI();
String currentUser = ugi.getShortUserName();
if (user == null || currentUser.equals(user)) {
// No need to impersonate user, do the checks as the currently configured user.
ShimLoader.getHadoopShims().checkFileAccess(fs, stat, action);
addChildren(fs, stat.getPath(), children);
return;
}
// Otherwise, try user impersonation. Current user must be configured to do user impersonation.
UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(
user, UserGroupInformation.getLoginUser());
try {
proxyUser.doAs(new PrivilegedExceptionAction<Object>