org.apache.hadoop.hdfs.web.WebHdfsFileSystem Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.shaded.org.apache.hadoop.hdfs.web;
import static org.apache.hadoop.shaded.org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_WEBHDFS_REST_CSRF_CUSTOM_HEADER_DEFAULT;
import static org.apache.hadoop.shaded.org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_WEBHDFS_REST_CSRF_CUSTOM_HEADER_KEY;
import static org.apache.hadoop.shaded.org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_WEBHDFS_REST_CSRF_ENABLED_DEFAULT;
import static org.apache.hadoop.shaded.org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_WEBHDFS_REST_CSRF_ENABLED_KEY;
import static org.apache.hadoop.shaded.org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_WEBHDFS_REST_CSRF_METHODS_TO_IGNORE_DEFAULT;
import static org.apache.hadoop.shaded.org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_WEBHDFS_REST_CSRF_METHODS_TO_IGNORE_KEY;
import java.org.apache.hadoop.shaded.io.BufferedInputStream;
import java.org.apache.hadoop.shaded.io.BufferedOutputStream;
import java.org.apache.hadoop.shaded.io.EOFException;
import java.org.apache.hadoop.shaded.io.FileNotFoundException;
import java.org.apache.hadoop.shaded.io.IOException;
import java.org.apache.hadoop.shaded.io.InputStream;
import java.lang.reflect.InvocationTargetException;
import java.org.apache.hadoop.shaded.net.HttpURLConnection;
import java.org.apache.hadoop.shaded.net.InetSocketAddress;
import java.org.apache.hadoop.shaded.net.MalformedURLException;
import java.org.apache.hadoop.shaded.net.URI;
import java.org.apache.hadoop.shaded.net.URL;
import java.nio.charset.StandardCharsets;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Base64.Decoder;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.shaded.javax.ws.rs.core.HttpHeaders;
import org.apache.hadoop.shaded.javax.ws.rs.core.MediaType;
import org.apache.hadoop.shaded.org.apache.org.apache.hadoop.shaded.com.ons.org.apache.hadoop.shaded.io.IOUtils;
import org.apache.hadoop.shaded.org.apache.org.apache.hadoop.shaded.com.ons.org.apache.hadoop.shaded.io.input.BoundedInputStream;
import org.apache.hadoop.shaded.org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.shaded.org.apache.hadoop.crypto.key.KeyProvider;
import org.apache.hadoop.shaded.org.apache.hadoop.crypto.key.KeyProviderTokenIssuer;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.CommonPathCapabilities;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.DelegationTokenRenewer;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FileEncryptionInfo;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.GlobalStorageStatistics;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.GlobalStorageStatistics.StorageStatisticsProvider;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.MultipartUploaderBuilder;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.QuotaUsage;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.PathCapabilities;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.impl.FileSystemMultipartUploaderBuilder;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.permission.FsCreateModes;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.DFSOpsCountStatistics;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.DFSOpsCountStatistics.OpType;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.Options;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.Path;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.XAttrCodec;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.XAttrSetFlag;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.DFSUtilClient;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.HAUtilClient;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.HdfsKMSUtil;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.client.DfsPathCapabilities;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.FileEncryptionInfoProto;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.protocolPB.PBHelperClient;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.web.resources.*;
import org.apache.hadoop.shaded.org.apache.hadoop.hdfs.web.resources.HttpOpParam.Op;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.Text;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.retry.RetryPolicies;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.retry.RetryPolicy;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.retry.RetryUtils;
import org.apache.hadoop.shaded.org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.shaded.org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.net.NetUtils;
import org.apache.hadoop.shaded.org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.shaded.org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.shaded.org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.shaded.org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.shaded.org.apache.hadoop.security.token.Token;
import org.apache.hadoop.shaded.org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.shaded.org.apache.hadoop.security.token.TokenSelector;
import org.apache.hadoop.shaded.org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector;
import org.apache.hadoop.shaded.org.apache.hadoop.security.token.DelegationTokenIssuer;
import org.apache.hadoop.shaded.org.apache.hadoop.util.JsonSerialization;
import org.apache.hadoop.shaded.org.apache.hadoop.util.KMSUtil;
import org.apache.hadoop.shaded.org.apache.hadoop.util.Progressable;
import org.apache.hadoop.shaded.org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.shaded.org.slf4j.Logger;
import org.apache.hadoop.shaded.org.slf4j.LoggerFactory;
import org.apache.hadoop.shaded.org.apache.hadoop.thirdparty.org.apache.hadoop.shaded.com.google.org.apache.hadoop.shaded.com.on.annotations.VisibleForTesting;
import org.apache.hadoop.shaded.org.apache.hadoop.thirdparty.org.apache.hadoop.shaded.com.google.org.apache.hadoop.shaded.com.on.base.Charsets;
import org.apache.hadoop.shaded.org.apache.hadoop.thirdparty.org.apache.hadoop.shaded.com.google.org.apache.hadoop.shaded.com.on.base.Preconditions;
import org.apache.hadoop.shaded.org.apache.hadoop.thirdparty.org.apache.hadoop.shaded.com.google.org.apache.hadoop.shaded.com.on.collect.Lists;
import static org.apache.hadoop.shaded.org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
/** A FileSystem for HDFS over the web. */
public class WebHdfsFileSystem extends FileSystem
implements DelegationTokenRenewer.Renewable,
TokenAspect.TokenManagementDelegator, KeyProviderTokenIssuer {
public static final Logger LOG = LoggerFactory
.getLogger(WebHdfsFileSystem.class);
/** WebHdfs version. */
public static final int VERSION = 1;
/** Http URI: http://namenode:port/{PATH_PREFIX}/path/to/file */
public static final String PATH_PREFIX = "/" + WebHdfsConstants.WEBHDFS_SCHEME
+ "/v" + VERSION;
public static final String EZ_HEADER = "X-Hadoop-Accept-EZ";
public static final String FEFINFO_HEADER = "X-Hadoop-feInfo";
public static final String DFS_HTTP_POLICY_KEY = "dfs.http.policy";
/**
* Default connection factory may be overridden in tests to use smaller
* timeout values
*/
protected URLConnectionFactory connectionFactory;
@VisibleForTesting
public static final String CANT_FALLBACK_TO_INSECURE_MSG =
"The client is configured to only allow connecting to secure cluster";
private boolean canRefreshDelegationToken;
private UserGroupInformation ugi;
private URI uri;
private Token> delegationToken;
protected Text tokenServiceName;
private RetryPolicy retryPolicy = null;
private Path workingDir;
private Path cachedHomeDirectory;
private InetSocketAddress nnAddrs[];
private int currentNNAddrIndex;
private boolean disallowFallbackToInsecureCluster;
private boolean isInsecureCluster;
private String restCsrfCustomHeader;
private Set restCsrfMethodsToIgnore;
private DFSOpsCountStatistics storageStatistics;
private KeyProvider testProvider;
private boolean isTLSKrb;
/**
 * Reports the URI scheme this FileSystem is registered under.
 *
 * @return the scheme constant {@code WebHdfsConstants.WEBHDFS_SCHEME}
 *         (webhdfs)
 */
@Override
public String getScheme() {
  return WebHdfsConstants.WEBHDFS_SCHEME;
}
/**
 * Names the wire protocol used when building namenode URLs.
 *
 * @return the underlying transport protocol; this implementation always
 *         answers {@code "http"} (the original comment mentions http / https)
 */
protected String getTransportScheme() {
  return "http";
}
/**
 * Supplies the token kind handed to the delegation token selector.
 *
 * @return {@code WebHdfsConstants.WEBHDFS_TOKEN_KIND}
 */
protected Text getTokenKind() {
  return WebHdfsConstants.WEBHDFS_TOKEN_KIND;
}
/**
 * Prepares this file system for use: applies parameter patterns, builds the
 * connection factory, resolves the namenode addresses, chooses the token
 * service name and retry policy, and resets security/CSRF/statistics state.
 *
 * @param uri  file system URI; only its scheme and authority are retained
 * @param conf configuration to read client settings from
 * @throws IOException if the superclass initialization, the current-user
 *                     lookup or any helper called here fails
 */
@Override
public synchronized void initialize(URI uri, Configuration conf
    ) throws IOException {
  super.initialize(uri, conf);
  setConf(conf);

  // set user and acl patterns based on configuration file
  final String userPattern = conf.get(
      HdfsClientConfigKeys.DFS_WEBHDFS_USER_PATTERN_KEY,
      HdfsClientConfigKeys.DFS_WEBHDFS_USER_PATTERN_DEFAULT);
  UserParam.setUserPattern(userPattern);
  final String aclPermissionPattern = conf.get(
      HdfsClientConfigKeys.DFS_WEBHDFS_ACL_PERMISSION_PATTERN_KEY,
      HdfsClientConfigKeys.DFS_WEBHDFS_ACL_PERMISSION_PATTERN_DEFAULT);
  AclPermissionParam.setAclPermissionPattern(aclPermissionPattern);

  // socket timeouts, both expressed in milliseconds
  final int connectTimeoutMs = (int) conf.getTimeDuration(
      HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_CONNECT_TIMEOUT_KEY,
      URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
      TimeUnit.MILLISECONDS);
  final int readTimeoutMs = (int) conf.getTimeDuration(
      HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_READ_TIMEOUT_KEY,
      URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
      TimeUnit.MILLISECONDS);

  final boolean oauthEnabled = conf.getBoolean(
      HdfsClientConfigKeys.DFS_WEBHDFS_OAUTH_ENABLED_KEY,
      HdfsClientConfigKeys.DFS_WEBHDFS_OAUTH_ENABLED_DEFAULT);
  if (!oauthEnabled) {
    LOG.debug("Not enabling OAuth2 in WebHDFS");
    connectionFactory = URLConnectionFactory
        .newDefaultURLConnectionFactory(connectTimeoutMs, readTimeoutMs, conf);
  } else {
    LOG.debug("Enabling OAuth2 in WebHDFS");
    connectionFactory = URLConnectionFactory
        .newOAuth2URLConnectionFactory(connectTimeoutMs, readTimeoutMs, conf);
  }

  this.isTLSKrb = "HTTPS_ONLY".equals(conf.get(DFS_HTTP_POLICY_KEY));
  ugi = UserGroupInformation.getCurrentUser();
  this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
  this.nnAddrs = resolveNNAddr();

  final boolean haConfigured =
      HAUtilClient.isClientFailoverConfigured(conf, this.uri);
  final boolean logicalUri =
      haConfigured && HAUtilClient.isLogicalUri(conf, this.uri);
  // In non-HA or non-logical URI case, the code needs to call
  // getCanonicalUri() in order to handle the case where no port is
  // specified in the URI
  if (logicalUri) {
    this.tokenServiceName =
        HAUtilClient.buildTokenServiceForLogicalUri(uri, getScheme());
  } else {
    this.tokenServiceName =
        SecurityUtil.buildTokenService(getCanonicalUri());
  }

  if (haConfigured) {
    // HA: fail over between namenodes on network exceptions
    final int failoverAttemptsMax = conf.getInt(
        HdfsClientConfigKeys.HttpClient.FAILOVER_MAX_ATTEMPTS_KEY,
        HdfsClientConfigKeys.HttpClient.FAILOVER_MAX_ATTEMPTS_DEFAULT);
    final int retryAttemptsMax = conf.getInt(
        HdfsClientConfigKeys.HttpClient.RETRY_MAX_ATTEMPTS_KEY,
        HdfsClientConfigKeys.HttpClient.RETRY_MAX_ATTEMPTS_DEFAULT);
    final int sleepBaseMillis = conf.getInt(
        HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_BASE_KEY,
        HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_BASE_DEFAULT);
    final int sleepMaxMillis = conf.getInt(
        HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_MAX_KEY,
        HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_MAX_DEFAULT);
    this.retryPolicy = RetryPolicies
        .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL,
            failoverAttemptsMax, retryAttemptsMax, sleepBaseMillis,
            sleepMaxMillis);
  } else {
    // single namenode: use the configurable default retry policy
    this.retryPolicy =
        RetryUtils.getDefaultRetryPolicy(
            conf,
            HdfsClientConfigKeys.HttpClient.RETRY_POLICY_ENABLED_KEY,
            HdfsClientConfigKeys.HttpClient.RETRY_POLICY_ENABLED_DEFAULT,
            HdfsClientConfigKeys.HttpClient.RETRY_POLICY_SPEC_KEY,
            HdfsClientConfigKeys.HttpClient.RETRY_POLICY_SPEC_DEFAULT,
            HdfsConstants.SAFEMODE_EXCEPTION_CLASS_NAME);
  }

  this.workingDir = makeQualified(new Path(getHomeDirectoryString(ugi)));

  // tokens can only be refreshed when security is on; otherwise the
  // cluster is treated as insecure from the start
  final boolean securityEnabled = UserGroupInformation.isSecurityEnabled();
  this.canRefreshDelegationToken = securityEnabled;
  this.isInsecureCluster = !securityEnabled;
  final boolean fallbackAllowed = conf.getBoolean(
      CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY,
      CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT);
  this.disallowFallbackToInsecureCluster = !fallbackAllowed;

  this.initializeRestCsrf(conf);
  this.delegationToken = null;

  storageStatistics = (DFSOpsCountStatistics) GlobalStorageStatistics.INSTANCE
      .put(DFSOpsCountStatistics.NAME,
          new StorageStatisticsProvider() {
            @Override
            public StorageStatistics provide() {
              return new DFSOpsCountStatistics();
            }
          });
}
/**
 * Sets up client-side cross-site request forgery (CSRF) handling. When the
 * feature is enabled in the configuration, records the custom header to send
 * and the set of HTTP methods that do not need it; when disabled, both
 * fields are cleared to {@code null}.
 *
 * @param conf configuration to read
 */
private void initializeRestCsrf(Configuration conf) {
  final boolean csrfEnabled = conf.getBoolean(
      DFS_WEBHDFS_REST_CSRF_ENABLED_KEY,
      DFS_WEBHDFS_REST_CSRF_ENABLED_DEFAULT);
  if (!csrfEnabled) {
    this.restCsrfCustomHeader = null;
    this.restCsrfMethodsToIgnore = null;
    return;
  }
  this.restCsrfCustomHeader = conf.getTrimmed(
      DFS_WEBHDFS_REST_CSRF_CUSTOM_HEADER_KEY,
      DFS_WEBHDFS_REST_CSRF_CUSTOM_HEADER_DEFAULT);
  this.restCsrfMethodsToIgnore = new HashSet<>();
  this.restCsrfMethodsToIgnore.addAll(getTrimmedStringList(
      conf,
      DFS_WEBHDFS_REST_CSRF_METHODS_TO_IGNORE_KEY,
      DFS_WEBHDFS_REST_CSRF_METHODS_TO_IGNORE_DEFAULT));
}
/**
 * Returns a list of strings from a comma-delimited configuration value.
 *
 * @param conf configuration to check
 * @param name configuration property name
 * @param defaultValue default value if no value found for name
 * @return list of strings from comma-delimited configuration value, or an
 *     empty list if not found
 */
private static List<String> getTrimmedStringList(Configuration conf,
    String name, String defaultValue) {
  String valueString = conf.get(name, defaultValue);
  if (valueString == null) {
    // neither a configured value nor a default: nothing to split
    return new ArrayList<>();
  }
  // copy into a fresh list so callers get their own mutable instance
  return new ArrayList<>(StringUtils.getTrimmedStringCollection(valueString));
}
/**
 * Exposes the canonical URI as a public method by delegating to the
 * superclass implementation unchanged.
 *
 * @return the canonical URI computed by the superclass
 */
@Override
public URI getCanonicalUri() {
  return super.getCanonicalUri();
}
/** Picks the token of this file system's kind out of the UGI's tokens. */
TokenSelector<DelegationTokenIdentifier> tokenSelector =
    new AbstractDelegationTokenSelector<DelegationTokenIdentifier>(
        getTokenKind()){};
// the first getAuthParams() for a non-token op will either get the
// internal token from the ugi or lazy fetch one
protected synchronized Token> getDelegationToken() throws IOException {
if (delegationToken == null) {
Token> token = tokenSelector.selectToken(
new Text(getCanonicalServiceName()), ugi.getTokens());
// ugi tokens are usually indicative of a task which can't
// refetch tokens. even if ugi has credentials, don't attempt
// to get another token to match hdfs/rpc behavior
if (token != null) {
LOG.debug("Using UGI token: {}", token);
canRefreshDelegationToken = false;
} else {
if (canRefreshDelegationToken) {
token = getDelegationToken(null);
if (token != null) {
LOG.debug("Fetched new token: {}", token);
} else { // security is disabled
canRefreshDelegationToken = false;
isInsecureCluster = true;
}
}
}
setDelegationToken(token);
}
return delegationToken;
}
/**
 * Fetches a fresh delegation token to replace an expired one, provided this
 * client is still allowed to refresh tokens.
 *
 * @return {@code true} if a non-null replacement token was obtained and
 *         installed; {@code false} if refreshing is not allowed or the
 *         fetch returned {@code null}
 * @throws IOException if fetching the new token fails
 */
@VisibleForTesting
synchronized boolean replaceExpiredDelegationToken() throws IOException {
  boolean replaced = false;
  if (canRefreshDelegationToken) {
    Token<?> token = getDelegationToken(null);
    LOG.debug("Replaced expired token: {}", token);
    // installed even when null, mirroring the lazy-fetch path
    setDelegationToken(token);
    replaced = (token != null);
  }
  return replaced;
}
/**
 * Port assumed when the file system URI does not name one.
 *
 * @return {@code HdfsClientConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT}
 */
@Override
protected int getDefaultPort() {
  return HdfsClientConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT;
}
/**
 * Gives the URI recorded by {@code initialize}, which keeps only the scheme
 * and authority of the URI it was given.
 *
 * @return this file system's URI
 */
@Override
public URI getUri() {
  return uri;
}
/**
 * Canonicalizes a URI through {@code NetUtils.getCanonicalUri}, supplying
 * this file system's default port.
 *
 * @param uri the URI to canonicalize
 * @return the canonical form produced by {@code NetUtils}
 */
@Override
protected URI canonicalizeUri(URI uri) {
  final int defaultPort = getDefaultPort();
  return NetUtils.getCanonicalUri(uri, defaultPort);
}
/**
 * Builds the conventional home directory path for a user: {@code /user/}
 * followed by the user's short name.
 *
 * @param ugi the user whose home directory path is wanted
 * @return the home directory
 */
@Deprecated
public static String getHomeDirectoryString(final UserGroupInformation ugi) {
  final String shortName = ugi.getShortUserName();
  return "/user/" + shortName;
}
@Override
public Path getHomeDirectory() {
if (cachedHomeDirectory == null) {
final HttpOpParam.Op op = GetOpParam.Op.GETHOMEDIRECTORY;
try {
String pathFromDelegatedFS = new FsPathResponseRunner(op, null){
@Override
String decodeResponse(Map, ?> json) throws IOException {
return JsonUtilClient.getPath(json);
}
} .run();
cachedHomeDirectory = new Path(pathFromDelegatedFS).makeQualified(
this.getUri(), null);
} catch (IOException e) {
LOG.error("Unable to get HomeDirectory from original File System", e);
cachedHomeDirectory = new Path("/user/" + ugi.getShortUserName())
.makeQualified(this.getUri(), null);
}
}
return cachedHomeDirectory;
}
/**
 * Reads the current working directory under this object's monitor.
 *
 * @return the working directory last set by {@code initialize} or
 *         {@code setWorkingDirectory}
 */
@Override
public synchronized Path getWorkingDirectory() {
  return this.workingDir;
}
/**
 * Changes the working directory. A relative {@code dir} is resolved against
 * the current working directory first.
 *
 * @param dir the new working directory
 * @throws IllegalArgumentException if the resolved path is rejected by
 *         {@code DFSUtilClient.isValidName}
 */
@Override
public synchronized void setWorkingDirectory(final Path dir) {
  final Path resolved = makeAbsolute(dir);
  final String pathName = resolved.toUri().getPath();
  if (DFSUtilClient.isValidName(pathName)) {
    workingDir = resolved;
    return;
  }
  throw new IllegalArgumentException("Invalid DFS directory name " +
      pathName);
}
/**
 * Resolves a path against the working directory unless it is already
 * absolute.
 *
 * @param f the path to resolve
 * @return {@code f} itself when absolute, otherwise {@code f} under
 *         {@code workingDir}
 */
private Path makeAbsolute(Path f) {
  if (f.isAbsolute()) {
    return f;
  }
  return new Path(workingDir, f);
}
static Map, ?> jsonParse(final HttpURLConnection c,
final boolean useErrorStream) throws IOException {
if (c.getContentLength() == 0) {
return null;
}
final InputStream in = useErrorStream ?
c.getErrorStream() : c.getInputStream();
if (in == null) {
throw new IOException("The " + (useErrorStream? "error": "input") +
" stream is null.");
}
try {
final String contentType = c.getContentType();
if (contentType != null) {
final MediaType parsed = MediaType.valueOf(contentType);
if (!MediaType.APPLICATION_JSON_TYPE.isCompatible(parsed)) {
throw new IOException("Content-Type \"" + contentType
+ "\" is incompatible with \"" + MediaType.APPLICATION_JSON
+ "\" (parsed=\"" + parsed + "\")");
}
}
return JsonSerialization.mapReader().readValue(in);
} finally {
in.close();
}
}
private static Map, ?> validateResponse(final HttpOpParam.Op op,
final HttpURLConnection conn, boolean unwrapException)
throws IOException {
final int code = conn.getResponseCode();
// server is demanding an authentication we don't support
if (code == HttpURLConnection.HTTP_UNAUTHORIZED) {
// match hdfs/rpc exception
throw new AccessControlException(conn.getResponseMessage());
}
if (code != op.getExpectedHttpResponseCode()) {
final Map, ?> m;
try {
m = jsonParse(conn, true);
} catch(Exception e) {
throw new IOException("Unexpected HTTP response: code=" + code + " != "
+ op.getExpectedHttpResponseCode() + ", " + op.toQueryString()
+ ", message=" + conn.getResponseMessage(), e);
}
if (m == null) {
throw new IOException("Unexpected HTTP response: code=" + code + " != "
+ op.getExpectedHttpResponseCode() + ", " + op.toQueryString()
+ ", message=" + conn.getResponseMessage());
} else if (m.get(RemoteException.class.getSimpleName()) == null) {
return m;
}
IOException re = JsonUtilClient.toRemoteException(m);
//check if exception is due to org.apache.hadoop.shaded.com.unication with a Standby name node
if (re.getMessage() != null && re.getMessage().endsWith(
StandbyException.class.getSimpleName())) {
LOG.trace("Detected StandbyException", re);
throw new IOException(re);
}
// extract UGI-related exceptions and unwrap InvalidToken
// the NN mangles these exceptions but the DN does not and may need
// to re-fetch a token if either report the token is expired
if (re.getMessage() != null && re.getMessage().startsWith(
SecurityUtil.FAILED_TO_GET_UGI_MSG_HEADER)) {
String[] parts = re.getMessage().split(":\\s+", 3);
re = new RemoteException(parts[1], parts[2]);
re = ((RemoteException)re).unwrapRemoteException(InvalidToken.class);
}
throw unwrapException? toIOException(re): re;
}
return null;
}
/**
* Covert an exception to an IOException.
*
* For a non-IOException, wrap it with IOException.
* For a RemoteException, unwrap it.
* For an IOException which is not a RemoteException, return it.
*/
private static IOException toIOException(Exception e) {
if (!(e instanceof IOException)) {
return new IOException(e);
}
final IOException org.apache.hadoop.shaded.io. = (IOException)e;
if (!(org.apache.hadoop.shaded.io. instanceof RemoteException)) {
return org.apache.hadoop.shaded.io.;
}
return ((RemoteException)org.apache.hadoop.shaded.io.).unwrapRemoteException();
}
/**
 * Looks up the namenode address currently selected by
 * {@code currentNNAddrIndex}.
 *
 * @return the active entry of {@code nnAddrs}
 */
private synchronized InetSocketAddress getCurrentNNAddr() {
  final int active = currentNNAddrIndex;
  return nnAddrs[active];
}
/**
 * Advances to the next namenode address, wrapping back to the first one
 * after the last, so that the following request fails over to another
 * name node.
 */
private synchronized void resetStateToFailOver() {
  final int next = currentNNAddrIndex + 1;
  currentNNAddrIndex = next % nnAddrs.length;
}
/**
 * Builds a URL on the currently selected namenode from a path and a query
 * string, using the transport scheme of this file system.
 *
 * @param path to obtain the URL for
 * @param query string to append to the path
 * @return namenode URL referring to the given path
 * @throws IOException on error constructing the URL
 */
private URL getNamenodeURL(String path, String query) throws IOException {
  final InetSocketAddress active = getCurrentNNAddr();
  final String file = path + '?' + query;
  final URL namenodeUrl = new URL(getTransportScheme(), active.getHostName(),
      active.getPort(), file);
  LOG.trace("url={}", namenodeUrl);
  return namenodeUrl;
}
private synchronized Param, ?>[] getAuthParameters(final HttpOpParam.Op op)
throws IOException {
List> authParams = Lists.newArrayList();
// Skip adding delegation token for token operations because these
// operations require authentication.
Token> token = null;
if (!op.getRequireAuth()) {
token = getDelegationToken();
}
if (token != null) {
authParams.add(new DelegationParam(token.encodeToUrlString()));
} else {
UserGroupInformation userUgi = ugi;
UserGroupInformation realUgi = userUgi.getRealUser();
if (realUgi != null) { // proxy user
authParams.add(new DoAsParam(userUgi.getShortUserName()));
userUgi = realUgi;
}
UserParam userParam = new UserParam((userUgi.getShortUserName()));
//in insecure, use user.name parameter, in secure, use spnego auth
if(isInsecureCluster) {
authParams.add(userParam);
}
}
return authParams.toArray(new Param,?>[0]);
}
URL toUrl(final HttpOpParam.Op op, final Path fspath,
final Param,?>... parameters) throws IOException {
//initialize URI path and query
final String path = PATH_PREFIX
+ (fspath == null? "/": makeQualified(fspath).toUri().getRawPath());
final String query = op.toQueryString()
+ Param.toSortedString("&", getAuthParameters(op))
+ Param.toSortedString("&", parameters);
final URL url = getNamenodeURL(path, query);
LOG.trace("url={}", url);
return url;
}
/**
* This class is for initialing a HTTP connection, connecting to server,
* obtaining a response, and also handling retry on failures.
*/
abstract class AbstractRunner {
abstract protected URL getUrl() throws IOException;
protected final HttpOpParam.Op op;
private final boolean redirected;
protected ExcludeDatanodesParam excludeDatanodes =
new ExcludeDatanodesParam("");
private boolean checkRetry;
private String redirectHost;
private boolean followRedirect = true;
protected AbstractRunner(final HttpOpParam.Op op, boolean redirected) {
this.op = op;
this.redirected = redirected;
}
protected AbstractRunner(final HttpOpParam.Op op, boolean redirected,
boolean followRedirect) {
this(op, redirected);
this.followRedirect = followRedirect;
}
T run() throws IOException {
UserGroupInformation connectUgi = ugi.getRealUser();
if (connectUgi == null) {
connectUgi = ugi;
}
if (op.getRequireAuth()) {
connectUgi.checkTGTAndReloginFromKeytab();
}
try {
// the entire lifecycle of the connection must be run inside the
// doAs to ensure authentication is performed correctly
return connectUgi.doAs(
new PrivilegedExceptionAction() {
@Override
public T run() throws IOException {
return runWithRetry();
}
});
} catch (InterruptedException e) {
throw new IOException(e);
}
}
/**
* Two-step requests redirected to a DN
*
* Create/Append:
* Step 1) Submit a Http request with neither auto-redirect nor data.
* Step 2) Submit another Http request with the URL from the Location header
* with data.
*
* The reason of having two-step create/append is for preventing clients to
* send out the data before the redirect. This issue is addressed by the
* "Expect: 100-continue" header in HTTP/1.1; see RFC 2616, Section 8.2.3.
* Unfortunately, there are software library bugs (e.g. Jetty 6 http server
* and Java 6 http client), which do not correctly implement "Expect:
* 100-continue". The two-step create/append is a temporary workaround for
* the software library bugs.
*
* Open/Checksum
* Also implements two-step connects for other operations redirected to
* a DN such as open and checksum
*/
protected HttpURLConnection connect(URL url) throws IOException {
//redirect hostname and port
redirectHost = null;
if (url.getProtocol().equals("http") &&
UserGroupInformation.isSecurityEnabled() &&
isTLSKrb) {
throw new IOException("Access denied: dfs.http.policy is HTTPS_ONLY.");
}
// resolve redirects for a DN operation unless already resolved
if (op.getRedirect() && !redirected) {
final HttpOpParam.Op redirectOp =
HttpOpParam.TemporaryRedirectOp.valueOf(op);
final HttpURLConnection conn = connect(redirectOp, url);
// application level proxy like httpfs might not issue a redirect
if (conn.getResponseCode() == op.getExpectedHttpResponseCode()) {
return conn;
}
try {
validateResponse(redirectOp, conn, false);
url = new URL(conn.getHeaderField("Location"));
redirectHost = url.getHost() + ":" + url.getPort();
} finally {
// TODO: consider not calling conn.disconnect() to allow connection reuse
// See http://tinyurl.org.apache.hadoop.shaded.com.java7-http-keepalive
conn.disconnect();
}
if (!followRedirect) {
return conn;
}
}
try {
final HttpURLConnection conn = connect(op, url);
// output streams will validate on close
if (!op.getDoOutput()) {
validateResponse(op, conn, false);
}
return conn;
} catch (IOException org.apache.hadoop.shaded.io.) {
if (redirectHost != null) {
if (excludeDatanodes.getValue() != null) {
excludeDatanodes = new ExcludeDatanodesParam(redirectHost + ","
+ excludeDatanodes.getValue());
} else {
excludeDatanodes = new ExcludeDatanodesParam(redirectHost);
}
}
throw org.apache.hadoop.shaded.io.;
}
}
private HttpURLConnection connect(final HttpOpParam.Op op, final URL url)
throws IOException {
final HttpURLConnection conn =
(HttpURLConnection)connectionFactory.openConnection(url);
final boolean doOutput = op.getDoOutput();
conn.setRequestMethod(op.getType().toString());
conn.setInstanceFollowRedirects(false);
if (restCsrfCustomHeader != null &&
!restCsrfMethodsToIgnore.contains(op.getType().name())) {
// The value of the header is unimportant. Only its presence matters.
conn.setRequestProperty(restCsrfCustomHeader, "\"\"");
}
conn.setRequestProperty(EZ_HEADER, "true");
switch (op.getType()) {
// if not sending a message body for a POST or PUT operation, need
// to ensure the server/proxy knows this
case POST:
case PUT: {
conn.setDoOutput(true);
if (!doOutput) {
// explicitly setting content-length to 0 won't do spnego!!
// opening and closing the stream will send "Content-Length: 0"
conn.getOutputStream().close();
} else {
conn.setRequestProperty("Content-Type",
MediaType.APPLICATION_OCTET_STREAM);
conn.setChunkedStreamingMode(32 << 10); //32kB-chunk
}
break;
}
default:
conn.setDoOutput(doOutput);
break;
}
conn.connect();
return conn;
}
private T runWithRetry() throws IOException {
/**
* Do the real work.
*
* There are three cases that the code inside the loop can throw an
* IOException:
*
*
* - The connection has failed (e.g., ConnectException,
* @see FailoverOnNetworkExceptionRetry for more details)
* - The namenode enters the standby state (i.e., StandbyException).
* - The server returns errors for the org.apache.hadoop.shaded.com.and (i.e., RemoteException)
*
*
* The call to shouldRetry() will conduct the retry policy. The policy
* examines the exception and swallows it if it decides to rerun the work.
*/
for(int retry = 0; ; retry++) {
checkRetry = !redirected;
final URL url = getUrl();
try {
final HttpURLConnection conn = connect(url);
return getResponse(conn);
} catch (AccessControlException ace) {
// no retries for auth failures
throw ace;
} catch (InvalidToken it) {
// try to replace the expired token with a new one. the attempt
// to acquire a new token must be outside this operation's retry
// so if it fails after its own retries, this operation fails too.
if (op.getRequireAuth() || !replaceExpiredDelegationToken()) {
throw it;
}
} catch (IOException org.apache.hadoop.shaded.io.) {
// Attempt to include the redirected node in the exception. If the
// attempt to recreate the exception fails, just use the original.
String node = redirectHost;
if (node == null) {
node = url.getAuthority();
}
try {
IOException newIoe = org.apache.hadoop.shaded.io..getClass().getConstructor(String.class)
.newInstance(node + ": " + org.apache.hadoop.shaded.io..getMessage());
newIoe.initCause(org.apache.hadoop.shaded.io..getCause());
newIoe.setStackTrace(org.apache.hadoop.shaded.io..getStackTrace());
org.apache.hadoop.shaded.io. = newIoe;
} catch (NoSuchMethodException | SecurityException
| InstantiationException | IllegalAccessException
| IllegalArgumentException | InvocationTargetException e) {
}
shouldRetry(org.apache.hadoop.shaded.io., retry);
}
}
}
private void shouldRetry(final IOException org.apache.hadoop.shaded.io., final int retry
) throws IOException {
InetSocketAddress nnAddr = getCurrentNNAddr();
if (checkRetry) {
try {
final RetryPolicy.RetryAction a = retryPolicy.shouldRetry(
org.apache.hadoop.shaded.io., retry, 0, true);
boolean isRetry =
a.action == RetryPolicy.RetryAction.RetryDecision.RETRY;
boolean isFailoverAndRetry =
a.action == RetryPolicy.RetryAction.RetryDecision.FAILOVER_AND_RETRY;
if (isRetry || isFailoverAndRetry) {
LOG.info("Retrying connect to namenode: {}. Already retried {}"
+ " time(s); retry policy is {}, delay {}ms.",
nnAddr, retry, retryPolicy, a.delayMillis);
if (isFailoverAndRetry) {
resetStateToFailOver();
}
Thread.sleep(a.delayMillis);
return;
}
} catch(Exception e) {
LOG.warn("Original exception is ", org.apache.hadoop.shaded.io.);
throw toIOException(e);
}
}
throw toIOException(org.apache.hadoop.shaded.io.);
}
/**
 * Turn an established connection into this runner's result. Called by
 * runWithRetry() right after connect(url) has returned the connection;
 * an IOException thrown from here goes through the same retry handling
 * as a connection failure.
 *
 * @param conn the connection returned by connect(url)
 * @return the result of the operation
 * @throws IOException if the response cannot be obtained
 */
abstract T getResponse(HttpURLConnection conn) throws IOException;
}
/**
* Abstract base class to handle path-based operations with params
*/
abstract class AbstractFsPathRunner extends AbstractRunner {
private final Path fspath;
private Param,?>[] parameters;
AbstractFsPathRunner(final HttpOpParam.Op op, final Path fspath,
Param,?>... parameters) {
super(op, false);
this.fspath = fspath;
this.parameters = parameters;
}
AbstractFsPathRunner(final HttpOpParam.Op op, Param,?>[] parameters,
final Path fspath) {
super(op, false);
this.fspath = fspath;
this.parameters = parameters;
}
protected void updateURLParameters(Param, ?>... p) {
this.parameters = p;
}
@Override
protected URL getUrl() throws IOException {
if (excludeDatanodes.getValue() != null) {
Param, ?>[] tmpParam = new Param, ?>[parameters.length + 1];
System.arraycopy(parameters, 0, tmpParam, 0, parameters.length);
tmpParam[parameters.length] = excludeDatanodes;
return toUrl(op, fspath, tmpParam);
} else {
return toUrl(op, fspath, parameters);
}
}
Path getFspath() {
return fspath;
}
}
/**
* Default path-based implementation expects no json response
*/
class FsPathRunner extends AbstractFsPathRunner {
FsPathRunner(Op op, Path fspath, Param,?>... parameters) {
super(op, fspath, parameters);
}
@Override
Void getResponse(HttpURLConnection conn) throws IOException {
return null;
}
}
/**
* Handle path-based operations with a json response
*/
abstract class FsPathResponseRunner extends AbstractFsPathRunner {
FsPathResponseRunner(final HttpOpParam.Op op, final Path fspath,
Param,?>... parameters) {
super(op, fspath, parameters);
}
FsPathResponseRunner(final HttpOpParam.Op op, Param,?>[] parameters,
final Path fspath) {
super(op, parameters, fspath);
}
@Override
final T getResponse(HttpURLConnection conn) throws IOException {
try {
final Map,?> json = jsonParse(conn, false);
if (json == null) {
// match exception class thrown by parser
throw new IllegalStateException("Missing response");
}
return decodeResponse(json);
} catch (IOException org.apache.hadoop.shaded.io.) {
throw org.apache.hadoop.shaded.io.;
} catch (Exception e) { // catch json parser errors
final IOException org.apache.hadoop.shaded.io. =
new IOException("Response decoding failure: "+e.toString(), e);
LOG.debug("Response decoding failure.", e);
throw org.apache.hadoop.shaded.io.;
} finally {
// Don't call conn.disconnect() to allow connection reuse
// See http://tinyurl.org.apache.hadoop.shaded.com.java7-http-keepalive
conn.getInputStream().close();
}
}
abstract T decodeResponse(Map,?> json) throws IOException;
}
/**
* Handle path-based operations with json boolean response
*/
class FsPathBooleanRunner extends FsPathResponseRunner {
FsPathBooleanRunner(Op op, Path fspath, Param,?>... parameters) {
super(op, fspath, parameters);
}
@Override
Boolean decodeResponse(Map,?> json) throws IOException {
return (Boolean)json.get("boolean");
}
}
/**
* Handle create/append output streams
*/
class FsPathOutputStreamRunner
extends AbstractFsPathRunner {
private final int bufferSize;
FsPathOutputStreamRunner(Op op, Path fspath, int bufferSize,
Param,?>... parameters) {
super(op, fspath, parameters);
this.bufferSize = bufferSize;
}
@Override
FSDataOutputStream getResponse(final HttpURLConnection conn)
throws IOException {
return new FSDataOutputStream(new BufferedOutputStream(
conn.getOutputStream(), bufferSize), statistics) {
@Override
public void write(int b) throws IOException {
try {
super.write(b);
} catch (IOException e) {
LOG.warn("Write to output stream for file '{}' failed. "
+ "Attempting to fetch the cause from the connection.",
getFspath(), e);
validateResponse(op, conn, true);
throw e;
}
}
@Override
public void write(byte[] b, int off, int len) throws IOException {
try {
super.write(b, off, len);
} catch (IOException e) {
LOG.warn("Write to output stream for file '{}' failed. "
+ "Attempting to fetch the cause from the connection.",
getFspath(), e);
validateResponse(op, conn, true);
throw e;
}
}
@Override
public void close() throws IOException {
try {
super.close();
} finally {
try {
validateResponse(op, conn, true);
} finally {
// This is a connection to DataNode. Let's disconnect since
// there is little chance that the connection will be reused
// any time soonl
conn.disconnect();
}
}
}
};
}
}
class FsPathConnectionRunner extends AbstractFsPathRunner {
FsPathConnectionRunner(Op op, Path fspath, Param,?>... parameters) {
super(op, fspath, parameters);
}
@Override
HttpURLConnection getResponse(final HttpURLConnection conn)
throws IOException {
return conn;
}
}
/**
 * Used by open() which tracks the resolved url itself.
 */
class URLRunner extends AbstractRunner<HttpURLConnection> {
  private final URL url;

  /** @return the fixed url given at construction */
  @Override
  protected URL getUrl() throws IOException {
    return url;
  }

  protected URLRunner(final HttpOpParam.Op op, final URL url,
      boolean redirected, boolean followRedirect) {
    super(op, redirected, followRedirect);
    this.url = url;
  }

  /** @return the connection itself, untouched */
  @Override
  HttpURLConnection getResponse(HttpURLConnection conn) throws IOException {
    return conn;
  }
}
/**
 * Apply the umask from this file system's configuration to a permission.
 *
 * @param permission the requested permission; null selects
 *        FsPermission.getDefault()
 * @return the result of FsCreateModes.applyUMask for that permission
 */
private FsPermission applyUMask(FsPermission permission) {
  final FsPermission requested =
      (permission != null) ? permission : FsPermission.getDefault();
  final FsPermission umask = FsPermission.getUMask(getConf());
  return FsCreateModes.applyUMask(requested, umask);
}
private HdfsFileStatus getHdfsFileStatus(Path f) throws IOException {
final HttpOpParam.Op op = GetOpParam.Op.GETFILESTATUS;
HdfsFileStatus status = new FsPathResponseRunner(op, f) {
@Override
HdfsFileStatus decodeResponse(Map,?> json) {
return JsonUtilClient.toFileStatus(json, true);
}
}.run();
if (status == null) {
throw new FileNotFoundException("File does not exist: " + f);
}
return status;
}
/**
 * Record one read op and one GET_FILE_STATUS op, then fetch the status of
 * the path and qualify it against this file system's URI.
 */
@Override
public FileStatus getFileStatus(Path f) throws IOException {
  statistics.incrementReadOps(1);
  storageStatistics.incrementOpCounter(OpType.GET_FILE_STATUS);
  final HdfsFileStatus hdfsStatus = getHdfsFileStatus(f);
  return hdfsStatus.makeQualified(getUri(), f);
}
@Override
public AclStatus getAclStatus(Path f) throws IOException {
final HttpOpParam.Op op = GetOpParam.Op.GETACLSTATUS;
AclStatus status = new FsPathResponseRunner(op, f) {
@Override
AclStatus decodeResponse(Map,?> json) {
return JsonUtilClient.toAclStatus(json);
}
}.run();
if (status == null) {
throw new FileNotFoundException("File does not exist: " + f);
}
return status;
}
/**
 * Record one write op and one MKDIRS op, then run MKDIRS for the path,
 * sending both the masked and the unmasked form of the permission after
 * the umask has been applied.
 *
 * @return the boolean carried by the server's response
 */
@Override
public boolean mkdirs(Path f, FsPermission permission) throws IOException {
  statistics.incrementWriteOps(1);
  storageStatistics.incrementOpCounter(OpType.MKDIRS);
  final HttpOpParam.Op mkdirsOp = PutOpParam.Op.MKDIRS;
  final FsPermission effective = applyUMask(permission);
  final PermissionParam masked =
      new PermissionParam(effective.getMasked());
  final UnmaskedPermissionParam unmasked =
      new UnmaskedPermissionParam(effective.getUnmasked());
  return new FsPathBooleanRunner(mkdirsOp, f, masked, unmasked).run();
}
/**
 * Report symlink support.
 *
 * @return always true
 */
@Override
public boolean supportsSymlinks() {
return true;
}
/**
 * Create a symlink pointing to the destination path.
 *
 * Records one write op and one CREATE_SYM_LINK op, then runs
 * CREATESYMLINK on f. The destination is sent as the path component of
 * its qualified form.
 *
 * @param destination the path the link points to
 * @param f the path of the link
 * @param createParent forwarded to the server as the createparent flag
 */
public void createSymlink(Path destination, Path f, boolean createParent
    ) throws IOException {
  statistics.incrementWriteOps(1);
  storageStatistics.incrementOpCounter(OpType.CREATE_SYM_LINK);
  final HttpOpParam.Op symlinkOp = PutOpParam.Op.CREATESYMLINK;
  final String destinationPath = makeQualified(destination).toUri().getPath();
  final DestinationParam target = new DestinationParam(destinationPath);
  final CreateParentParam parentFlag = new CreateParentParam(createParent);
  new FsPathRunner(symlinkOp, f, target, parentFlag).run();
}
/**
 * Record one write op and one RENAME op, then run RENAME on src. The
 * destination is sent as the path component of its qualified form.
 *
 * @return the boolean carried by the server's response
 */
@Override
public boolean rename(final Path src, final Path dst) throws IOException {
  statistics.incrementWriteOps(1);
  storageStatistics.incrementOpCounter(OpType.RENAME);
  final HttpOpParam.Op renameOp = PutOpParam.Op.RENAME;
  final String dstPath = makeQualified(dst).toUri().getPath();
  final DestinationParam target = new DestinationParam(dstPath);
  return new FsPathBooleanRunner(renameOp, src, target).run();
}
/**
 * Rename with options. Records one write op and one RENAME op, then runs
 * RENAME on src with the qualified destination path and the given option
 * set. No result is read from the response.
 */
@SuppressWarnings("deprecation")
@Override
public void rename(final Path src, final Path dst,
    final Options.Rename... options) throws IOException {
  statistics.incrementWriteOps(1);
  storageStatistics.incrementOpCounter(OpType.RENAME);
  final HttpOpParam.Op renameOp = PutOpParam.Op.RENAME;
  final String dstPath = makeQualified(dst).toUri().getPath();
  final DestinationParam target = new DestinationParam(dstPath);
  final RenameOptionSetParam optionSet = new RenameOptionSetParam(options);
  new FsPathRunner(renameOp, src, target, optionSet).run();
}
/**
 * Record one write op and one SET_XATTR op, then run SETXATTR on the
 * path. The value, when present, is sent HEX-encoded; a null value sends
 * the name and flags only.
 *
 * @param p the path carrying the attribute
 * @param name the attribute name
 * @param value the attribute value, may be null
 * @param flag the set flags forwarded to the server
 */
@Override
public void setXAttr(Path p, String name, byte[] value,
    EnumSet<XAttrSetFlag> flag) throws IOException {
  statistics.incrementWriteOps(1);
  storageStatistics.incrementOpCounter(OpType.SET_XATTR);
  final HttpOpParam.Op op = PutOpParam.Op.SETXATTR;
  if (value != null) {
    new FsPathRunner(op, p, new XAttrNameParam(name), new XAttrValueParam(
        XAttrCodec.encodeValue(value, XAttrCodec.HEX)),
        new XAttrSetFlagParam(flag)).run();
  } else {
    new FsPathRunner(op, p, new XAttrNameParam(name),
        new XAttrSetFlagParam(flag)).run();
  }
}
@Override
public byte[] getXAttr(Path p, final String name) throws IOException {
statistics.incrementReadOps(1);
storageStatistics.incrementOpCounter(OpType.GET_XATTR);
final HttpOpParam.Op op = GetOpParam.Op.GETXATTRS;
return new FsPathResponseRunner(op, p, new XAttrNameParam(name),
new XAttrEncodingParam(XAttrCodec.HEX)) {
@Override
byte[] decodeResponse(Map, ?> json) throws IOException {
return JsonUtilClient.getXAttr(json);
}
}.run();
}
@Override
public Map getXAttrs(Path p) throws IOException {
final HttpOpParam.Op op = GetOpParam.Op.GETXATTRS;
return new FsPathResponseRunner
© 2015 - 2025 Weber Informatics LLC | Privacy Policy