/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.zip.CRC32;
import javax.net.SocketFactory;
import javax.security.auth.login.LoginException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.BlockMissingException;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSInputChecker;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FSOutputSummer;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.OpenFileInfo;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.Syncable;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DistributedFileSystem.DiskStatus;
import org.apache.hadoop.hdfs.metrics.DFSClientMetrics;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlockWithFileName;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.LocatedBlockWithMetaInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlocksWithMetaInfo;
import org.apache.hadoop.hdfs.protocol.LocatedDirectoryListing;
import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.ReadBlockHeader;
import org.apache.hadoop.hdfs.protocol.WriteBlockHeader;
import org.apache.hadoop.hdfs.protocol.ProtocolCompatible;
import org.apache.hadoop.hdfs.protocol.VersionedLocatedBlock;
import org.apache.hadoop.hdfs.protocol.VersionedLocatedBlocks;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryProxy;
import org.apache.hadoop.ipc.Client;
import org.apache.hadoop.ipc.ProtocolProxy;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.ScriptBasedMapping;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.PureJavaCrc32;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
/********************************************************
* DFSClient can connect to a Hadoop Filesystem and
* perform basic file tasks. It uses the ClientProtocol
* to communicate with a NameNode daemon, and connects
* directly to DataNodes to read/write block data.
*
* Hadoop DFS users should obtain an instance of
* DistributedFileSystem, which uses DFSClient to handle
* filesystem tasks.
*
********************************************************/
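// A minimal usage sketch (hypothetical path; applications normally reach this
// class through DistributedFileSystem rather than driving it directly):
//
//   Configuration conf = new Configuration();
//   DFSClient client = new DFSClient(conf);            // NameNode from fs.default.name
//   OutputStream out = client.create("/tmp/demo.txt", true);
//   out.write("hello".getBytes());
//   out.close();                                       // completes the file on the NameNode
//   DFSInputStream in = client.open("/tmp/demo.txt");
//   int firstByte = in.read();
//   in.close();
//   client.close();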
public class DFSClient implements FSConstants, java.io.Closeable {
public static final Log LOG = LogFactory.getLog(DFSClient.class);
public static final int MAX_BLOCK_ACQUIRE_FAILURES = 3;
private static final int TCP_WINDOW_SIZE = 128 * 1024; // 128 KB
private static final long NUM_BYTES_CHECK_READ_SPEED = 128 * 1024;
private static byte[] emptyByteArray = new byte[0];
public ClientProtocol namenode;
private ClientProtocol rpcNamenode;
// Namenode proxy that supports method-based compatibility
public ProtocolProxy<ClientProtocol> namenodeProtocolProxy = null;
public Object namenodeProxySyncObj = new Object();
final UnixUserGroupInformation ugi;
volatile boolean clientRunning = true;
static Random r = new Random();
final String clientName;
final LeaseChecker leasechecker;
private Configuration conf;
private long defaultBlockSize;
private short defaultReplication;
private SocketFactory socketFactory;
private int socketTimeout;
private int socketReadExtentionTimeout;
private int datanodeWriteTimeout;
private int datanodeWriteExtentionTimeout;
private int timeoutValue; // read timeout for the socket
final int writePacketSize;
final long minReadSpeedBps;
private final FileSystem.Statistics stats;
private int maxBlockAcquireFailures;
private final int hdfsTimeout; // timeout value for a DFS operation.
// The amount of time to wait before aborting a close file.
private final long closeFileTimeout;
private long namenodeVersion = ClientProtocol.versionID;
private DFSClientMetrics metrics = new DFSClientMetrics();
protected Integer dataTransferVersion = -1;
private boolean shortCircuitLocalReads = false;
private final InetAddress localHost;
private InetSocketAddress nameNodeAddr;
private DatanodeInfo pseuDatanodeInfoForLocalhost;
private String localhostNetworkLocation = null;
DNSToSwitchMapping dnsToSwitchMapping = null;
private int ipTosValue = NetUtils.NOT_SET_IP_TOS;
/**
* This variable tracks the number of failures for each thread of
* dfs input stream since the start of the most recent user-facing operation.
* That is to say, it should be reset
* whenever the user makes a call on this stream, and if at any point
* during the retry logic, the failure count exceeds a threshold,
* the errors will be thrown back to the operation.
*
* Specifically this counts the number of times the client has gone
* back to the namenode to get a new list of block locations, and is
* capped at maxBlockAcquireFailures
*
*/
private static ThreadLocal<Integer> dfsInputStreamfailures =
new ThreadLocal<Integer>();
/**
* The locking hierarchy is to first acquire lock on DFSClient object, followed by
* lock on leasechecker, followed by lock on an individual DFSOutputStream.
*/
public static ClientProtocol createNamenode(Configuration conf) throws IOException {
return createNamenode(NameNode.getAddress(conf), conf);
}
public static ClientProtocol createNamenode( InetSocketAddress nameNodeAddr,
Configuration conf) throws IOException {
try {
return createNamenode(createRPCNamenode(nameNodeAddr, conf,
UnixUserGroupInformation.login(conf, true)).getProxy());
} catch (LoginException e) {
throw (IOException)(new IOException().initCause(e));
}
}
/**
* Create a NameNode proxy for the client if the client and NameNode
* are compatible, storing it together with the underlying RPC proxy.
*
* @param nameNodeAddr NameNode address
* @param conf configuration
* @param ugi ticket
* @throws IOException if the client is incompatible with the NameNode
*/
private void createRPCNamenodeIfCompatible(
InetSocketAddress nameNodeAddr,
Configuration conf,
UnixUserGroupInformation ugi) throws IOException {
try {
this.namenodeProtocolProxy = createRPCNamenode(nameNodeAddr, conf, ugi);
this.rpcNamenode = namenodeProtocolProxy.getProxy();
} catch (RPC.VersionMismatch e) {
long clientVersion = e.getClientVersion();
namenodeVersion = e.getServerVersion();
if (clientVersion > namenodeVersion &&
!ProtocolCompatible.isCompatibleClientProtocol(
clientVersion, namenodeVersion)) {
throw new RPC.VersionIncompatible(
ClientProtocol.class.getName(), clientVersion, namenodeVersion);
}
this.rpcNamenode = (ClientProtocol)e.getProxy();
}
}
public static ProtocolProxy<ClientProtocol> createRPCNamenode(
Configuration conf) throws IOException {
try {
return createRPCNamenode(NameNode.getAddress(conf), conf,
UnixUserGroupInformation.login(conf, true));
} catch (LoginException e) {
throw new IOException(e);
}
}
public static ProtocolProxy<ClientProtocol> createRPCNamenode(InetSocketAddress nameNodeAddr,
Configuration conf, UnixUserGroupInformation ugi)
throws IOException {
return RPC.getProtocolProxy(ClientProtocol.class,
ClientProtocol.versionID, nameNodeAddr, ugi, conf,
NetUtils.getSocketFactory(conf, ClientProtocol.class));
}
private static ClientProtocol createNamenode(ClientProtocol rpcNamenode)
throws IOException {
RetryPolicy createPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
5, LEASE_SOFTLIMIT_PERIOD, TimeUnit.MILLISECONDS);
Map<Class<? extends Exception>, RetryPolicy> remoteExceptionToPolicyMap =
new HashMap<Class<? extends Exception>, RetryPolicy>();
remoteExceptionToPolicyMap.put(AlreadyBeingCreatedException.class, createPolicy);
Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap =
new HashMap<Class<? extends Exception>, RetryPolicy>();
exceptionToPolicyMap.put(RemoteException.class,
RetryPolicies.retryByRemoteException(
RetryPolicies.TRY_ONCE_THEN_FAIL, remoteExceptionToPolicyMap));
RetryPolicy methodPolicy = RetryPolicies.retryByException(
RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
Map<String, RetryPolicy> methodNameToPolicyMap = new HashMap<String, RetryPolicy>();
methodNameToPolicyMap.put("create", methodPolicy);
return (ClientProtocol) RetryProxy.create(ClientProtocol.class,
rpcNamenode, methodNameToPolicyMap);
}
public static ClientDatanodeProtocol createClientDatanodeProtocolProxy(
DatanodeID datanodeid, Configuration conf, int socketTimeout)
throws IOException {
return createClientDNProtocolProxy(datanodeid, conf, socketTimeout).getProxy();
}
static ProtocolProxy<ClientDatanodeProtocol> createClientDNProtocolProxy(
DatanodeID datanodeid, Configuration conf, int socketTimeout)
throws IOException {
InetSocketAddress addr = NetUtils.createSocketAddr(
datanodeid.getHost() + ":" + datanodeid.getIpcPort());
if (ClientDatanodeProtocol.LOG.isDebugEnabled()) {
ClientDatanodeProtocol.LOG.debug("ClientDatanodeProtocol addr=" + addr);
}
UserGroupInformation ugi;
try {
ugi = UserGroupInformation.login(conf);
} catch (LoginException le) {
throw new RuntimeException("Couldn't login!");
}
return RPC.getProtocolProxy(ClientDatanodeProtocol.class,
ClientDatanodeProtocol.versionID, addr, ugi, conf,
NetUtils.getDefaultSocketFactory(conf), socketTimeout);
}
/**
* Same as this(NameNode.getAddress(conf), conf);
* @see #DFSClient(InetSocketAddress, Configuration)
*/
public DFSClient(Configuration conf) throws IOException {
this(NameNode.getAddress(conf), conf);
}
/**
* Same as this(nameNodeAddr, conf, null);
* @see #DFSClient(InetSocketAddress, Configuration, org.apache.hadoop.fs.FileSystem.Statistics)
*/
public DFSClient(InetSocketAddress nameNodeAddr, Configuration conf
) throws IOException {
this(nameNodeAddr, conf, null);
}
/**
* Same as this(nameNodeAddr, null, conf, stats);
* @see #DFSClient(InetSocketAddress, ClientProtocol, Configuration, org.apache.hadoop.fs.FileSystem.Statistics)
*/
public DFSClient(InetSocketAddress nameNodeAddr, Configuration conf,
FileSystem.Statistics stats)
throws IOException {
this(nameNodeAddr, null, conf, stats);
}
/**
* Create a new DFSClient connected to the given nameNodeAddr or rpcNamenode.
* Exactly one of nameNodeAddr or rpcNamenode must be null.
*/
DFSClient(InetSocketAddress nameNodeAddr, ClientProtocol rpcNamenode,
Configuration conf, FileSystem.Statistics stats)
throws IOException {
this.conf = conf;
this.stats = stats;
this.socketTimeout = conf.getInt("dfs.socket.timeout",
HdfsConstants.READ_TIMEOUT);
this.socketReadExtentionTimeout = conf.getInt(
HdfsConstants.DFS_DATANODE_READ_EXTENSION,
HdfsConstants.READ_TIMEOUT_EXTENSION);
this.timeoutValue = this.socketTimeout;
this.datanodeWriteTimeout = conf.getInt("dfs.datanode.socket.write.timeout",
HdfsConstants.WRITE_TIMEOUT);
this.datanodeWriteExtentionTimeout = conf.getInt(
HdfsConstants.DFS_DATANODE_WRITE_EXTENTSION,
HdfsConstants.WRITE_TIMEOUT_EXTENSION);
this.socketFactory = NetUtils.getSocketFactory(conf, ClientProtocol.class);
// dfs.write.packet.size is an internal config variable
this.writePacketSize = conf.getInt("dfs.write.packet.size", 64*1024);
this.minReadSpeedBps = conf.getLong("dfs.min.read.speed.bps", -1);
this.maxBlockAcquireFailures = getMaxBlockAcquireFailures(conf);
this.localHost = InetAddress.getLocalHost();
// fetch network location of localhost
this.pseuDatanodeInfoForLocalhost = new DatanodeInfo(new DatanodeID(
this.localHost.getHostAddress()));
this.dnsToSwitchMapping = ReflectionUtils.newInstance(
conf.getClass("topology.node.switch.mapping.impl", ScriptBasedMapping.class,
DNSToSwitchMapping.class), conf);
ArrayList<String> tempList = new ArrayList<String>();
tempList.add(this.localHost.getHostName());
List<String> retList = dnsToSwitchMapping.resolve(tempList);
if (retList != null && retList.size() > 0) {
localhostNetworkLocation = retList.get(0);
this.pseuDatanodeInfoForLocalhost.setNetworkLocation(localhostNetworkLocation);
}
// The hdfsTimeout is currently the same as the ipc timeout
this.hdfsTimeout = Client.getTimeout(conf);
this.closeFileTimeout = conf.getLong("dfs.client.closefile.timeout", this.hdfsTimeout);
try {
this.ugi = UnixUserGroupInformation.login(conf, true);
} catch (LoginException e) {
throw (IOException)(new IOException().initCause(e));
}
String taskId = conf.get("mapred.task.id");
if (taskId != null) {
this.clientName = "DFSClient_" + taskId + "_" + r.nextInt()
+ "_" + Thread.currentThread().getId();
} else {
this.clientName = "DFSClient_" + r.nextInt();
}
defaultBlockSize = conf.getLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
defaultReplication = (short) conf.getInt("dfs.replication", 3);
if (nameNodeAddr != null && rpcNamenode == null) {
this.nameNodeAddr = nameNodeAddr;
getNameNode();
} else if (nameNodeAddr == null && rpcNamenode != null) {
//This case is used for testing.
if (rpcNamenode instanceof NameNode) {
this.namenodeProtocolProxy = createRPCNamenode(((NameNode)rpcNamenode).getNameNodeAddress(), conf, ugi);
}
this.namenode = this.rpcNamenode = rpcNamenode;
} else {
throw new IllegalArgumentException(
"Expecting exactly one of nameNodeAddr and rpcNamenode being null: "
+ "nameNodeAddr=" + nameNodeAddr + ", rpcNamenode=" + rpcNamenode);
}
// read directly from the block file if configured.
this.shortCircuitLocalReads = conf.getBoolean("dfs.read.shortcircuit", false);
if (this.shortCircuitLocalReads) {
LOG.debug("Configured to shortcircuit reads to " + localHost);
}
this.leasechecker = new LeaseChecker(this.clientName, this.conf);
// by default, if the ipTosValue is less than 0 (for example -1),
// we will not set it on the socket.
this.ipTosValue = conf.getInt("dfs.client.tos.value",
NetUtils.NOT_SET_IP_TOS);
if (this.ipTosValue > NetUtils.IP_TOS_MAX_VALUE) {
LOG.warn("dfs.client.tos.value " + ipTosValue +
" exceeds the max allowed value " + NetUtils.IP_TOS_MAX_VALUE +
", will not take effect");
this.ipTosValue = NetUtils.NOT_SET_IP_TOS;
}
}
private void getNameNode() throws IOException {
if (nameNodeAddr != null) {
// The lock is to make sure namenode, namenodeProtocolProxy
// and rpcNamenode are consistent ultimately. There is still
// a small window where another thread can see inconsistent
// version of namenodeProtocolProxy and namenode. But that will
// only happen during the transition window of a name-node upgrade,
// and the exception will likely be resolved after a retry.
//
synchronized (namenodeProxySyncObj) {
createRPCNamenodeIfCompatible(nameNodeAddr, conf, ugi);
this.namenode = createNamenode(this.rpcNamenode);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Name node signature is refreshed. Fingerprint: "
+ namenodeProtocolProxy.getMethodsFingerprint());
}
}
public String getClientName() {
return clientName;
}
public void getNewNameNodeIfNeeded(int serverMethodFingerprint)
throws IOException {
if (serverMethodFingerprint != namenodeProtocolProxy
.getMethodsFingerprint()) {
LOG.info(String.format(
"Different Namenode methods' fingerprint: client %s server %s ",
namenodeProtocolProxy.getMethodsFingerprint(),
serverMethodFingerprint));
getNameNode();
LOG.info("Namenode methods updated. New fingerprint: "
+ namenodeProtocolProxy.getMethodsFingerprint());
}
}
static int getMaxBlockAcquireFailures(Configuration conf) {
return conf.getInt("dfs.client.max.block.acquire.failures",
MAX_BLOCK_ACQUIRE_FAILURES);
}
public boolean isOpen() {
return clientRunning;
}
private void checkOpen() throws IOException {
if (!clientRunning) {
throw new IOException("Filesystem closed");
}
}
/**
* Close the file system, abandoning all of the leases and files being
* created and close connections to the namenode.
*/
public synchronized void close() throws IOException {
if(clientRunning) {
leasechecker.close();
leasechecker.closeRenewal();
clientRunning = false;
try {
leasechecker.interruptAndJoin();
} catch (InterruptedException ie) {
}
// close connections to the namenode
RPC.stopProxy(rpcNamenode);
}
}
/**
* Get DFSClientMetrics
*/
public DFSClientMetrics getDFSClientMetrics(){
return metrics;
}
/**
* Get the default block size for this cluster
* @return the default block size in bytes
*/
public long getDefaultBlockSize() {
return defaultBlockSize;
}
public long getBlockSize(String f) throws IOException {
try {
return namenode.getPreferredBlockSize(f);
} catch (IOException ie) {
LOG.warn("Problem getting block size: " +
StringUtils.stringifyException(ie));
throw ie;
}
}
/**
* Report corrupt blocks that were discovered by the client.
*/
public void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
namenode.reportBadBlocks(blocks);
}
public short getDefaultReplication() {
return defaultReplication;
}
/**
* @deprecated Use getBlockLocations instead
*
* Get hints about the location of the indicated block(s).
*
* getHints() returns a list of hostnames that store data for
* a specific file region. It returns a set of hostnames for
* every block within the indicated region.
*
* This function is very useful when writing code that considers
* data-placement when performing operations. For example, the
* MapReduce system tries to schedule tasks on the same machines
* as the data-block the task processes.
*/
@Deprecated
public String[][] getHints(String src, long start, long length)
throws IOException {
BlockLocation[] blkLocations = getBlockLocations(src, start, length);
if ((blkLocations == null) || (blkLocations.length == 0)) {
return new String[0][];
}
int blkCount = blkLocations.length;
String[][]hints = new String[blkCount][];
for (int i=0; i < blkCount ; i++) {
String[] hosts = blkLocations[i].getHosts();
hints[i] = hosts;
}
return hints;
}
public static boolean isMetaInfoSuppoted(ProtocolProxy<ClientProtocol> proxy)
throws IOException {
return proxy != null && proxy.isMethodSupported(
"openAndFetchMetaInfo", String.class, long.class, long.class);
}
private static LocatedBlocks callGetBlockLocations(
ClientProtocol namenode,
String src, long start, long length, boolean supportMetaInfo) throws IOException {
try {
if (supportMetaInfo) {
return namenode.openAndFetchMetaInfo(src, start, length);
}
return namenode.getBlockLocations(src, start, length);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
FileNotFoundException.class);
}
}
/**
* Get block location info about file
*
* getBlockLocations() returns a list of hostnames that store
* data for a specific file region. It returns a set of hostnames
* for every block within the indicated region.
*
* This function is very useful when writing code that considers
* data-placement when performing operations. For example, the
* MapReduce system tries to schedule tasks on the same machines
* as the data-block the task processes.
*/
public BlockLocation[] getBlockLocations(String src, long start,
long length) throws IOException {
LocatedBlocks blocks = callGetBlockLocations(namenode, src, start, length,
isMetaInfoSuppoted(namenodeProtocolProxy));
return DFSUtil.locatedBlocks2Locations(blocks);
}
public LocatedBlocks getLocatedBlocks(String src, long start,
long length) throws IOException {
return callGetBlockLocations(namenode, src, start, length,
isMetaInfoSuppoted(namenodeProtocolProxy));
}
public DFSInputStream open(String src) throws IOException {
return open(src, conf.getInt("io.file.buffer.size", 4096), true, null, false);
}
/*
* This method is only used by SnapshotClient
*/
DFSInputStream open(LocatedBlocksWithMetaInfo blocks) throws IOException {
checkOpen();
incFileReadToStats();
return new DFSInputStream(blocks, conf.getInt("io.file.buffer.size", 4096),
true);
}
/**
* Create an input stream that obtains a nodelist from the
* namenode, and then reads from all the right places. Creates
* inner subclass of InputStream that does the right out-of-band
* work.
*/
DFSInputStream open(String src, int buffersize, boolean verifyChecksum,
FileSystem.Statistics stats, boolean clearOsBuffer
) throws IOException {
checkOpen();
incFileReadToStats();
// Get block info from namenode
return new DFSInputStream(src, buffersize, verifyChecksum, clearOsBuffer);
}
/**
* Create a new dfs file and return an output stream for writing into it.
*
* @param src stream name
* @param overwrite do not check for file existence if true
* @return output stream
* @throws IOException
*/
public OutputStream create(String src,
boolean overwrite
) throws IOException {
return create(src, overwrite, defaultReplication, defaultBlockSize, null);
}
/**
* Create a new dfs file and return an output stream for writing into it
* with write-progress reporting.
*
* @param src stream name
* @param overwrite do not check for file existence if true
* @return output stream
* @throws IOException
*/
public OutputStream create(String src,
boolean overwrite,
Progressable progress
) throws IOException {
return create(src, overwrite, defaultReplication, defaultBlockSize, progress);
}
/**
* Create a new dfs file with the specified block replication
* and return an output stream for writing into the file.
*
* @param src stream name
* @param overwrite do not check for file existence if true
* @param replication block replication
* @return output stream
* @throws IOException
*/
public OutputStream create(String src,
boolean overwrite,
short replication,
long blockSize
) throws IOException {
return create(src, overwrite, replication, blockSize, null);
}
/**
* Create a new dfs file with the specified block replication
* with write-progress reporting and return an output stream for writing
* into the file.
*
* @param src stream name
* @param overwrite do not check for file existence if true
* @param replication block replication
* @return output stream
* @throws IOException
*/
public OutputStream create(String src,
boolean overwrite,
short replication,
long blockSize,
Progressable progress
) throws IOException {
return create(src, overwrite, replication, blockSize, progress,
conf.getInt("io.file.buffer.size", 4096));
}
/**
* Call
* {@link #create(String,FsPermission,boolean,short,long,Progressable,int)}
* with default permission.
* @see FsPermission#getDefault()
*/
public OutputStream create(String src,
boolean overwrite,
short replication,
long blockSize,
Progressable progress,
int buffersize
) throws IOException {
return create(src, FsPermission.getDefault(),
overwrite, replication, blockSize, progress, buffersize);
}
/**
* Call
* {@link #create(String,FsPermission,boolean,boolean,short,long,Progressable,int)}
* with createParent set to true.
*/
public OutputStream create(String src,
FsPermission permission,
boolean overwrite,
short replication,
long blockSize,
Progressable progress,
int buffersize
) throws IOException {
return create(src, permission, overwrite, true,
replication, blockSize, progress, buffersize);
}
/**
* Create a new dfs file with the specified block replication
* with write-progress reporting and return an output stream for writing
* into the file.
*
* @param src stream name
* @param permission The permission of the directory being created.
* If permission == null, use {@link FsPermission#getDefault()}.
* @param overwrite do not check for file existence if true
* @param createParent create missing parent directory if true
* @param replication block replication
* @return output stream
* @throws IOException
* @see ClientProtocol#create(String, FsPermission, String, boolean, short, long)
*/
public OutputStream create(String src,
FsPermission permission,
boolean overwrite,
boolean createParent,
short replication,
long blockSize,
Progressable progress,
int buffersize
) throws IOException {
return create(src, permission, overwrite, createParent, replication, blockSize,
progress, buffersize, conf.getInt("io.bytes.per.checksum", 512));
}
/**
* Create a new dfs file with the specified block replication
* with write-progress reporting and return an output stream for writing
* into the file.
*
* @param src stream name
* @param permission The permission of the directory being created.
* If permission == null, use {@link FsPermission#getDefault()}.
* @param overwrite do not check for file existence if true
* @param replication block replication
* @return output stream
* @throws IOException
* @see ClientProtocol#create(String, FsPermission, String, boolean, short, long)
*/
public OutputStream create(String src,
FsPermission permission,
boolean overwrite,
boolean createParent,
short replication,
long blockSize,
Progressable progress,
int buffersize,
int bytesPerChecksum) throws IOException {
return create(src, permission, overwrite, createParent, replication, blockSize,
progress, buffersize, bytesPerChecksum, false, false, null);
}
/**
* Create a new dfs file with the specified block replication
* with write-progress reporting and return an output stream for writing
* into the file.
*
* @param src stream name
* @param permission The permission of the directory being created.
* If permission == null, use {@link FsPermission#getDefault()}.
* @param overwrite do not check for file existence if true
* @param createParent create missing parent directory if true
* @param replication block replication
* @param forceSync a hdfs sync() operation invokes local filesystem sync
* on datanodes.
* @param doParallelWrites write replicas in parallel
* @return output stream
* @throws IOException
* @see ClientProtocol#create(String, FsPermission, String, boolean, short, long)
*/
public OutputStream create(String src,
FsPermission permission,
boolean overwrite,
boolean createParent,
short replication,
long blockSize,
Progressable progress,
int buffersize,
boolean forceSync,
boolean doParallelWrites) throws IOException {
return create(src, permission, overwrite, createParent, replication,
blockSize,progress, buffersize,
conf.getInt("io.bytes.per.checksum", 512),
forceSync, doParallelWrites, null);
}
/**
* Create a new dfs file with the specified block replication
* with write-progress reporting and return an output stream for writing
* into the file.
*
* @param src stream name
* @param permission The permission of the directory being created.
* If permission == null, use {@link FsPermission#getDefault()}.
* @param overwrite do not check for file existence if true
* @param replication block replication
* @param forceSync a hdfs sync() operation invokes local filesystem sync
* on datanodes.
* @param doParallelWrites write replicas in parallel
* @return output stream
* @throws IOException
* @see ClientProtocol#create(String, FsPermission, String, boolean, short, long)
*/
public OutputStream create(String src,
FsPermission permission,
boolean overwrite,
boolean createParent,
short replication,
long blockSize,
Progressable progress,
int buffersize,
int bytesPerChecksum,
boolean forceSync,
boolean doParallelWrites) throws IOException {
return create(src, permission, overwrite, createParent, replication,
blockSize, progress, buffersize, bytesPerChecksum, forceSync,
doParallelWrites, null);
}
/**
* Create a new dfs file with the specified block replication
* with write-progress reporting and return an output stream for writing
* into the file.
*
* @param src stream name
* @param permission The permission of the directory being created.
* If permission == null, use {@link FsPermission#getDefault()}.
* @param overwrite do not check for file existence if true
* @param replication block replication
* @param forceSync a hdfs sync() operation invokes local filesystem sync
* on datanodes.
* @param doParallelWrites write replicas in parallel
* @param favoredNodes nodes on which to place replicas if possible
* @return output stream
* @throws IOException
* @see ClientProtocol#create(String, FsPermission, String, boolean, short, long)
*/
public OutputStream create(String src,
FsPermission permission,
boolean overwrite,
boolean createParent,
short replication,
long blockSize,
Progressable progress,
int buffersize,
int bytesPerChecksum,
boolean forceSync,
boolean doParallelWrites,
InetSocketAddress[] favoredNodes)
throws IOException {
checkOpen();
if (permission == null) {
permission = FsPermission.getDefault();
}
boolean success = false;
try {
FsPermission masked = permission.applyUMask(FsPermission.getUMask(conf));
LOG.debug(src + ": masked=" + masked);
// For each of the favored nodes, mock up a DatanodeInfo with the IP
// address and port of that node.
DatanodeInfo[] favoredNodeInfos = null;
if (favoredNodes != null) {
favoredNodeInfos = new DatanodeInfo[favoredNodes.length];
for (int i = 0; i < favoredNodes.length; i++) {
favoredNodeInfos[i] = new DatanodeInfo(new DatanodeID(
favoredNodes[i].getAddress().getHostAddress() + ":" +
favoredNodes[i].getPort()));
}
}
OutputStream result = new DFSOutputStream(src, masked,
overwrite, createParent, replication, blockSize, progress, buffersize,
bytesPerChecksum, forceSync, doParallelWrites, favoredNodeInfos);
leasechecker.put(src, result);
metrics.incNumCreateFileOps();
if (stats != null) {
stats.incrementFilesCreated();
}
success = true;
return result;
} finally {
if (!success && namenodeProtocolProxy.isMethodSupported(
"abandonFile", String.class, String.class)) {
try {
namenode.abandonFile(src, clientName);
} catch (RemoteException e) {
if (e.unwrapRemoteException() instanceof LeaseExpiredException) {
LOG.debug(String.format(
"client %s attempting to abandon file %s which it does not own",
clientName, src),
e
);
} else {
throw e;
}
}
}
}
}
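// Sketch (hypothetical addresses): pass favoredNodes to hint replica
// placement; the client mocks them up as DatanodeInfo entries and the
// NameNode treats them as hints, not guarantees.
//
//   InetSocketAddress[] favored = {
//       new InetSocketAddress("10.0.0.1", 50010),
//       new InetSocketAddress("10.0.0.2", 50010) };
//   OutputStream out = client.create("/tmp/pinned", FsPermission.getDefault(),
//       true, true, (short) 3, 128L << 20, null, 64 * 1024, 512,
//       false, false, favored);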
/**
* Recover a file's lease
*
* @param src a file's path
* @return true if lease recovery completes
* @throws IOException
*/
boolean recoverLease(String src, boolean discardLastBlock) throws IOException {
checkOpen();
if (this.namenodeProtocolProxy == null) {
return versionBasedRecoverLease(src);
}
return methodBasedRecoverLease(src, discardLastBlock);
}
/** recover lease based on version */
private boolean versionBasedRecoverLease(String src) throws IOException {
if (namenodeVersion < ClientProtocol.RECOVER_LEASE_VERSION) {
OutputStream out;
try {
out = append(src, conf.getInt("io.file.buffer.size", 4096), null);
} catch (RemoteException re) {
IOException e = re.unwrapRemoteException(AlreadyBeingCreatedException.class);
if (e instanceof AlreadyBeingCreatedException) {
return false;
}
throw re;
}
out.close();
return true;
} else if (namenodeVersion < ClientProtocol.CLOSE_RECOVER_LEASE_VERSION){
try {
namenode.recoverLease(src, clientName);
} catch (RemoteException re) {
throw re.unwrapRemoteException(FileNotFoundException.class,
AccessControlException.class);
}
return !namenode.getBlockLocations(src, 0, Long.MAX_VALUE).isUnderConstruction();
} else {
try {
return namenode.closeRecoverLease(src, clientName, false);
} catch (RemoteException re) {
throw re.unwrapRemoteException(FileNotFoundException.class,
AccessControlException.class);
}
}
}
/** recover lease based on method name */
private boolean methodBasedRecoverLease(String src, boolean discardLastBlock)
throws IOException {
// check if closeRecoverLease(discardLastBlock) is supported
if (namenodeProtocolProxy.isMethodSupported(
"closeRecoverLease", String.class, String.class, boolean.class)) {
try {
return namenode.closeRecoverLease(src, clientName, discardLastBlock);
} catch (RemoteException re) {
throw re.unwrapRemoteException(FileNotFoundException.class,
AccessControlException.class);
}
}
// check if closeRecoverLease is supported
else if (namenodeProtocolProxy.isMethodSupported(
"closeRecoverLease", String.class, String.class)) {
try {
return namenode.closeRecoverLease(src, clientName);
} catch (RemoteException re) {
throw re.unwrapRemoteException(FileNotFoundException.class,
AccessControlException.class);
}
}
// check if recoverLease is supported
if (namenodeProtocolProxy.isMethodSupported(
"recoverLease", String.class, String.class)) {
try {
namenode.recoverLease(src, clientName);
} catch (RemoteException re) {
throw re.unwrapRemoteException(FileNotFoundException.class,
AccessControlException.class);
}
return !namenode.getBlockLocations(src, 0, Long.MAX_VALUE).isUnderConstruction();
}
// now use append
OutputStream out;
try {
out = append(src, conf.getInt("io.file.buffer.size", 4096), null);
} catch (RemoteException re) {
IOException e = re.unwrapRemoteException(AlreadyBeingCreatedException.class);
if (e instanceof AlreadyBeingCreatedException) {
return false;
}
throw re;
}
out.close();
return true;
}
private boolean closeFileOnNameNode(String src, long fileLen,
Block lastBlockId) throws IOException {
boolean fileComplete;
if (namenodeProtocolProxy != null
&& namenodeProtocolProxy.isMethodSupported("complete", String.class,
String.class, long.class, Block.class)) {
fileComplete = namenode.complete(src, clientName, fileLen, lastBlockId);
} else if (namenodeProtocolProxy != null
&& namenodeProtocolProxy.isMethodSupported("complete", String.class,
String.class, long.class)) {
fileComplete = namenode.complete(src, clientName, fileLen);
} else {
fileComplete = namenode.complete(src, clientName);
}
return fileComplete;
}
public void closeFile(String src, long fileLen, Block lastBlockId) throws IOException {
long localstart = System.currentTimeMillis();
boolean fileComplete = false;
boolean retried = false;
IOException lastException = null;
// These are the close file semantics for retry that we need:
//
// 1) If we have exhausted the close file timeout but have tried only once,
// retry one more time.
//
// 2) If we have otherwise exhausted the close file timeout, just abort.
while (!fileComplete) {
try {
fileComplete = closeFileOnNameNode(src, fileLen, lastBlockId);
} catch (RemoteException re) {
// If the Namenode throws an exception, we need to rethrow the
// exception.
throw re;
} catch (IOException e) {
// Record exception so that we re-throw when we fail.
if (closeFileTimeout <= 0) {
// If the closeFileTimeout is not positive, we should throw the
// exception since otherwise we would retry indefinitely.
throw e;
}
lastException = e;
LOG.warn("Exception closing file on namenode", e);
}
boolean timedOut = (closeFileTimeout > 0 &&
localstart + closeFileTimeout < System.currentTimeMillis());
// Verify the close file timeout has not elapsed.
if (!fileComplete) {
if (!clientRunning || (timedOut && retried)) {
if (lastException != null) {
throw lastException;
}
String msg = "Unable to close file because dfsclient " +
" was unable to contact the HDFS servers." +
" clientRunning " + clientRunning +
" closeFileTimeout " + closeFileTimeout;
LOG.info(msg);
throw new IOException(msg);
}
try {
retried = true;
Thread.sleep(400);
if (System.currentTimeMillis() - localstart > 5000) {
LOG.info("Could not complete file " + src + " retrying...");
}
} catch (InterruptedException ie) {
}
}
}
}
/**
* Append to an existing HDFS file.
*
* @param src file name
* @param buffersize buffer size
* @param progress for reporting write-progress
* @return an output stream for writing into the file
* @throws IOException
* @see ClientProtocol#append(String, String)
*/
OutputStream append(String src, int buffersize, Progressable progress
) throws IOException {
checkOpen();
FileStatus stat = null;
LocatedBlock lastBlock = null;
boolean success = false;
int namespaceId = 0;
try {
stat = getFileInfo(src);
if (namenodeProtocolProxy != null
&& namenodeProtocolProxy.isMethodSupported(
"appendAndFetchMetaInfo", String.class, String.class)) {
LocatedBlockWithMetaInfo loc = namenode.appendAndFetchMetaInfo(src,
clientName);
lastBlock = loc;
if (loc != null) {
namespaceId = loc.getNamespaceID();
updateDataTransferProtocolVersionIfNeeded(loc.getDataProtocolVersion());
getNewNameNodeIfNeeded(loc.getMethodFingerPrint());
}
} else {
lastBlock = namenode.append(src, clientName);
}
OutputStream result = new DFSOutputStream(src, buffersize, progress,
lastBlock, stat, conf.getInt("io.bytes.per.checksum", 512), namespaceId);
leasechecker.put(src, result);
success = true;
return result;
} catch(RemoteException re) {
throw re.unwrapRemoteException(FileNotFoundException.class,
AccessControlException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
} finally {
if (!success) {
try {
namenode.abandonFile(src, clientName);
} catch (RemoteException e) {
if (e.unwrapRemoteException() instanceof LeaseExpiredException) {
LOG.debug(String.format(
"client %s attempting to abandon file %s which it does not own",
clientName, src),
e
);
} else {
throw e;
}
}
}
}
}
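// Usage sketch (hypothetical path): reopen an existing file and add bytes;
// append() resumes writing at the last partial block when one exists. The
// method is package-private, so applications go through DistributedFileSystem.
//
//   OutputStream out = append("/tmp/demo.txt",
//       conf.getInt("io.file.buffer.size", 4096), null);
//   out.write("more\n".getBytes());
//   out.close();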
/**
* Set replication for an existing file.
*
* @see ClientProtocol#setReplication(String, short)
* @param replication
* @throws IOException
* @return true if successful, or false if the file does not exist
*/
public boolean setReplication(String src,
short replication
) throws IOException {
try {
return namenode.setReplication(src, replication);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
}
}
/**
* Move blocks from src to trg and delete src
* See {@link ClientProtocol#concat(String, String [])}.
*/
public void concat(String trg, String[] srcs, boolean restricted)
throws IOException {
checkOpen();
try {
if (namenodeProtocolProxy != null
&& namenodeProtocolProxy.isMethodSupported("concat", String.class,
String[].class, boolean.class)) {
namenode.concat(trg, srcs, restricted);
} else if (!restricted){
throw new UnsupportedOperationException(
"Namenode does not support variable length blocks");
} else {
namenode.concat(trg, srcs);
}
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
}
}
/**
* See {@link ClientProtocol#hardLink(String, String)}.
*/
public boolean hardLink(String src, String dst) throws IOException {
checkOpen();
try {
return namenode.hardLink(src, dst);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
}
}
/**
* Rename file or directory.
* See {@link ClientProtocol#rename(String, String)}.
*/
public boolean rename(String src, String dst) throws IOException {
checkOpen();
try {
return namenode.rename(src, dst);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
}
}
/**
* Delete file or directory.
* See {@link ClientProtocol#delete(String)}.
*/
@Deprecated
public boolean delete(String src) throws IOException {
checkOpen();
return namenode.delete(src, true);
}
/**
* Delete a file or directory.
* Deletes the contents of the directory if it is non-empty and recursive
* is set to true.
*/
public boolean delete(String src, boolean recursive) throws IOException {
checkOpen();
try {
return namenode.delete(src, recursive);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class);
}
}
/** Implemented using getFileInfo(src)
*/
public boolean exists(String src) throws IOException {
checkOpen();
return getFileInfo(src) != null;
}
/** @deprecated Use getFileStatus() instead */
@Deprecated
public boolean isDirectory(String src) throws IOException {
FileStatus fs = getFileInfo(src);
if (fs != null)
return fs.isDir();
else
throw new FileNotFoundException("File does not exist: " + src);
}
/**
* Convert an HdfsFileStatus to a FileStatus
* @param stat an HdfsFileStatus
* @param src parent path in string representation
* @return a FileStatus object
*/
private static FileStatus toFileStatus(HdfsFileStatus stat, String src) {
if (stat == null) {
return null;
}
return new FileStatus(stat.getLen(), stat.isDir(), stat.getReplication(),
stat.getBlockSize(), stat.getModificationTime(),
stat.getAccessTime(),
stat.getPermission(), stat.getOwner(), stat.getGroup(),
stat.getFullPath(new Path(src))); // full path
}
/**
* Convert an HdfsFileStatus and its block locations to a LocatedFileStatus
* @param stat an HdfsFileStatus
* @param locs the file's block locations
* @param src parent path in string representation
* @return a FileStatus object
*/
private static LocatedFileStatus toLocatedFileStatus(
HdfsFileStatus stat, LocatedBlocks locs, String src) {
if (stat == null) {
return null;
}
return new LocatedFileStatus(stat.getLen(),
stat.isDir(), stat.getReplication(),
stat.getBlockSize(), stat.getModificationTime(),
stat.getAccessTime(),
stat.getPermission(), stat.getOwner(), stat.getGroup(),
stat.getFullPath(new Path(src)), // full path
DFSUtil.locatedBlocks2Locations(locs));
}
/**
* Get a listing of the indicated directory
*/
public FileStatus[] listPaths(String src) throws IOException {
checkOpen();
metrics.incLsCalls();
try {
if (namenodeProtocolProxy == null) {
return versionBasedListPath(src);
}
return methodBasedListPath(src);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class);
}
}
private FileStatus[] versionBasedListPath(String src) throws IOException {
if (namenodeVersion >= ClientProtocol.ITERATIVE_LISTING_VERSION) {
return iterativeListing(src);
} else if (namenodeVersion >= ClientProtocol.OPTIMIZE_FILE_STATUS_VERSION) {
HdfsFileStatus[] hdfsStats = namenode.getHdfsListing(src);
if (hdfsStats == null) {
return null;
}
FileStatus[] stats = new FileStatus[hdfsStats.length];
for (int i = 0; i < stats.length; i++) {
stats[i] = toFileStatus(hdfsStats[i], src);
}
return stats;
} else {
return namenode.getListing(src);
}
}
private FileStatus[] methodBasedListPath(String src) throws IOException {
if (namenodeProtocolProxy.isMethodSupported(
"getPartialListing", String.class, byte[].class)) {
return iterativeListing(src);
} else if (namenodeProtocolProxy.isMethodSupported(
"getHdfsListing", String.class)) {
HdfsFileStatus[] hdfsStats = namenode.getHdfsListing(src);
if (hdfsStats == null) {
return null;
}
FileStatus[] stats = new FileStatus[hdfsStats.length];
for (int i = 0; i < stats.length; i++) {
stats[i] = toFileStatus(hdfsStats[i], src);
}
return stats;
} else {
return namenode.getListing(src);
}
}
public boolean isConcatAvailable() throws IOException {
if (namenodeProtocolProxy == null) {
if (namenodeVersion >= ClientProtocol.CONCAT_VERSION)
return true;
} else {
return namenodeProtocolProxy.isMethodSupported(
"concat", String.class, String[].class);
}
return false;
}
}
/**
* Get a partial listing of the indicated directory
*
* It is recommended to use HdfsFileStatus.EMPTY_NAME as startAfter
* if the application wants to fetch a listing starting from
* the first entry in the directory
*
* @see ClientProtocol#getLocatedPartialListing(String, byte[])
*/
public RemoteIterator<LocatedFileStatus> listPathWithLocation(
final String src) throws IOException {
checkOpen();
try {
if (namenodeProtocolProxy == null) {
return versionBasedListPathWithLocation(src);
}
return methodBasedListPathWithLocation(src);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class);
}
}
/** List a directory with location based on version */
private RemoteIterator<LocatedFileStatus> versionBasedListPathWithLocation(
final String src) throws IOException {
if (namenodeVersion >= ClientProtocol.BULK_BLOCK_LOCATIONS_VERSION) {
return iteratorListing(src);
} else {
return arrayListing(src);
}
}
/** List a directory with location based on method */
private RemoteIterator<LocatedFileStatus> methodBasedListPathWithLocation(
final String src) throws IOException {
if (namenodeProtocolProxy.isMethodSupported(
"getLocatedPartialListing", String.class, byte[].class)) {
return iteratorListing(src);
} else {
return arrayListing(src);
}
}
/** create the iterator from an array of file status */
private RemoteIterator<LocatedFileStatus> arrayListing(final String src)
throws IOException {
return new RemoteIterator<LocatedFileStatus>() {
private FileStatus[] stats;
private int i = 0;
{ //initializer
stats = listPaths(src);
if (stats == null) {
throw new FileNotFoundException("File " + src + " does not exist.");
}
}
@Override
public boolean hasNext() throws IOException {
return i < stats.length;
}
@Override
public LocatedFileStatus next() throws IOException {
if (!hasNext()) {
throw new NoSuchElementException("No more entry in " + src);
}
FileStatus result = stats[i++];
BlockLocation[] locs = result.isDir() ? null :
getBlockLocations(result.getPath().toUri().getPath(), 0, result.getLen());
return new LocatedFileStatus(result, locs);
}
};
}
/** create the iterator from the located directory listing */
private RemoteIterator<LocatedFileStatus> iteratorListing(final String src)
throws IOException {
return new RemoteIterator<LocatedFileStatus>() {
private LocatedDirectoryListing thisListing;
private int i;
{ // initializer
// fetch the first batch of entries in the directory
thisListing = namenode.getLocatedPartialListing(
src, HdfsFileStatus.EMPTY_NAME);
if (thisListing == null) { // the directory does not exist
throw new FileNotFoundException("File " + src + " does not exist.");
}
}
@Override
public boolean hasNext() throws IOException {
if (i>=thisListing.getPartialListing().length
&& thisListing.hasMore()) {
// current listing is exhausted & fetch a new listing
thisListing = namenode.getLocatedPartialListing(
src, thisListing.getLastName());
if (thisListing == null) {
throw new FileNotFoundException("File " + src + " does not exist.");
}
i = 0;
}
return i < thisListing.getPartialListing().length;
}
@Override
public LocatedFileStatus next() throws IOException {
if (!hasNext()) {
throw new java.util.NoSuchElementException("No more entry in " + src);
}
return toLocatedFileStatus(
thisListing.getPartialListing()[i],
thisListing.getBlockLocations()[i++], src);
}
};
}
/**
* List the given path iteratively if the directory is large
*
* @param src a path
* @return a listing of the path
* @throws IOException if any IO error occurs
*/
private FileStatus[] iterativeListing(String src) throws IOException {
// fetch the first batch of entries in the directory
DirectoryListing thisListing = namenode.getPartialListing(
src, HdfsFileStatus.EMPTY_NAME);
if (thisListing == null) { // the directory does not exist
return null;
}
HdfsFileStatus[] partialListing = thisListing.getPartialListing();
if (!thisListing.hasMore()) { // got all entries of the directory
FileStatus[] stats = new FileStatus[partialListing.length];
for (int i = 0; i < partialListing.length; i++) {
stats[i] = toFileStatus(partialListing[i], src);
}
return stats;
}
// The directory is too big to fetch in a single batch;
// estimate the total number of entries in the directory
int totalNumEntries =
partialListing.length + thisListing.getRemainingEntries();
ArrayList<FileStatus> listing =
new ArrayList<FileStatus>(totalNumEntries);
// add the first batch of entries to the array list
for (HdfsFileStatus fileStatus : partialListing) {
listing.add(toFileStatus(fileStatus, src));
}
// now fetch more entries
do {
thisListing = namenode.getPartialListing(src, thisListing.getLastName());
if (thisListing == null) {
return null; // the directory is deleted
}
partialListing = thisListing.getPartialListing();
for (HdfsFileStatus fileStatus : partialListing) {
listing.add(toFileStatus(fileStatus, src));
}
} while (thisListing.hasMore());
return listing.toArray(new FileStatus[listing.size()]);
}
public FileStatus getFileInfo(String src) throws IOException {
checkOpen();
try {
if (namenodeProtocolProxy == null) {
return versionBasedGetFileInfo(src);
}
return methodBasedGetFileInfo(src);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class);
}
}
/** Get file info: decide which rpc to call based on protocol version */
private FileStatus versionBasedGetFileInfo(String src) throws IOException {
if (namenodeVersion >= ClientProtocol.OPTIMIZE_FILE_STATUS_VERSION) {
return toFileStatus(namenode.getHdfsFileInfo(src), src);
} else {
return namenode.getFileInfo(src);
}
}
/** Get file info: decide which rpc to call based on server methods*/
private FileStatus methodBasedGetFileInfo(String src) throws IOException {
if (namenodeProtocolProxy.isMethodSupported(
"getHdfsFileInfo", String.class)) {
return toFileStatus(namenode.getHdfsFileInfo(src), src);
} else {
return namenode.getFileInfo(src);
}
}
/**
* Get the checksum of a file.
* @param src The file path
* @return The checksum
* @see DistributedFileSystem#getFileChecksum(Path)
*/
MD5MD5CRC32FileChecksum getFileChecksum(String src) throws IOException {
checkOpen();
return getFileChecksum(dataTransferVersion,
src, namenode, namenodeProtocolProxy, socketFactory, socketTimeout);
}
/**
* Get the checksum of a file.
* @param src The file path
* @return The checksum
*/
public static MD5MD5CRC32FileChecksum getFileChecksum(
int dataTransferVersion, String src,
ClientProtocol namenode, ProtocolProxy<ClientProtocol> namenodeProxy,
SocketFactory socketFactory, int socketTimeout
) throws IOException {
//get all block locations
final LocatedBlocks locatedBlocks = callGetBlockLocations(
namenode, src, 0, Long.MAX_VALUE, isMetaInfoSuppoted(namenodeProxy));
int namespaceId = 0;
if (locatedBlocks instanceof LocatedBlocksWithMetaInfo) {
LocatedBlocksWithMetaInfo lBlocks = (LocatedBlocksWithMetaInfo)locatedBlocks;
dataTransferVersion = lBlocks.getDataProtocolVersion();
namespaceId = lBlocks.getNamespaceID();
} else if (dataTransferVersion == -1) {
dataTransferVersion = namenode.getDataTransferProtocolVersion();
}
final List<LocatedBlock> locatedblocks = locatedBlocks.getLocatedBlocks();
final DataOutputBuffer md5out = new DataOutputBuffer();
int bytesPerCRC = 0;
long crcPerBlock = 0;
//get block checksum for each block
for(int i = 0; i < locatedblocks.size(); i++) {
LocatedBlock lb = locatedblocks.get(i);
final Block block = lb.getBlock();
final DatanodeInfo[] datanodes = lb.getLocations();
//try each datanode location of the block
final int timeout = (socketTimeout > 0) ? (socketTimeout +
HdfsConstants.READ_TIMEOUT_EXTENSION * datanodes.length) : 0;
boolean done = false;
for(int j = 0; !done && j < datanodes.length; j++) {
//connect to a datanode
final Socket sock = socketFactory.createSocket();
NetUtils.connect(sock,
NetUtils.createSocketAddr(datanodes[j].getName()),
timeout);
sock.setSoTimeout(timeout);
DataOutputStream out = new DataOutputStream(
new BufferedOutputStream(NetUtils.getOutputStream(sock),
DataNode.SMALL_BUFFER_SIZE));
DataInputStream in = new DataInputStream(NetUtils.getInputStream(sock));
// get block MD5
try {
if (LOG.isDebugEnabled()) {
LOG.debug("write to " + datanodes[j].getName() + ": "
+ DataTransferProtocol.OP_BLOCK_CHECKSUM +
", block=" + block);
}
out.writeShort(dataTransferVersion);
out.write(DataTransferProtocol.OP_BLOCK_CHECKSUM);
if (dataTransferVersion >= DataTransferProtocol.FEDERATION_VERSION) {
out.writeInt(namespaceId);
}
out.writeLong(block.getBlockId());
out.writeLong(block.getGenerationStamp());
out.flush();
final short reply = in.readShort();
if (reply != DataTransferProtocol.OP_STATUS_SUCCESS) {
throw new IOException("Bad response " + reply + " for block "
+ block + " from datanode " + datanodes[j].getName());
}
//read byte-per-checksum
final int bpc = in.readInt();
if (i == 0) { //first block
bytesPerCRC = bpc;
}
else if (bpc != bytesPerCRC) {
throw new IOException("Byte-per-checksum not matched: bpc=" + bpc
+ " but bytesPerCRC=" + bytesPerCRC);
}
//read crc-per-block
final long cpb = in.readLong();
if (locatedblocks.size() > 1 && i == 0) {
crcPerBlock = cpb;
}
//read md5
final MD5Hash md5 = MD5Hash.read(in);
md5.write(md5out);
done = true;
if (LOG.isDebugEnabled()) {
if (i == 0) {
LOG.debug("set bytesPerCRC=" + bytesPerCRC
+ ", crcPerBlock=" + crcPerBlock);
}
LOG.debug("got reply from " + datanodes[j].getName()
+ ": md5=" + md5);
}
} catch (IOException ie) {
LOG.warn("src=" + src + ", datanodes[" + j + "].getName()="
+ datanodes[j].getName(), ie);
} finally {
IOUtils.closeStream(in);
IOUtils.closeStream(out);
IOUtils.closeSocket(sock);
}
}
if (!done) {
throw new IOException("Fail to get block MD5 for " + block);
}
}
//compute file MD5
final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
return new MD5MD5CRC32FileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
}
/**
* Set permissions to a file or directory.
* @param src path name.
* @param permission
* @throws FileNotFoundException if the file does not exist.
*/
public void setPermission(String src, FsPermission permission
) throws IOException {
checkOpen();
try {
namenode.setPermission(src, permission);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
FileNotFoundException.class);
}
}
/**
* Set file or directory owner.
* @param src path name.
* @param username user id.
* @param groupname user group.
* @throws FileNotFoundException if the file does not exist.
*/
public void setOwner(String src, String username, String groupname
) throws IOException {
checkOpen();
try {
namenode.setOwner(src, username, groupname);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
FileNotFoundException.class);
}
}
public DiskStatus getDiskStatus() throws IOException {
long rawNums[] = namenode.getStats();
return new DiskStatus(rawNums[0], rawNums[1], rawNums[2]);
}
/**
* Return the disk status for the current namespace.
*/
public DiskStatus getNSDiskStatus() throws IOException {
long rawNums[] = namenode.getStats();
// rawNums[6] should be capacityNamespaceUsed
long dfsUsed = (rawNums.length > 6)? rawNums[6]: rawNums[1];
return new DiskStatus(rawNums[0], dfsUsed, rawNums[2]);
}
/**
* @return the total raw capacity of the filesystem, disregarding replication
*/
public long totalRawCapacity() throws IOException {
long rawNums[] = namenode.getStats();
return rawNums[0];
}
/**
* @return the total raw used space in the filesystem, disregarding replication
*/
public long totalRawUsed() throws IOException {
long rawNums[] = namenode.getStats();
return rawNums[1];
}
/**
* Returns count of blocks with no good replicas left. Normally should be
* zero.
* @throws IOException
*/
public long getMissingBlocksCount() throws IOException {
return namenode.getStats()[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX];
}
/**
* Returns count of blocks with one or more replicas missing.
* @throws IOException
*/
public long getUnderReplicatedBlocksCount() throws IOException {
return namenode.getStats()[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX];
}
/**
* Returns count of blocks with at least one replica marked corrupt.
* @throws IOException
*/
public long getCorruptBlocksCount() throws IOException {
return namenode.getStats()[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX];
}
/**
* @return a list in which each entry describes a corrupt file/block
* @throws AccessControlException
* @throws IOException
*/
public CorruptFileBlocks listCorruptFileBlocks(String path,
String cookie)
throws IOException {
if (namenodeProtocolProxy == null) {
return versionBasedListCorruptFileBlocks(path, cookie);
}
return methodBasedListCorruptFileBlocks(path, cookie);
}
/** Version based list corrupt file blocks */
private CorruptFileBlocks versionBasedListCorruptFileBlocks(String path,
String cookie) throws IOException {
if (namenodeVersion < ClientProtocol.LIST_CORRUPT_FILEBLOCKS_VERSION) {
LOG.info("NameNode version is " + namenodeVersion +
" Using older version of getCorruptFiles.");
if (cookie != null ) {
return new CorruptFileBlocks(new String[0], "");
}
ArrayList<String> str = new ArrayList<String>();
for (FileStatus stat : namenode.getCorruptFiles()) {
String filename = stat.getPath().toUri().getPath();
if (filename.startsWith(path)) {
str.add(filename);
}
}
return new CorruptFileBlocks(str.toArray(new String[str.size()]), "");
}
return namenode.listCorruptFileBlocks(path, cookie);
}
/** Method based listCorruptFileBlocks */
private CorruptFileBlocks methodBasedListCorruptFileBlocks(String path,
String cookie) throws IOException {
if (!namenodeProtocolProxy.isMethodSupported("listCorruptFileBlocks",
String.class, String.class)) {
LOG.info("NameNode version is " + namenodeVersion +
" Using older version of getCorruptFiles.");
if (cookie != null ) {
return new CorruptFileBlocks(new String[0], "");
}
ArrayList<String> str = new ArrayList<String>();
for (FileStatus stat : namenode.getCorruptFiles()) {
String filename = stat.getPath().toUri().getPath();
if (filename.startsWith(path)) {
str.add(filename);
}
}
return new CorruptFileBlocks(str.toArray(new String[str.size()]), "");
}
return namenode.listCorruptFileBlocks(path, cookie);
}
public DatanodeInfo[] datanodeReport(DatanodeReportType type)
throws IOException {
return namenode.getDatanodeReport(type);
}
/**
* Enter, leave or get safe mode.
* See {@link ClientProtocol#setSafeMode(FSConstants.SafeModeAction)}
* for more details.
*
* @see ClientProtocol#setSafeMode(FSConstants.SafeModeAction)
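* <p>A sketch of typical administrative usage (assuming an open client
* named {@code client}):
* <pre>{@code
* if (client.setSafeMode(SafeModeAction.SAFEMODE_GET)) {
*   client.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
* }
* }</pre>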
*/
public boolean setSafeMode(SafeModeAction action) throws IOException {
return namenode.setSafeMode(action);
}
/**
* Save namespace image.
* See {@link ClientProtocol#saveNamespace()}
* for more details.
*
* @see ClientProtocol#saveNamespace()
*/
void saveNamespace(boolean force, boolean uncompressed)
throws AccessControlException, IOException {
try {
if (namenodeProtocolProxy == null) {
versionBasedSaveNamespace(force, uncompressed);
} else {
methodBasedSaveNamespace(force, uncompressed);
}
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class);
}
}
/** Version-based save namespace */
private void versionBasedSaveNamespace(boolean force, boolean uncompressed)
throws AccessControlException, IOException {
if (namenodeVersion >= ClientProtocol.SAVENAMESPACE_FORCE) {
namenode.saveNamespace(force, uncompressed);
} else {
namenode.saveNamespace();
}
}
/** Method-based save namespace */
private void methodBasedSaveNamespace(boolean force, boolean uncompressed)
throws AccessControlException, IOException {
if (namenodeProtocolProxy.isMethodSupported(
"saveNamespace", boolean.class, boolean.class)) {
namenode.saveNamespace(force, uncompressed);
} else {
namenode.saveNamespace();
}
}
/**
* Refresh the hosts and exclude files. (Rereads them.)
* See {@link ClientProtocol#refreshNodes()}
* for more details.
*
* @see ClientProtocol#refreshNodes()
*/
public void refreshNodes() throws IOException {
namenode.refreshNodes();
}
/**
* Dumps DFS data structures into specified file.
* See {@link ClientProtocol#metaSave(String)}
* for more details.
*
* @see ClientProtocol#metaSave(String)
*/
public void metaSave(String pathname) throws IOException {
namenode.metaSave(pathname);
}
/**
* @see ClientProtocol#finalizeUpgrade()
*/
public void finalizeUpgrade() throws IOException {
namenode.finalizeUpgrade();
}
/**
* @see ClientProtocol#distributedUpgradeProgress(FSConstants.UpgradeAction)
*/
public UpgradeStatusReport distributedUpgradeProgress(UpgradeAction action
) throws IOException {
return namenode.distributedUpgradeProgress(action);
}
public String getClusterName() throws IOException {
if (namenodeProtocolProxy.isMethodSupported(
"getClusterName")) {
return namenode.getClusterName();
} else {
return null;
}
}
/** Re-populate the namespace and diskspace count of every node with quota */
public void recount() throws IOException {
if (namenodeProtocolProxy.isMethodSupported("recount")) {
namenode.recount();
}
}
/**
* Fetch the list of files that have been open longer than a
* specified amount of time.
* @param prefix path prefix specifying the subset of files to examine
* @param millis select files that have been open longer than this
* @param start where to start searching when there are large numbers of
* files returned. Pass null the first time, then pass the last
* value returned by the previous call for subsequent calls.
* @return array of OpenFileInfo objects
* @throws IOException
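* <p>A sketch of the iteration pattern described above (the prefix and
* threshold are illustrative; the OpenFileInfo accessor for the path is
* an assumption):
* <pre>{@code
* String start = null;
* OpenFileInfo[] infos;
* do {
*   infos = client.iterativeGetOpenFiles(new Path("/user"), 60000, start);
*   for (OpenFileInfo info : infos) {
*     // process each long-open file here
*   }
*   if (infos.length > 0) {
*     start = infos[infos.length - 1].filePath; // last value returned
*   }
* } while (infos.length > 0);
* }</pre>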
*/
public OpenFileInfo[] iterativeGetOpenFiles(
Path prefix, int millis, String start) throws IOException {
checkOpen();
try {
return namenode.iterativeGetOpenFiles(prefix.toString(), millis, start);
} catch (RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
}
}
/**
*/
public boolean mkdirs(String src) throws IOException {
return mkdirs(src, null);
}
/**
* Create a directory (or hierarchy of directories) with the given
* name and permission.
*
* @param src The path of the directory being created
* @param permission The permission of the directory being created.
* If permission == null, use {@link FsPermission#getDefault()}.
* @return True if the operation succeeds.
* @see ClientProtocol#mkdirs(String, FsPermission)
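* <p>A minimal sketch (assuming an open client named {@code client});
* passing null applies the default permission before the umask:
* <pre>{@code
* boolean created = client.mkdirs("/user/alice/logs", null);
* }</pre>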
*/
public boolean mkdirs(String src, FsPermission permission) throws IOException {
checkOpen();
if (permission == null) {
permission = FsPermission.getDefault();
}
FsPermission masked = permission.applyUMask(FsPermission.getUMask(conf));
LOG.debug(src + ": masked=" + masked);
try {
metrics.incNumCreateDirOps();
return namenode.mkdirs(src, masked);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
}
}
public ContentSummary getContentSummary(String src) throws IOException {
try {
return namenode.getContentSummary(src);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
FileNotFoundException.class);
}
}
/**
* Sets or resets quotas for a directory.
* @see org.apache.hadoop.hdfs.protocol.ClientProtocol#setQuota(String, long, long)
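* <p>A sketch of the quota sentinels (paths illustrative):
* <pre>{@code
* // set a namespace quota of 10000 entries, leave the diskspace quota alone
* client.setQuota("/user/alice", 10000, FSConstants.QUOTA_DONT_SET);
* // clear both quotas
* client.setQuota("/user/alice",
*                 FSConstants.QUOTA_RESET, FSConstants.QUOTA_RESET);
* }</pre>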
*/
void setQuota(String src, long namespaceQuota, long diskspaceQuota)
throws IOException {
// sanity check
if ((namespaceQuota <= 0 && namespaceQuota != FSConstants.QUOTA_DONT_SET &&
namespaceQuota != FSConstants.QUOTA_RESET) ||
(diskspaceQuota <= 0 && diskspaceQuota != FSConstants.QUOTA_DONT_SET &&
diskspaceQuota != FSConstants.QUOTA_RESET)) {
throw new IllegalArgumentException("Invalid values for quota : " +
namespaceQuota + " and " +
diskspaceQuota);
}
try {
namenode.setQuota(src, namespaceQuota, diskspaceQuota);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
FileNotFoundException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
}
}
/**
* Set the modification and access time of a file.
* @throws FileNotFoundException if the path is not a file
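* <p>A minimal sketch (assuming an open client named {@code client}):
* <pre>{@code
* long now = System.currentTimeMillis();
* client.setTimes("/user/alice/data.txt", now, now); // mtime, then atime
* }</pre>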
*/
public void setTimes(String src, long mtime, long atime) throws IOException {
checkOpen();
try {
namenode.setTimes(src, mtime, atime);
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
FileNotFoundException.class);
}
}
private int numNodeLeft(DatanodeInfo nodes[],
AbstractMap<DatanodeInfo, DatanodeInfo> deadNodes) {
int nodesLeft = 0;
if (nodes != null) {
for (int i = 0; i < nodes.length; i++) {
if (!deadNodes.containsKey(nodes[i])) {
nodesLeft++;
}
}
}
return nodesLeft;
}
/**
* Pick the best node from which to stream the data.
* Entries in nodes are already in the priority order
*/
private DatanodeInfo bestNode(DatanodeInfo nodes[],
AbstractMap<DatanodeInfo, DatanodeInfo> deadNodes)
throws IOException {
if (nodes != null) {
for (int i = 0; i < nodes.length; i++) {
if (!deadNodes.containsKey(nodes[i])) {
return nodes[i];
}
}
}
StringBuilder errMsgr = new StringBuilder(
"No live nodes contain current block ");
errMsgr.append("Block locations:");
for (DatanodeInfo datanode : nodes) {
errMsgr.append(" ");
errMsgr.append(datanode.toString());
}
errMsgr.append(" Dead nodes: ");
for (DatanodeInfo datanode : deadNodes.values()) {
errMsgr.append(" ");
errMsgr.append(datanode.toString());
}
throw new IOException(errMsgr.toString());
}
boolean isLeaseCheckerStarted() {
return leasechecker.daemon != null;
}
/** Lease management*/
class LeaseChecker extends LeaseRenewal {
/** A map from src -> DFSOutputStream of files that are currently being
* written by this client.
*/
private final SortedMap<String, OutputStream> pendingCreates
= new TreeMap<String, OutputStream>();
private Daemon daemon = null;
public LeaseChecker(String clientName, Configuration conf) {
super(clientName, conf);
}
synchronized void put(String src, OutputStream out) {
if (clientRunning) {
if (daemon == null) {
daemon = new Daemon(this);
daemon.start();
}
pendingCreates.put(src, out);
}
}
synchronized void remove(String src) {
pendingCreates.remove(src);
}
void interruptAndJoin() throws InterruptedException {
Daemon daemonCopy = null;
synchronized (this) {
if (daemon != null) {
daemon.interrupt();
daemonCopy = daemon;
}
}
if (daemonCopy != null) {
LOG.debug("Wait for lease checker to terminate");
daemonCopy.join();
}
}
synchronized void close() {
while (!pendingCreates.isEmpty()) {
String src = pendingCreates.firstKey();
OutputStream out = pendingCreates.remove(src);
if (out != null) {
try {
out.close();
} catch (IOException ie) {
LOG.error("Exception closing file " + src+ " : " + ie, ie);
}
}
}
}
/**
* Abort all open files. Release resources held. Ignore all errors.
*/
@Override
protected synchronized void abort() {
super.closeRenewal();
clientRunning = false;
while (!pendingCreates.isEmpty()) {
String src = pendingCreates.firstKey();
DFSOutputStream out = (DFSOutputStream)pendingCreates.remove(src);
if (out != null) {
try {
out.abort();
} catch (IOException ie) {
LOG.error("Exception aborting file " + src+ ": ", ie);
}
}
}
RPC.stopProxy(rpcNamenode); // close connections to the namenode
}
@Override
protected void renew() throws IOException {
synchronized(this) {
if (pendingCreates.isEmpty()) {
return;
}
}
namenode.renewLease(clientName);
}
/** {@inheritDoc} */
public String toString() {
String s = getClass().getSimpleName();
if (LOG.isTraceEnabled()) {
return s + "@" + DFSClient.this + ": "
+ StringUtils.stringifyException(new Throwable("for testing"));
}
return s;
}
}
private static class DataNodeSlowException extends IOException {
public DataNodeSlowException(String msg) {
super(msg);
}
}
/** Utility class to encapsulate data node info and its ip address. */
private static class DNAddrPair {
DatanodeInfo info;
InetSocketAddress addr;
DNAddrPair(DatanodeInfo info, InetSocketAddress addr) {
this.info = info;
this.addr = addr;
}
}
/** This is a wrapper around the connection to a datanode
* and understands checksum, offset etc.
*/
public static class BlockReader extends FSInputChecker {
private Socket dnSock; //for now just sending checksumOk.
private DataInputStream in;
protected DataChecksum checksum;
protected long lastChunkOffset = -1;
protected long lastChunkLen = -1;
private long lastSeqNo = -1;
private boolean transferBlockSize;
protected long startOffset;
protected long firstChunkOffset;
protected int bytesPerChecksum;
protected int checksumSize;
protected boolean gotEOS = false;
protected boolean blkLenInfoUpdated = false;
protected boolean isBlockFinalized;
protected long updatedBlockLength;
byte[] skipBuf = null;
ByteBuffer checksumBytes = null;
int packetLen = 0;
int dataLeft = 0;
boolean isLastPacket = false;
protected long minSpeedBps;
protected long bytesRead;
protected long timeRead;
protected boolean slownessLoged;
protected boolean isReadLocal = false;
protected boolean isReadRackLocal = false;
protected FileSystem.Statistics fsStats = null;
private long artificialSlowdown = 0;
// It's a temporary flag used for tests
public boolean ENABLE_THROW_FOR_SLOW = false;
void setArtificialSlowdown(long period) {
artificialSlowdown = period;
}
/* FSInputChecker interface */
/* Same interface as java.io.InputStream#read(),
* used by DFSInputStream#read().
* This violates one rule when there is a checksum error:
* "Read should not modify user buffer before successful read"
* because it first reads the data into the user buffer and then checks
* the checksum.
*/
@Override
public synchronized int read(byte[] buf, int off, int len)
throws IOException {
//for the first read, skip the extra bytes at the front.
if (lastChunkLen < 0 && startOffset > firstChunkOffset) {
// Skip these bytes. But don't call this.skip()!
int toSkip = (int)(startOffset - firstChunkOffset);
if ( skipBuf == null ) {
skipBuf = new byte[bytesPerChecksum];
}
if ( super.read(skipBuf, 0, toSkip) != toSkip ) {
// should never happen
throw new IOException("Could not skip required number of bytes");
}
updateStatsAfterRead(toSkip);
}
boolean eosBefore = gotEOS;
int nRead = super.read(buf, off, len);
// if gotEOS was set in the previous read and checksum is enabled :
if (dnSock != null && gotEOS && !eosBefore && nRead >= 0 && needChecksum()) {
//checksum is verified and there are no errors.
checksumOk(dnSock);
}
updateStatsAfterRead(nRead);
return nRead;
}
@Override
public synchronized long skip(long n) throws IOException {
/* How can we make sure we don't throw a ChecksumException, at least
* in majority of the cases?. This one throws. */
if ( skipBuf == null ) {
skipBuf = new byte[bytesPerChecksum];
}
long nSkipped = 0;
while ( nSkipped < n ) {
int toSkip = (int)Math.min(n-nSkipped, skipBuf.length);
int ret = read(skipBuf, 0, toSkip);
if ( ret <= 0 ) {
return nSkipped;
}
nSkipped += ret;
}
return nSkipped;
}
@Override
public int read() throws IOException {
throw new IOException("read() is not expected to be invoked. " +
"Use read(buf, off, len) instead.");
}
@Override
public boolean seekToNewSource(long targetPos) throws IOException {
/* Checksum errors are handled outside the BlockReader.
* DFSInputStream does not always call 'seekToNewSource'. In the
* case of pread(), it just tries a different replica without seeking.
*/
return false;
}
@Override
public void seek(long pos) throws IOException {
throw new IOException("Seek() is not supported in BlockInputChecker");
}
@Override
protected long getChunkPosition(long pos) {
throw new RuntimeException("getChunkPosition() is not supported, " +
"since seek is not required");
}
public void setReadLocal(boolean isReadLocal) {
this.isReadLocal = isReadLocal;
if (isReadLocal) {
this.isReadRackLocal = true;
}
}
public void setReadRackLocal(boolean isReadSwitchLocal) {
this.isReadRackLocal = isReadSwitchLocal;
}
public void setFsStats(FileSystem.Statistics fsStats) {
this.fsStats = fsStats;
}
public boolean isBlkLenInfoUpdated() {
return blkLenInfoUpdated;
}
public boolean isBlockFinalized() {
return isBlockFinalized;
}
public long getUpdatedBlockLength() {
return updatedBlockLength;
}
public void resetBlockLenInfo() {
blkLenInfoUpdated = false;
}
/**
* Makes sure that checksumBytes has enough capacity
* and limit is set to the number of checksum bytes needed
* to be read.
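* <p>For example, with bytesPerChecksum = 512 and checksumSize = 4, a
* 1500-byte packet spans ceil(1500/512) = 3 chunks, so the limit is set
* to 3 * 4 = 12 checksum bytes.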
*/
private void adjustChecksumBytes(int dataLen) {
int requiredSize =
((dataLen + bytesPerChecksum - 1)/bytesPerChecksum)*checksumSize;
if (checksumBytes == null || requiredSize > checksumBytes.capacity()) {
checksumBytes = ByteBuffer.wrap(new byte[requiredSize]);
} else {
checksumBytes.clear();
}
checksumBytes.limit(requiredSize);
}
/**
* Read the block length information from data stream
*
* @throws IOException
*/
private synchronized void readBlockSizeInfo() throws IOException {
if (!transferBlockSize) {
return;
}
blkLenInfoUpdated = true;
isBlockFinalized = in.readBoolean();
updatedBlockLength = in.readLong();
if (LOG.isDebugEnabled()) {
LOG.debug("ifBlockComplete? " + isBlockFinalized + " block size: "
+ updatedBlockLength);
}
}
@Override
protected synchronized int readChunk(long pos, byte[] buf, int offset,
int len, byte[] checksumBuf)
throws IOException {
// Read one chunk.
if ( gotEOS ) {
if ( startOffset < 0 ) {
// This is mainly for debugging. Can be removed.
throw new IOException( "BlockRead: already got EOS or an error" );
}
startOffset = -1;
return -1;
}
// Read one DATA_CHUNK.
long chunkOffset = lastChunkOffset;
if ( lastChunkLen > 0 ) {
chunkOffset += lastChunkLen;
}
if ( (pos + firstChunkOffset) != chunkOffset ) {
throw new IOException("Mismatch in pos : " + pos + " + " +
firstChunkOffset + " != " + chunkOffset);
}
long startTime = System.currentTimeMillis();
// Read next packet if the previous packet has been read completely.
if (dataLeft <= 0) {
// check read speed.
// Time is counted only inside readChunk(), not outside. This is to
// distinguish between the application consuming data slowly and the
// read from the datanodes being slow. We don't want to throw an
// exception in the former case. So the speed measured here is really
// how much slower DFSClient reads from the datanodes than the
// application consumes the data. That's the real slowness case users
// care about.
//
if (minSpeedBps > 0) {
bytesRead += packetLen;
if (bytesRead > NUM_BYTES_CHECK_READ_SPEED) {
if (timeRead > 0 && bytesRead * 1000 / timeRead < minSpeedBps) {
if (!slownessLoged) {
FileSystem.LogForCollect
.info("Too slow when reading block. bytes: " + bytesRead
+ " time: " + timeRead + " msec. Path: "
+ super.file.getName());
}
if (this.isReadLocal) {
if (!slownessLoged) {
LOG.info("Not switch from a local datanode.");
slownessLoged = true;
}
} else if (this.isReadRackLocal) {
if (!slownessLoged) {
LOG.info("Not switch from a datanode from the same rack.");
slownessLoged = true;
}
} else {
if (!ENABLE_THROW_FOR_SLOW) {
if (!slownessLoged) {
LOG.info("Won't swtich to another datanode for not disabled.");
slownessLoged = true;
}
} else {
throw new DataNodeSlowException(
"Block Reading Speed is too slow");
}
}
}
timeRead = 0;
bytesRead = 0;
}
}
//Read packet headers.
packetLen = in.readInt();
if (packetLen == 0) {
// the end of the stream
gotEOS = true;
readBlockSizeInfo();
return 0;
}
long offsetInBlock = in.readLong();
long seqno = in.readLong();
boolean lastPacketInBlock = in.readBoolean();
if (LOG.isDebugEnabled()) {
LOG.debug("DFSClient readChunk got seqno " + seqno +
" offsetInBlock " + offsetInBlock +
" lastPacketInBlock " + lastPacketInBlock +
" packetLen " + packetLen);
}
int dataLen = in.readInt();
// Sanity check the lengths
if ( dataLen < 0 ||
( (dataLen % bytesPerChecksum) != 0 && !lastPacketInBlock ) ||
(seqno != (lastSeqNo + 1)) ) {
throw new IOException("BlockReader: error in packet header" +
"(chunkOffset : " + chunkOffset +
", dataLen : " + dataLen +
", seqno : " + seqno +
" (last: " + lastSeqNo + "))");
}
lastSeqNo = seqno;
isLastPacket = lastPacketInBlock;
dataLeft = dataLen;
adjustChecksumBytes(dataLen);
if (dataLen > 0) {
IOUtils.readFully(in, checksumBytes.array(), 0,
checksumBytes.limit());
}
}
int chunkLen = Math.min(dataLeft, bytesPerChecksum);
if ( chunkLen > 0 ) {
// len should be >= chunkLen
IOUtils.readFully(in, buf, offset, chunkLen);
checksumBytes.get(checksumBuf, 0, checksumSize);
// This is used by unit test to trigger race conditions.
if (artificialSlowdown != 0) {
sleepForUnitTest(artificialSlowdown);
}
}
dataLeft -= chunkLen;
lastChunkOffset = chunkOffset;
lastChunkLen = chunkLen;
if (minSpeedBps > 0) {
this.timeRead += System.currentTimeMillis() - startTime;
}
if ((dataLeft == 0 && isLastPacket) || chunkLen == 0) {
gotEOS = true;
int expectZero = in.readInt();
assert expectZero == 0;
readBlockSizeInfo();
}
if ( chunkLen == 0 ) {
return -1;
}
return chunkLen;
}
protected void updateStatsAfterRead(int bytesRead) {
if (fsStats == null) {
return;
}
if (isReadLocal) {
fsStats.incrementLocalBytesRead(bytesRead);
}
if (isReadRackLocal) {
fsStats.incrementRackLocalBytesRead(bytesRead);
}
}
private BlockReader( String file, long blockId, DataInputStream in,
DataChecksum checksum, boolean verifyChecksum,
long startOffset, long firstChunkOffset,
Socket dnSock, long minSpeedBps,
long dataTransferVersion ) {
super(new Path("/blk_" + blockId + ":of:" + file)/*too non path-like?*/,
1, verifyChecksum,
checksum.getChecksumSize() > 0? checksum : null,
checksum.getBytesPerChecksum(),
checksum.getChecksumSize());
this.dnSock = dnSock;
this.in = in;
this.checksum = checksum;
this.startOffset = Math.max( startOffset, 0 );
this.transferBlockSize =
(dataTransferVersion >= DataTransferProtocol.SEND_DATA_LEN_VERSION);
this.firstChunkOffset = firstChunkOffset;
lastChunkOffset = firstChunkOffset;
lastChunkLen = -1;
bytesPerChecksum = this.checksum.getBytesPerChecksum();
checksumSize = this.checksum.getChecksumSize();
this.bytesRead = 0;
this.timeRead = 0;
this.minSpeedBps = minSpeedBps;
this.slownessLoged = false;
}
/**
* Public constructor
*/
BlockReader(Path file, int numRetries) {
super(file, numRetries);
}
protected BlockReader(Path file, int numRetries, DataChecksum checksum, boolean verifyChecksum) {
super(file,
numRetries,
verifyChecksum,
checksum.getChecksumSize() > 0? checksum : null,
checksum.getBytesPerChecksum(),
checksum.getChecksumSize());
}
public static BlockReader newBlockReader(int dataTransferVersion,
int namespaceId,
Socket sock, String file, long blockId,
long genStamp, long startOffset, long len, int bufferSize) throws IOException {
return newBlockReader(dataTransferVersion, namespaceId,
sock, file, blockId, genStamp, startOffset, len, bufferSize,
true);
}
/**
* Create a BlockReader that reads up to <code>len</code> bytes of the
* given block, starting at <code>startOffset</code>, with checksum
* verification controlled by <code>verifyChecksum</code>.
*/
public static BlockReader newBlockReader( int dataTransferVersion,
int namespaceId,
Socket sock, String file, long blockId,
long genStamp,
long startOffset, long len,
int bufferSize, boolean verifyChecksum)
throws IOException {
return newBlockReader(dataTransferVersion, namespaceId,
sock, file, blockId, genStamp,
startOffset,
len, bufferSize, verifyChecksum, "",
-1);
}
public static BlockReader newBlockReader( int dataTransferVersion,
int namespaceId,
Socket sock, String file,
long blockId,
long genStamp,
long startOffset, long len,
int bufferSize, boolean verifyChecksum,
String clientName, long minSpeedBps)
throws IOException {
// in and out will be closed when sock is closed (by the caller)
DataOutputStream out = new DataOutputStream(
new BufferedOutputStream(NetUtils.getOutputStream(sock,HdfsConstants.WRITE_TIMEOUT)));
//write the header.
ReadBlockHeader readBlockHeader = new ReadBlockHeader(
dataTransferVersion, namespaceId, blockId, genStamp, startOffset, len,
clientName);
readBlockHeader.writeVersionAndOpCode(out);
readBlockHeader.write(out);
out.flush();
//
// Get bytes in block, set streams
//
DataInputStream in = new DataInputStream(
new BufferedInputStream(NetUtils.getInputStream(sock),
bufferSize));
if ( in.readShort() != DataTransferProtocol.OP_STATUS_SUCCESS ) {
throw new IOException("Got error in response to OP_READ_BLOCK " +
"self=" + sock.getLocalSocketAddress() +
", remote=" + sock.getRemoteSocketAddress() +
" for file " + file +
" for block " + blockId);
}
DataChecksum checksum = DataChecksum.newDataChecksum( in , new PureJavaCrc32());
//Warning when we get CHECKSUM_NULL?
// Read the first chunk offset.
long firstChunkOffset = in.readLong();
if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
firstChunkOffset >= (startOffset + checksum.getBytesPerChecksum())) {
throw new IOException("BlockReader: error in first chunk offset (" +
firstChunkOffset + ") startOffset is " +
startOffset + " for file " + file);
}
return new BlockReader(file, blockId, in, checksum, verifyChecksum,
startOffset, firstChunkOffset, sock, minSpeedBps, dataTransferVersion);
}
@Override
public synchronized void close() throws IOException {
startOffset = -1;
checksum = null;
// in will be closed when its Socket is closed.
}
/** kind of like readFully(). Only reads as much as possible.
* And allows use of protected readFully().
*/
public int readAll(byte[] buf, int offset, int len) throws IOException {
return readFully(this, buf, offset, len);
}
/* When the reader reaches end of a block and there are no checksum
* errors, we send OP_STATUS_CHECKSUM_OK to datanode to inform that
* checksum was verified and there was no error.
*/
private void checksumOk(Socket sock) {
try {
OutputStream out = NetUtils.getOutputStream(sock, HdfsConstants.WRITE_TIMEOUT);
byte buf[] = { (DataTransferProtocol.OP_STATUS_CHECKSUM_OK >>> 8) & 0xff,
(DataTransferProtocol.OP_STATUS_CHECKSUM_OK) & 0xff };
out.write(buf);
out.flush();
} catch (IOException e) {
// its ok not to be able to send this.
LOG.debug("Could not write to datanode " + sock.getInetAddress() +
": " + e.getMessage());
}
}
}
/**
* Checks that the given block range covers the given file segment and
* consists of contiguous blocks. This function assumes that the length
* of the queried segment is non-zero, and a non-empty block list is
* expected.
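* <p>For example, with a 256 MB block size, a query for offset = 200 MB and
* length = 100 MB must return exactly the two contiguous blocks starting at
* 0 and 256 MB; a gap between them, or a redundant leading or trailing
* block, fails the check.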
* @param blockRange the set of blocks obtained for the given file segment
* @param offset the start offset of the file segment
* @param length the length of the file segment. Assumed to be positive.
*/
static void checkBlockRange(List<LocatedBlock> blockRange,
long offset, long length) throws IOException {
boolean isValid = false;
if (!blockRange.isEmpty()) {
int numBlocks = blockRange.size();
LocatedBlock firstBlock = blockRange.get(0);
LocatedBlock lastBlock = blockRange.get(numBlocks - 1);
long segmentEnd = offset + length;
// Check that the queried segment is between the beginning of the first
// block and the end of the last block in the block range.
if (firstBlock.getStartOffset() <= offset &&
(segmentEnd <=
lastBlock.getStartOffset() + lastBlock.getBlockSize())) {
isValid = true; // There is a chance the block list is valid
LocatedBlock prevBlock = firstBlock;
for (int i = 1; i < numBlocks; ++i) {
// In this loop, prevBlock is always the block #(i - 1) and curBlock
// is the block #i.
long prevBlkEnd = prevBlock.getStartOffset() +
prevBlock.getBlockSize();
LocatedBlock curBlock = blockRange.get(i);
long curBlkOffset = curBlock.getStartOffset();
if (prevBlkEnd != curBlkOffset || // Blocks are not contiguous
prevBlkEnd <= offset || // Previous block is redundant
segmentEnd <= curBlkOffset) { // Current block is redundant
isValid = false;
break;
}
prevBlock = curBlock;
}
}
}
if (!isValid) {
throw new IOException("Got incorrect block range for " +
"offset=" + offset + ", length=" + length + ": " +
blockRange);
}
}
/****************************************************************
* DFSInputStream provides bytes from a named file. It handles
* negotiation of the namenode and various datanodes as necessary.
****************************************************************/
public class DFSInputStream extends FSInputStream {
private Socket s = null;
private boolean closed = false;
private String src = null;
private long prefetchSize = 10 * defaultBlockSize;
private BlockReader blockReader = null;
private boolean verifyChecksum;
private boolean clearOsBuffer;
private DFSLocatedBlocks locatedBlocks = null;
private DatanodeInfo currentNode = null;
private Block currentBlock = null;
private boolean isCurrentBlockUnderConstruction;
private long pos = 0;
private long blockEnd = -1;
private LocatedBlocks blocks = null;
private int namespaceId; // the namespace that this file belongs to
private int timeWindow = 3000; // wait time window (in msec) if BlockMissingException is caught
/* XXX Use of ConcurrentHashMap is a temp fix. Need to fix
* parallel accesses to DFSInputStream (through preads) properly */
private ConcurrentHashMap<DatanodeInfo, DatanodeInfo> deadNodes =
new ConcurrentHashMap<DatanodeInfo, DatanodeInfo>();
private int buffersize = 1;
private byte[] oneByteBuf = new byte[1]; // used for 'int read()'
void addToDeadNodes(DatanodeInfo dnInfo) {
deadNodes.put(dnInfo, dnInfo);
}
DFSInputStream(String src, int buffersize, boolean verifyChecksum,
boolean clearOsBuffer) throws IOException {
this.src = src;
init(buffersize, verifyChecksum, clearOsBuffer);
}
/**
* Used for snapshot
*/
DFSInputStream(LocatedBlocksWithMetaInfo blocks, int buffersize,
boolean verifyChecksum) throws IOException {
this.blocks = blocks;
this.namespaceId = blocks.getNamespaceID();
updateDataTransferProtocolVersionIfNeeded(blocks.getDataProtocolVersion());
getNewNameNodeIfNeeded(blocks.getMethodFingerPrint());
init(buffersize, verifyChecksum, false);
}
private void init(int buffersize, boolean verifyChecksum,
boolean clearOsBuffer) throws IOException {
this.verifyChecksum = verifyChecksum;
this.buffersize = buffersize;
this.clearOsBuffer = clearOsBuffer;
prefetchSize = conf.getLong("dfs.read.prefetch.size", prefetchSize);
timeWindow = conf.getInt("dfs.client.baseTimeWindow.waitOn.BlockMissingException", timeWindow);
try {
openInfo();
} catch (IOException e) {
incReadExpCntToStats();
throw e;
}
}
/**
* Grab the open-file info from namenode
*/
synchronized void openInfo() throws IOException {
if (src == null && blocks == null) {
throw new IOException("No fine provided to open");
}
LocatedBlocks newInfo = src != null ?
getLocatedBlocks(src, 0, prefetchSize) : blocks;
if (newInfo == null) {
throw new IOException("Cannot open filename " + src);
}
// I think this check is not correct. A file could have been appended to
// between two calls to openInfo().
if (locatedBlocks != null && !locatedBlocks.isUnderConstruction() &&
!newInfo.isUnderConstruction()) {
Iterator<LocatedBlock> oldIter = locatedBlocks.getLocatedBlocks().iterator();
Iterator<LocatedBlock> newIter = newInfo.getLocatedBlocks().iterator();
while (oldIter.hasNext() && newIter.hasNext()) {
if (! oldIter.next().getBlock().equals(newIter.next().getBlock())) {
throw new IOException("Blocklist for " + src + " has changed!");
}
}
}
// if the file is under construction, then fetch size of last block
// from datanode.
if (newInfo.isUnderConstruction() && newInfo.locatedBlockCount() > 0) {
LocatedBlock last = newInfo.get(newInfo.locatedBlockCount()-1);
if (last.getLocations().length > 0) {
try {
Block newBlock = getBlockInfo(last);
// only if the block has data (not null)
if (newBlock != null) {
long newBlockSize = newBlock.getNumBytes();
newInfo.setLastBlockSize(newBlock.getBlockId(), newBlockSize);
}
} catch (IOException e) {
LOG.debug("DFSClient file " + src +
" is being concurrently append to" +
" but datanodes probably does not have block " +
last.getBlock(), e);
}
}
}
this.locatedBlocks = new DFSLocatedBlocks(newInfo);
this.currentNode = null;
}
private void checkLocatedBlocks(LocatedBlocks locatedBlocks)
throws IOException {
if (null == locatedBlocks) {
return;
}
if(!locatedBlocks.isUnderConstruction()) {
return;
}
List<LocatedBlock> lbs = locatedBlocks.getLocatedBlocks();
if (lbs == null) {
return;
}
for (int i = 0; i < lbs.size() - 1; i++) {
if (lbs.get(i).getBlockSize() <= 1) {
throw new IOException(
"File is under construction and namenode hasn't received the second last block yet.");
}
}
}
private LocatedBlocks getLocatedBlocks(String src, long start, long length)
throws IOException {
try {
if (namenodeProtocolProxy != null &&
namenodeProtocolProxy.isMethodSupported("openAndFetchMetaInfo",
String.class, long.class, long.class)) {
LocatedBlocksWithMetaInfo locs =
namenode.openAndFetchMetaInfo(src, start, length);
if (locs != null) {
this.namespaceId = locs.getNamespaceID();
updateDataTransferProtocolVersionIfNeeded(locs.getDataProtocolVersion());
getNewNameNodeIfNeeded(locs.getMethodFingerPrint());
}
checkLocatedBlocks(locs);
return locs;
} else if (namenodeProtocolProxy != null &&
namenodeProtocolProxy.isMethodSupported("open", String.class,
long.class, long.class)) {
VersionedLocatedBlocks locs = namenode.open(src, start, length);
if (locs != null) {
updateDataTransferProtocolVersionIfNeeded(locs.getDataProtocolVersion());
}
checkLocatedBlocks(locs);
return locs;
} else {
LocatedBlocks locs = namenode.getBlockLocations(src, start, length);
checkLocatedBlocks(locs);
return locs;
}
} catch(RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
FileNotFoundException.class);
}
}
/** Get block info from a datanode */
private Block getBlockInfo(LocatedBlock locatedblock) throws IOException {
if (locatedblock == null || locatedblock.getLocations().length == 0) {
return null;
}
int replicaNotFoundCount = locatedblock.getLocations().length;
for(DatanodeInfo datanode : locatedblock.getLocations()) {
ProtocolProxy<ClientDatanodeProtocol> cdp = null;
try {
cdp = createClientDNProtocolProxy(datanode, conf, socketTimeout);
final Block newBlock;
if (cdp.isMethodSupported("getBlockInfo", int.class, Block.class)) {
newBlock = cdp.getProxy().getBlockInfo(
namespaceId, locatedblock.getBlock());
} else {
newBlock = cdp.getProxy().getBlockInfo(locatedblock.getBlock());
}
if (newBlock == null) {
// special case : replica might not be on the DN, treat as 0 length
replicaNotFoundCount--;
} else {
return newBlock;
}
}
catch(IOException ioe) {
if (DFSClient.LOG.isDebugEnabled()) {
DFSClient.LOG.debug("Failed to getBlockInfo from datanode "
+ datanode + " for block " + locatedblock.getBlock(), ioe);
}
} finally {
if (cdp != null) {
RPC.stopProxy(cdp.getProxy());
}
}
}
// Namenode told us about these locations, but none of the datanodes know
// about the replica. This means that we hit the race between pipeline
// creation start and end. We require all of them to fail because some
// other exception could have happened on a DN that has the replica, and
// we want to report that error.
if (replicaNotFoundCount == 0) {
return null;
}
throw new IOException("Cannot obtain block info for " + locatedblock);
}
/**
* Returns whether the file opened is under construction.
*/
public synchronized boolean isUnderConstruction() {
return locatedBlocks.isUnderConstruction();
}
public long getFileLength() {
return locatedBlocks.getFileLength();
}
public DFSLocatedBlocks fetchLocatedBlocks() {
return locatedBlocks;
}
/**
* Returns the datanode from which the stream is currently reading.
*/
public DatanodeInfo getCurrentDatanode() {
return currentNode;
}
/**
* Returns the block containing the target position.
*/
public Block getCurrentBlock() {
return currentBlock;
}
/**
* Return collection of blocks that has already been located.
*/
synchronized List<LocatedBlock> getAllBlocks() throws IOException {
return getBlockRange(0, this.getFileLength());
}
/**
* Get block at the specified position. Fetch it from the namenode if not
* cached.
*
* @param offset
* @param updatePosition
* @param throwWhenNotFound
*          if false, return null instead of throwing an exception when
*          no block is found at the given offset
* @return located block
* @throws IOException
*/
private LocatedBlock getBlockAt(long offset, boolean updatePosition,
boolean throwWhenNotFound) throws IOException {
assert (locatedBlocks != null) : "locatedBlocks is null";
// search cached blocks first
LocatedBlock blk = locatedBlocks.getBlockContainingOffset(offset);
if (blk == null) { // block is not cached
// fetch more blocks
LocatedBlocks newBlocks;
newBlocks = getLocatedBlocks(src, offset, prefetchSize);
if (newBlocks == null) {
if (!throwWhenNotFound) {
return null;
}
throw new IOException("Could not find target position " + offset);
}
locatedBlocks.insertRange(newBlocks.getLocatedBlocks());
locatedBlocks.setFileLength(newBlocks.getFileLength());
}
blk = locatedBlocks.getBlockContainingOffset(offset);
if (blk == null) {
if (!throwWhenNotFound) {
return null;
}
throw new IOException("Failed to determine location for block at "
+ "offset=" + offset);
}
if (updatePosition) {
// update current position
this.pos = offset;
this.blockEnd = blk.getStartOffset() + blk.getBlockSize() - 1;
this.currentBlock = blk.getBlock();
isCurrentBlockUnderConstruction = locatedBlocks
.isUnderConstructionBlock(this.currentBlock);
}
return blk;
}
/**
* Get blocks in the specified range. The locations of all blocks
* overlapping with the given segment of the file are retrieved. Fetch them
* from the namenode if not cached.
*
* @param offset the offset of the segment to read
* @param length the length of the segment to read
* @return consequent segment of located blocks
* @throws IOException
*/
private List<LocatedBlock> getBlockRange(final long offset,
final long length) throws IOException {
List<LocatedBlock> blockRange = new ArrayList<LocatedBlock>();
// Zero length. Not sure this ever happens in practice.
if (length == 0)
return blockRange;
// A defensive measure to ensure that we never loop here eternally.
// With a 256 M block size, 10000 blocks will correspond to 2.5 TB.
// No one should read this much data at once in practice.
int maxLoops = 10000;
// Copy locatedBlocks to a local data structure. This ensures that
// a concurrent invocation of openInfo() works OK, the reason being
// that openInfo may completely replace locatedBlocks.
DFSLocatedBlocks locatedBlocks = this.locatedBlocks;
if (locatedBlocks == null) {
// Make this an IO exception because this is input/output code error.
throw new IOException("locatedBlocks is null");
}
long remaining = length;
long curOff = offset;
while (remaining > 0) {
// a defensive check to bail out of this loop at all costs
if (--maxLoops < 0) {
String msg = "Failed to getBlockRange at offset " + offset +
", length=" + length +
", curOff=" + curOff +
", remaining=" + remaining +
". Aborting...";
LOG.warn(msg);
throw new IOException(msg);
}
LocatedBlock blk = locatedBlocks.getBlockContainingOffset(curOff);
if (blk == null) {
LocatedBlocks newBlocks;
newBlocks = getLocatedBlocks(src, curOff, remaining);
if (newBlocks == null) {
throw new IOException("Could not get block locations for curOff=" +
curOff + ", remaining=" + remaining + " (offset=" + offset +
")");
}
locatedBlocks.insertRange(newBlocks.getLocatedBlocks());
continue;
}
blockRange.add(blk);
long bytesRead = blk.getStartOffset() + blk.getBlockSize() - curOff;
remaining -= bytesRead;
curOff += bytesRead;
}
checkBlockRange(blockRange, offset, length);
return blockRange;
}
private synchronized DatanodeInfo blockSeekTo(long target) throws IOException {
return blockSeekTo(target, true);
}
/**
* Open a DataInputStream to a DataNode so that it can be read from.
* We get block ID and the IDs of the destinations at startup, from the namenode.
*/
private synchronized DatanodeInfo blockSeekTo(long target,
boolean throwWhenNotFound) throws IOException {
// We only allow seeking before the end of the file, or to the end of the
// file when throwWhenNotFound is false, which is the case when called by
// available().
//
if (target > getFileLength() || (target == getFileLength() && throwWhenNotFound)) {
throw new IOException("Attempted to read past end of file");
}
if ( blockReader != null ) {
blockReader.close();
blockReader = null;
}
if (s != null) {
s.close();
s = null;
}
//
// Compute desired block.
//
LocatedBlock targetBlock = getBlockAt(target, true, throwWhenNotFound);
// Given target <= fileLength, getBlockAt() returns null when and only
// when throwWhenNotFound is false and there is no block for the file
// yet. In this case we should simply return null.
//
if (targetBlock == null) {
assert target == 0;
return null;
}
assert (target==this.pos) : "Wrong postion " + pos + " expect " + target;
long offsetIntoBlock = target - targetBlock.getStartOffset();
//
// Connect to best DataNode for desired Block, with potential offset
//
DatanodeInfo chosenNode = null;
while (s == null) {
DNAddrPair retval = chooseDataNode(targetBlock);
chosenNode = retval.info;
InetSocketAddress targetAddr = retval.addr;
// try reading the block locally. if this fails, then go via
// the datanode
Block blk = targetBlock.getBlock();
try {
if (LOG.isDebugEnabled()) {
LOG.warn("blockSeekTo shortCircuitLocalReads " + shortCircuitLocalReads +
" localhost " + localHost +
" targetAddr " + targetAddr);
}
if (shortCircuitLocalReads && localHost != null &&
(targetAddr.equals(localHost) ||
targetAddr.getHostName().startsWith("localhost"))) {
blockReader = BlockReaderLocal.newBlockReader(conf, src, namespaceId, blk,
chosenNode,
offsetIntoBlock,
blk.getNumBytes() - offsetIntoBlock,
metrics,
this.verifyChecksum,
this.clearOsBuffer);
blockReader.setReadLocal(true);
blockReader.setFsStats(stats);
return chosenNode;
}
} catch (IOException ex) {
LOG.info("Failed to read block " + targetBlock.getBlock() +
" on local machine " + localHost +
". Try via the datanode on " + targetAddr + ":"
+ StringUtils.stringifyException(ex));
}
try {
s = socketFactory.createSocket();
NetUtils.connect(s, targetAddr, socketTimeout, ipTosValue);
s.setSoTimeout(socketTimeout);
long minReadSpeedBps = (numNodeLeft(targetBlock.getLocations(),
deadNodes) > 1) ? DFSClient.this.minReadSpeedBps : -1;
blockReader = BlockReader.newBlockReader(
getDataTransferProtocolVersion(), namespaceId,
s, src, blk.getBlockId(),
blk.getGenerationStamp(),
offsetIntoBlock, blk.getNumBytes() - offsetIntoBlock,
buffersize, verifyChecksum,
clientName, minReadSpeedBps);
boolean isLocalHost = NetUtils.isLocalAddress(targetAddr.getAddress());
blockReader.setReadLocal(isLocalHost);
if (!isLocalHost) {
blockReader
.setReadRackLocal(isInLocalRack(targetAddr.getAddress()));
}
blockReader.setFsStats(stats);
return chosenNode;
} catch (IOException ex) {
// Put chosen node into dead list, continue
LOG.warn("Failed to connect to " + targetAddr, ex);
addToDeadNodes(chosenNode);
if (s != null) {
try {
s.close();
} catch (IOException iex) {
}
}
s = null;
}
}
return chosenNode;
}
/**
* Close it down!
*/
@Override
public synchronized void close() throws IOException {
if (closed) {
return;
}
checkOpen();
if ( blockReader != null ) {
blockReader.close();
blockReader = null;
}
if (s != null) {
s.close();
s = null;
}
super.close();
closed = true;
}
@Override
public synchronized int read() throws IOException {
int ret = read( oneByteBuf, 0, 1 );
return ( ret <= 0 ) ? -1 : (oneByteBuf[0] & 0xff);
}
/* This is used by the regular read() and handles ChecksumExceptions.
* The name readBuffer() is chosen to imply similarity to readBuffer() in
* ChecksumFileSystem.
*/
private synchronized int readBuffer(byte buf[], int off, int len)
throws IOException {
IOException ioe;
/* We retry the current node only once. So this is set to true only here.
* The intention is to handle one common case of an error that is not a
* failure on the datanode or client: when the DataNode closes the
* connection because the client is idle. If there are other cases of
* "non-errors" then a datanode might be retried by setting this to true
* again.
*/
boolean retryCurrentNode = true;
while (true) {
// retry as many times as seekToNewSource allows.
try {
int bytesRead = blockReader.read(buf, off, len);
// update length of file under construction if needed
if (isCurrentBlockUnderConstruction
&& blockReader.isBlkLenInfoUpdated()) {
locatedBlocks.setLastBlockSize(currentBlock.getBlockId(),
blockReader.getUpdatedBlockLength());
this.blockEnd = locatedBlocks.getFileLength() - 1;
blockReader.resetBlockLenInfo();
// if the last block is finalized, get file info from name-node.
// It is necessary because there might be new blocks added to
// the file. The client needs to check with the name-node whether
// it is the case, or the file has been finalized.
if (blockReader.isBlockFinalized() && src != null) {
openInfo();
}
}
return bytesRead;
} catch (DataNodeSlowException dnse) {
LOG.warn("Node " + currentNode + " is too slow when reading blk "
+ this.currentBlock + ". Try another datanode.");
ioe = dnse;
retryCurrentNode = false;
} catch ( ChecksumException ce ) {
LOG.warn("Found Checksum error for " + currentBlock + " from " +
currentNode.getName() + " at " + ce.getPos());
reportChecksumFailure(src, currentBlock, currentNode);
ioe = ce;
retryCurrentNode = false;
} catch ( IOException e ) {
if (!retryCurrentNode) {
LOG.warn("Exception while reading from " + currentBlock +
" of " + src + " from " + currentNode + ": " +
StringUtils.stringifyException(e));
}
ioe = e;
}
boolean sourceFound = false;
if (retryCurrentNode) {
/* possibly retry the same node so that transient errors don't
* result in application level failures (e.g. Datanode could have
* closed the connection because the client is idle for too long).
*/
sourceFound = seekToBlockSource(pos, len != 0);
} else {
addToDeadNodes(currentNode);
sourceFound = seekToNewSource(pos, len != 0);
}
if (!sourceFound) {
throw ioe;
} else {
incReadExpCntToStats();
}
retryCurrentNode = false;
}
}
/**
* Read the entire buffer.
*/
@Override
public synchronized int read(byte buf[], int off, int len) throws IOException {
checkOpen();
if (closed) {
incReadExpCntToStats();
throw new IOException("Stream closed");
}
dfsInputStreamfailures.set(0);
long start = System.currentTimeMillis();
if (pos < getFileLength() || (pos == getFileLength() && len == 0)) {
int retries = 2;
while (retries > 0) {
try {
// If position equals or is larger than the end position of the
// block, we try to seek to the next block, unless:
// 1. user tries to read 0 bytes (usually by available() call), AND
// 2. there is at least a known block for the file (blockEnd != -1), AND
// 3. pos is the end of the file, AND
// 4. the end of the block is the end of the file
// (the current block is the known last block of the file)
// For this case, we want to stay in the current block, as in the case
// that it is the last block (which is almost always true given
// len == 0), the current block is the under-construction block whose size
// you want to update.
//
if (len == 0) { // called by available()
if (blockEnd == -1 // No current block selected
|| pos == getFileLength()) { // at the end of the file
currentNode = blockSeekTo(pos, false);
if (currentNode == null) {
// In this case, user wants to know available information of
// the file, but the file doesn't have any block created yet (it
// is a 0 size file). Simply 0 should be returned.
return 0;
}
} else {
throw new IOException(
"Try to read 0 bytes while current position is not the end of the file");
}
} else if (pos > blockEnd) {
currentNode = blockSeekTo(pos, true);
}
int realLen = (int) Math.min((long) len, (blockEnd - pos + 1L));
int result = readBuffer(buf, off, realLen);
if (result >= 0) {
pos += result;
} else if (len != 0){
// got an EOS from the reader though we expect more data from it.
throw new IOException("Unexpected EOS from the reader");
}
if (stats != null && result != -1) {
stats.incrementBytesRead(result);
}
long timeval = System.currentTimeMillis() - start;
metrics.incReadTime(timeval);
metrics.incReadSize(result);
metrics.incReadOps();
return (result >= 0) ? result : 0;
} catch (ChecksumException ce) {
incReadExpCntToStats();
throw ce;
} catch (IOException e) {
incReadExpCntToStats();
if (retries == 1) {
LOG.warn("DFS Read: " + StringUtils.stringifyException(e));
}
blockEnd = -1;
if (currentNode != null) { addToDeadNodes(currentNode); }
if (--retries == 0) {
throw e;
}
}
}
}
return -1;
}
private DNAddrPair chooseDataNode(LocatedBlock block)
throws IOException {
while (true) {
DatanodeInfo[] nodes = block.getLocations();
DatanodeInfo chosenNode = null;
try {
chosenNode = bestNode(nodes, deadNodes);
InetSocketAddress targetAddr =
NetUtils.createSocketAddr(chosenNode.getName());
return new DNAddrPair(chosenNode, targetAddr);
} catch (IOException ie) {
int failureTimes = dfsInputStreamfailures.get();
String blockInfo = block.getBlock() + " file=" + src;
if (failureTimes >= maxBlockAcquireFailures) {
throw new BlockMissingException(src, "Could not obtain block: " +
blockInfo, block.getStartOffset());
}
if (nodes == null || nodes.length == 0) {
LOG.info("No node available for block: " + blockInfo);
}
LOG.info("Could not obtain block " + block.getBlock() +
" from node: " +
(chosenNode == null ? "" : chosenNode.getHostName()) + ie +
". Will get new block locations from namenode and retry...");
try {
// Introducing a random factor to the wait time before another retry.
// The wait time is dependent on # of failures and a random factor.
// At the first time of getting a BlockMissingException, the wait time
// is a random number between 0..3000 ms. If the first retry
// still fails, we will wait 3000 ms grace period before the 2nd retry.
// Also at the second retry, the waiting window is expanded to 6000 ms
// alleviating the request rate from the server. Similarly the 3rd retry
// will wait 6000ms grace period before retry and the waiting window is
// expanded to 9000ms.
// waitTime = grace period for the last round of attempt +
// expanding time window for each failure
double waitTime = timeWindow * failureTimes +
timeWindow * (failureTimes + 1) * r.nextDouble();
LOG.warn("DFS chooseDataNode: got # " + (failureTimes + 1) +
" IOException, will wait for " + waitTime + " msec.", ie);
Thread.sleep((long)waitTime);
} catch (InterruptedException iex) {
}
deadNodes.clear(); //2nd option is to remove only nodes[blockId]
openInfo();
block = getBlockAt(block.getStartOffset(), false, true);
dfsInputStreamfailures.set(failureTimes+1);
continue;
}
}
}
private void fetchBlockByteRange(LocatedBlock block, long start,
long end, byte[] buf, int offset) throws IOException {
//
// Connect to best DataNode for desired Block, with potential offset
//
Socket dn = null;
while (true) {
// cached block locations may have been updated by chooseDataNode()
// or fetchBlockAt(). Always get the latest list of locations at the
// start of the loop.
block = getBlockAt(block.getStartOffset(), false, true);
DNAddrPair retval = chooseDataNode(block);
DatanodeInfo chosenNode = retval.info;
InetSocketAddress targetAddr = retval.addr;
BlockReader reader = null;
int len = (int) (end - start + 1);
try {
if (LOG.isDebugEnabled()) {
LOG.debug("fetchBlockByteRange shortCircuitLocalReads " +
shortCircuitLocalReads +
" localhst " + localHost +
" targetAddr " + targetAddr);
}
// first try reading the block locally.
if (shortCircuitLocalReads && NetUtils.isLocalAddress(targetAddr.getAddress())) {
reader = BlockReaderLocal.newBlockReader(conf, src,
namespaceId, block.getBlock(),
chosenNode,
start,
len,
metrics,
verifyChecksum,
this.clearOsBuffer);
reader.setReadLocal(true);
reader.setFsStats(stats);
} else {
// go to the datanode
dn = socketFactory.createSocket();
NetUtils.connect(dn, targetAddr, socketTimeout, ipTosValue);
dn.setSoTimeout(socketTimeout);
reader = BlockReader.newBlockReader(getDataTransferProtocolVersion(),
namespaceId,
dn, src,
block.getBlock().getBlockId(),
block.getBlock().getGenerationStamp(),
start, len, buffersize,
verifyChecksum, clientName,
DFSClient.this.minReadSpeedBps);
boolean isLocalHost = NetUtils.isLocalAddress(targetAddr.getAddress());
reader.setReadLocal(isLocalHost);
if (!isLocalHost) {
reader
.setReadRackLocal(isInLocalRack(targetAddr.getAddress()));
}
reader.setFsStats(stats);
}
int nread = reader.readAll(buf, offset, len);
if (nread != len) {
throw new IOException("truncated return from reader.read(): " +
"excpected " + len + ", got " + nread);
}
return;
} catch (ChecksumException e) {
LOG.warn("fetchBlockByteRange(). Got a checksum exception for " +
src + " at " + block.getBlock() + ":" +
e.getPos() + " from " + chosenNode.getName());
reportChecksumFailure(src, block.getBlock(), chosenNode);
} catch (IOException e) {
LOG.warn("Failed to connect to " + targetAddr +
" for file " + src +
" for block " + block.getBlock().getBlockId() + ":" +
StringUtils.stringifyException(e));
} finally {
IOUtils.closeStream(reader);
IOUtils.closeSocket(dn);
}
// Put chosen node into dead list, continue
addToDeadNodes(chosenNode);
}
}
/**
* This is highly optimized for preads. It reduces the number of buffer
* copies. It is similar to doing scatter/gather io, with all the data
* returned in a list of ByteBuffers.
*/
private ByteBuffer fetchBlockByteRangeScatterGather(LocatedBlock block,
long start, long len) throws IOException {
//
// Connect to best DataNode for desired Block, with potential offset
//
Socket dn = null;
while (true) {
// cached block locations may have been updated by chooseDataNode()
// or fetchBlockAt(). Always get the latest list of locations at the
// start of the loop.
block = getBlockAt(block.getStartOffset(), false, true);
DNAddrPair retval = chooseDataNode(block);
DatanodeInfo chosenNode = retval.info;
InetSocketAddress targetAddr = retval.addr;
ByteBuffer result = null;
BlockReaderLocal localReader = null;
BlockReaderAccelerator remoteReader = null;
try {
if (LOG.isDebugEnabled()) {
LOG.debug("fetchBlockByteRangeScatterGather " +
" localhst " + localHost +
" targetAddr " + targetAddr);
}
// first try reading the block locally.
if (shortCircuitLocalReads &&
NetUtils.isLocalAddress(targetAddr.getAddress())) {
localReader = BlockReaderLocal.newBlockReader(conf, src,
namespaceId, block.getBlock(),
chosenNode,
start,
len,
metrics,
verifyChecksum,
this.clearOsBuffer);
localReader.setReadLocal(true);
localReader.setFsStats(stats);
result = localReader.readAll();
} else {
// go to the datanode
dn = socketFactory.createSocket();
NetUtils.connect(dn, targetAddr, socketTimeout,ipTosValue);
dn.setSoTimeout(socketTimeout);
remoteReader = new BlockReaderAccelerator(conf,
targetAddr,
chosenNode,
getDataTransferProtocolVersion(),
namespaceId, clientName,
dn, src,
block,
start, len,
verifyChecksum, metrics);
result = remoteReader.readAll();
}
if (result.remaining() != len) {
throw new IOException("truncated return from reader.read(): " +
"expected " + len + ", got " +
result.remaining());
}
if (NetUtils.isLocalAddress(targetAddr.getAddress())) {
stats.incrementLocalBytesRead(len);
stats.incrementRackLocalBytesRead(len);
} else if (isInLocalRack(targetAddr.getAddress())) {
stats.incrementRackLocalBytesRead(len);
}
return result;
} catch (ChecksumException e) {
LOG.warn("fetchBlockByteRangeScatterGather(). Got a checksum exception for " +
src + " at " + block.getBlock() + ":" +
e.getPos() + " from " + chosenNode.getName());
reportChecksumFailure(src, block.getBlock(), chosenNode);
} catch (IOException e) {
LOG.warn("Failed to connect to " + targetAddr +
" for file " + src +
" for block " + block.getBlock().getBlockId() + ":" +
StringUtils.stringifyException(e));
} finally {
IOUtils.closeStream(localReader);
IOUtils.closeStream(remoteReader);
IOUtils.closeSocket(dn);
}
incReadExpCntToStats();
// Put chosen node into dead list, continue
addToDeadNodes(chosenNode);
}
}
/**
* Read bytes starting from the specified position.
*
* @param position start read from this position
* @param buffer read buffer
* @param offset offset into buffer
* @param length number of bytes to read
*
* @return actual number of bytes read
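* <p>A sketch of a positional-read loop (buffer size illustrative); note
* that pread does not advance the stream's own position:
* <pre>{@code
* byte[] buf = new byte[4096];
* long pos = 0;
* int n;
* while ((n = in.read(pos, buf, 0, buf.length)) > 0) {
*   pos += n;
* }
* }</pre>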
*/
@Override
public int read(long position, byte[] buffer, int offset, int length)
throws IOException {
// sanity checks
checkOpen();
if (closed) {
throw new IOException("Stream closed");
}
dfsInputStreamfailures.set(0);
long start = System.currentTimeMillis();
long filelen = getFileLength();
if ((position < 0) || (position >= filelen)) {
return -1;
}
int realLen = length;
if ((position + length) > filelen) {
realLen = (int)(filelen - position);
}
// determine the block and byte range within the block
// corresponding to position and realLen
List<LocatedBlock> blockRange = getBlockRange(position, realLen);
int remaining = realLen;
for (LocatedBlock blk : blockRange) {
long targetStart = position - blk.getStartOffset();
long bytesToRead = Math.min(remaining, blk.getBlockSize() - targetStart);
fetchBlockByteRange(blk, targetStart,
targetStart + bytesToRead - 1, buffer, offset);
remaining -= bytesToRead;
position += bytesToRead;
offset += bytesToRead;
}
assert remaining == 0 : "Wrong number of bytes read.";
if (stats != null) {
stats.incrementBytesRead(realLen);
}
long timeval = System.currentTimeMillis() - start;
metrics.incPreadTime(timeval);
metrics.incPreadSize(realLen);
metrics.incPreadOps();
return realLen;
}
/**
* Read bytes starting from the specified position. This is optimized
* for fast preads from an application with minimum of buffer copies.
*
* @param position start read from this position
* @param length number of bytes to read
*
* @return a list of ByteBuffers that represent all the data that was
* read from the underlying system.
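* <p>A sketch of consuming the returned buffers (length illustrative):
* <pre>{@code
* List<ByteBuffer> parts = in.readFullyScatterGather(0, 1 << 20);
* int total = 0;
* for (ByteBuffer bb : parts) {
*   total += bb.remaining(); // each buffer holds one contiguous piece
* }
* }</pre>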
*/
@Override
public List<ByteBuffer> readFullyScatterGather(long position, int length)
throws IOException {
// if the server does not support scatter-gather,
// then use default implementation from FSDataInputStream.
if (dataTransferVersion < DataTransferProtocol.SCATTERGATHER_VERSION) {
return super.readFullyScatterGather(position, length);
}
// sanity checks
checkOpen();
if (closed) {
throw new IOException("Stream closed");
}
dfsInputStreamfailures.set(0);
long start = System.currentTimeMillis();
long filelen = getFileLength();
if ((position < 0) || (position > filelen)) {
String msg = " Invalid position " + position +
". File " + src + " is of size " + filelen;
LOG.warn(msg);
throw new IOException(msg);
}
List<ByteBuffer> results = new LinkedList<ByteBuffer>();
int realLen = length;
if ((position + length) > filelen) {
realLen = (int)(filelen - position);
}
// determine the block and byte range within the block
// corresponding to position and realLen
List<LocatedBlock> blockRange = getBlockRange(position, realLen);
int remaining = realLen;
for (LocatedBlock blk : blockRange) {
long targetStart = position - blk.getStartOffset();
long bytesToRead = Math.min(remaining, blk.getBlockSize() - targetStart);
ByteBuffer bb = fetchBlockByteRangeScatterGather(blk, targetStart,
bytesToRead);
results.add(bb);
remaining -= bytesToRead;
position += bytesToRead;
}
assert remaining == 0 : "Wrong number of bytes read.";
if (stats != null) {
stats.incrementBytesRead(realLen);
}
long timeval = System.currentTimeMillis() - start;
metrics.incPreadTime(timeval);
metrics.incPreadSize(realLen);
metrics.incPreadOps();
return results;
}
@Override
public long skip(long n) throws IOException {
if ( n > 0 ) {
long curPos = getPos();
long fileLen = getFileLength();
if( n+curPos > fileLen ) {
n = fileLen - curPos;
}
seek(curPos+n);
return n;
}
return n < 0 ? -1 : 0;
}
/**
* Seek to a new arbitrary location
*/
@Override
public synchronized void seek(long targetPos) throws IOException {
if (targetPos > getFileLength()) {
throw new IOException("Cannot seek after EOF");
}
boolean done = false;
if (pos <= targetPos && targetPos <= blockEnd) {
//
// If this seek is to a positive position in the current
// block, and this piece of data might already be lying in
// the TCP buffer, then just eat up the intervening data.
//
int diff = (int)(targetPos - pos);
if (diff <= TCP_WINDOW_SIZE) {
try {
pos += blockReader.skip(diff);
if (pos == targetPos) {
done = true;
}
} catch (IOException e) { // make the following read retry
incReadExpCntToStats();
LOG.debug("Exception while seek to " + targetPos + " from "
+ currentBlock +" of " + src + " from " + currentNode +
": " + StringUtils.stringifyException(e));
}
}
}
if (!done) {
pos = targetPos;
blockEnd = -1;
}
}
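// Behavior sketch (offsets illustrative): a short forward seek inside the
// current block is satisfied by skipping bytes already in the TCP buffer;
// any other seek only records the target and defers the reconnect.
//
//   in.seek(in.getPos() + 128);   // within TCP_WINDOW_SIZE: skips in place
//   in.seek(veryFarOffset);       // sets pos, blockEnd = -1; next read reconnects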
/**
* Same as {@link #seekToNewSource(long)} except that it does not exclude
* the current datanode and might connect to the same node.
*/
private synchronized boolean seekToBlockSource(long targetPos,
boolean throwWhenNotFound) throws IOException {
currentNode = blockSeekTo(targetPos, throwWhenNotFound);
return true;
}
/**
* Seek to given position on a node other than the current node. If
* a node other than the current node is found, then returns true.
* If another node could not be found, then returns false.
*/
@Override
public synchronized boolean seekToNewSource(long targetPos) throws IOException {
return seekToNewSource(targetPos, true);
}
/**
* Seek to given position on a node other than the current node. If
* a node other than the current node is found, then returns true.
* If another node could not be found, then returns false.
*/
public synchronized boolean seekToNewSource(long targetPos,
boolean throwWhenNotFound) throws IOException {
boolean markedDead = deadNodes.containsKey(currentNode);
addToDeadNodes(currentNode);
DatanodeInfo oldNode = currentNode;
DatanodeInfo newNode = blockSeekTo(targetPos, throwWhenNotFound);
if (!markedDead) {
/* remove it from deadNodes. blockSeekTo could have cleared
* deadNodes and added currentNode again. That's ok. */
deadNodes.remove(oldNode);
}
if (!oldNode.getStorageID().equals(newNode.getStorageID())) {
currentNode = newNode;
return true;
} else {
return false;
}
}
/**
* Return the current position in the stream.
*/
@Override
public synchronized long getPos() throws IOException {
return pos;
}
/**
* WARNING: This method does not work with files larger than 2GB.
* Use getFileLength() - getPos() instead.
*/
@Override
public synchronized int available() throws IOException {
if (closed) {
throw new IOException("Stream closed");
}
long length = getFileLength() - pos;
if (!isUnderConstruction() || length > 0) {
return (int) length;
}
read(emptyByteArray);
return (int) (getFileLength() - pos);
}
/**
* We definitely don't support marks
*/
@Override
public boolean markSupported() {
return false;
}
@Override
public void mark(int readLimit) {
}
@Override
public void reset() throws IOException {
throw new IOException("Mark/reset not supported");
}
}
public static class DFSDataInputStream extends FSDataInputStream {
DFSDataInputStream(DFSInputStream in)
throws IOException {
super(in);
}
/**
* Returns the datanode from which the stream is currently reading.
*/
public DatanodeInfo getCurrentDatanode() {
return ((DFSInputStream)in).getCurrentDatanode();
}
/**
* Returns the block containing the target position.
*/
public Block getCurrentBlock() {
return ((DFSInputStream)in).getCurrentBlock();
}
/**
* Return the collection of blocks that have already been located.
*/
public synchronized List<LocatedBlock> getAllBlocks() throws IOException {
return ((DFSInputStream)in).getAllBlocks();
}
@Override
public boolean isUnderConstruction() throws IOException {
return ((DFSInputStream)in).isUnderConstruction();
}
public long getFileLength() {
return ((DFSInputStream)in).getFileLength();
}
}
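// Illustrative use of the DFS-specific accessors (sketch; the path and the
// cast are assumptions for the example, valid only on a DistributedFileSystem):
//
//   FSDataInputStream raw = fs.open(new Path("/some/file"));
//   DFSClient.DFSDataInputStream dis = (DFSClient.DFSDataInputStream) raw;
//   dis.read(new byte[1]);                       // forces a datanode connection
//   DatanodeInfo dn = dis.getCurrentDatanode();  // node now being read from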
/**
* Encapsulate multiple output streams into one object.
*/
class MultiDataOutputStream {
DataOutputStream[] streams;
volatile int errorSlot;
MultiDataOutputStream(DataOutputStream[] outs) {
this.streams = outs;
this.errorSlot = -1; // no errors so far
}
DataOutputStream get(int i) {
return streams[i];
}
void set(int i, DataOutputStream st) {
streams[i] = st;
}
void write(byte[] buf, int off, int len) throws IOException {
for (int i = 0; i < streams.length; i++) {
try {
streams[i].write(buf, off, len);
} catch (IOException e) {
errorSlot = i;
throw e;
}
}
}
void writeInt(int v) throws IOException {
for (int i = 0; i < streams.length; i++) {
try {
streams[i].writeInt(v);
} catch (IOException e) {
errorSlot = i;
throw e;
}
}
}
void flush() throws IOException {
for (int i = 0; i < streams.length; i++) {
try {
streams[i].flush();
} catch (IOException e) {
errorSlot = i;
throw e;
}
}
}
void close() throws IOException {
for (int i = 0; i < streams.length; i++) {
try {
streams[i].close();
} catch (IOException e) {
errorSlot = i;
throw e;
}
}
}
/** Returns the slot number of the file descriptor that encountered
* an error. Returns -1 if there was no error.
*/
int getErrorIndex() {
return errorSlot;
}
}
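// Fan-out sketch (illustrative caller code): the wrapper rethrows the first
// IOException it hits and remembers the slot that failed, which the caller
// can map back to a pipeline position.
//
//   MultiDataOutputStream multi = new MultiDataOutputStream(outs);
//   try {
//     multi.write(buf, 0, buf.length);
//   } catch (IOException e) {
//     int bad = multi.getErrorIndex();  // index of the failed stream, or -1
//   }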
/**
* Encapsulate multiple input streams into one object.
*/
class MultiDataInputStream {
DataInputStream[] streams;
MultiDataInputStream(DataInputStream[] ins) {
this.streams = ins;
}
DataInputStream get(int i) {
return streams[i];
}
int size() {
return streams.length;
}
void set(int i, DataInputStream st) {
streams[i] = st;
}
void close() throws IOException {
for (int i = 0; i < streams.length; i++) {
streams[i].close();
}
}
}
/****************************************************************
* DFSOutputStream creates files from a stream of bytes.
*
* The client application writes data that is cached internally by
* this stream. Data is broken up into packets, each packet is
* typically 64K in size. A packet is composed of chunks. Each chunk
* is typically 512 bytes and has an associated checksum.
*
* When a client application fills up the currentPacket, it is
* enqueued into the dataQueue. The DataStreamer thread picks up
* packets from the dataQueue, sends each to the first datanode in
* the pipeline, and moves it from the dataQueue to the ackQueue.
* The ResponseProcessor receives acks from the datanodes. When a
* successful ack for a packet is received from all datanodes, the
* ResponseProcessor removes the corresponding packet from the
* ackQueue.
*
* In case of error, all outstanding packets are moved from the
* ackQueue back to the dataQueue. A new pipeline is set up by
* eliminating the bad datanode from the original pipeline. The
* DataStreamer then resumes sending packets from the dataQueue.
****************************************************************/
class DFSOutputStream extends FSOutputSummer implements Syncable, Replicable {
private Socket[] s;
boolean closed = false;
private String src;
private MultiDataOutputStream blockStream;
private MultiDataInputStream blockReplyStream;
private Block block;
final private long blockSize;
private DataChecksum checksum;
private LinkedList<Packet> dataQueue = new LinkedList<Packet>();
private LinkedList<Packet> ackQueue = new LinkedList<Packet>();
private int numPendingHeartbeats = 0;
private long lastPacketSentTime = 0;
private final long packetTimeout
= conf.getLong("dfs.client.packet.timeout", 15000); // 15 seconds
private Packet currentPacket = null;
private int maxPackets = 80; // each packet 64K, total 5MB
// private int maxPackets = 1000; // each packet 64K, total 64MB
private DataStreamer streamer = new DataStreamer();
private ResponseProcessor response = null;
private long currentSeqno = 0;
private long lastQueuedSeqno = -1;
private long lastAckedSeqno = -1;
private long bytesCurBlock = 0; // bytes written in current block
private int packetSize = 0; // write packet size, including the header.
private int chunksPerPacket = 0;
private DatanodeInfo[] nodes = null; // list of targets for current block
private DatanodeInfo[] favoredNodes = null; // put replicas here if possible
private volatile boolean hasError = false;
private volatile int errorIndex = 0;
private volatile IOException lastException = null;
private long artificialSlowdown = 0;
private long lastFlushOffset = 0; // offset when flush was invoked
private boolean persistBlocks = false; // persist blocks on namenode
private int recoveryErrorCount = 0; // number of times block recovery failed
private final int maxRecoveryErrorCount
= conf.getInt("dfs.client.block.recovery.retries", 5); // try block recovery 5 times
private volatile boolean appendChunk = false; // appending to existing partial block
private long initialFileSize = 0; // at time of file open
private Progressable progress;
private short blockReplication; // replication factor of file
private long lastBlkOffset = 0; // end pos of last block already sent
private boolean forceSync;
private boolean doParallelWrites = false;
private int namespaceId; // the namespace that the file belongs to
private void setLastException(IOException e) {
if (lastException == null) {
lastException = e;
}
}
public void setOffsets(long offset) {
LOG.info("set last block offsets in file: " + src + " pos: " + offset);
lastBlkOffset = offset;
}
private class Packet {
ByteBuffer buffer; // only one of buf and buffer is non-null
byte[] buf;
long seqno; // sequence number of buffer in block
long offsetInBlock; // offset in block
boolean lastPacketInBlock; // is this the last packet in block?
int numChunks; // number of chunks currently in packet
int maxChunks; // max chunks in packet
int dataStart;
int dataPos;
int checksumStart;
int checksumPos;
private static final long HEART_BEAT_SEQNO = -1L;
/**
* create a heartbeat packet
*/
Packet() {
this.lastPacketInBlock = false;
this.numChunks = 0;
this.offsetInBlock = 0;
this.seqno = HEART_BEAT_SEQNO;
buffer = null;
int packetSize = DataNode.PKT_HEADER_LEN + SIZE_OF_INTEGER;
buf = new byte[packetSize];
checksumStart = dataStart = packetSize;
checksumPos = checksumStart;
dataPos = dataStart;
maxChunks = 0;
}
// create a new packet
Packet(int pktSize, int chunksPerPkt, long offsetInBlock)
throws IOException {
this.lastPacketInBlock = false;
this.numChunks = 0;
this.offsetInBlock = offsetInBlock;
this.seqno = currentSeqno;
currentSeqno++;
buffer = null;
buf = new byte[pktSize];
checksumStart = DataNode.PKT_HEADER_LEN + SIZE_OF_INTEGER;
checksumPos = checksumStart;
dataStart = checksumStart + chunksPerPkt * checksum.getChecksumSize();
dataPos = dataStart;
maxChunks = chunksPerPkt;
}
void writeData(byte[] inarray, int off, int len) {
if ( dataPos + len > buf.length) {
throw new BufferOverflowException();
}
System.arraycopy(inarray, off, buf, dataPos, len);
dataPos += len;
}
void writeChecksum(byte[] inarray, int off, int len) {
if (checksumPos + len > dataStart) {
throw new BufferOverflowException();
}
System.arraycopy(inarray, off, buf, checksumPos, len);
checksumPos += len;
}
/**
* Returns ByteBuffer that contains one full packet, including header.
* @throws IOException
*/
ByteBuffer getBuffer() throws IOException {
/* Once this is called, no more data can be added to the packet.
* setting 'buf' to null ensures that.
* This is called only when the packet is ready to be sent.
*/
if (buffer != null) {
return buffer;
}
//prepare the header and close any gap between checksum and data.
int dataLen = dataPos - dataStart;
int checksumLen = checksumPos - checksumStart;
if (checksumPos != dataStart) {
/* move the checksum to cover the gap.
* This can happen for the last packet.
*/
System.arraycopy(buf, checksumStart, buf,
dataStart - checksumLen , checksumLen);
}
int pktLen = SIZE_OF_INTEGER + dataLen + checksumLen;
//normally dataStart == checksumPos, i.e., offset is zero.
buffer = ByteBuffer.wrap(buf, dataStart - checksumPos,
DataNode.PKT_HEADER_LEN + pktLen);
buf = null;
buffer.mark();
/* write the header and data length.
* The format is described in comment before DataNode.BlockSender
*/
buffer.putInt(pktLen); // pktSize
buffer.putLong(offsetInBlock);
buffer.putLong(seqno);
if (dataTransferVersion >= getDataTransferProtocolVersion()) {
byte booleanFieldValue = 0x00;
if (lastPacketInBlock) {
booleanFieldValue |= DataNode.isLastPacketInBlockMask;
}
if (forceSync) {
booleanFieldValue |= DataNode.forceSyncMask;
}
buffer.put(booleanFieldValue);
} else {
buffer.put((byte) (lastPacketInBlock? 1: 0));
}
//end of pkt header
buffer.putInt(dataLen); // actual data length, excluding checksum.
buffer.reset();
return buffer;
}
private long getEndPosInCurrBlk() {
return offsetInBlock + (dataPos - dataStart);
}
/**
* Check if this packet is a heart beat packet
* @return true if the sequence number is HEART_BEAT_SEQNO
*/
private boolean isHeartbeatPacket() {
return seqno == HEART_BEAT_SEQNO;
}
}
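// Packet buffer layout sketch (sizes assume the usual 512-byte chunks with
// 4-byte CRC32 checksums; header sizes come from DataNode.PKT_HEADER_LEN):
//
//   [ header | checksums ............ | data ................ ]
//   checksumStart = PKT_HEADER_LEN + SIZE_OF_INTEGER
//   dataStart     = checksumStart + chunksPerPkt * 4
//
// getBuffer() slides the checksums right to close the gap left by a
// partially filled last packet before wrapping the array in a ByteBuffer.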
/** Decide if the write pipeline supports bidirectional heartbeat or not */
private boolean supportClientHeartbeat() throws IOException {
return getDataTransferProtocolVersion() >=
DataTransferProtocol.CLIENT_HEARTBEAT_VERSION;
}
/**
* Check if the last outstanding packet has not received an ack before
* it is timed out.
* If true, for now just log it.
* We will provide a decent solution to this later on.
*/
private void checkIfLastPacketTimeout() {
synchronized (ackQueue) {
if (!ackQueue.isEmpty() &&
(System.currentTimeMillis() - lastPacketSentTime > packetTimeout)) {
LOG.warn("Packet " + ackQueue.getLast().seqno +
" of " + block + " is timed out");
}
}
}
//
// The DataStreamer class is responsible for sending data packets to the
// datanodes in the pipeline. It retrieves a new blockid and block locations
// from the namenode, and starts streaming packets to the pipeline of
// Datanodes. Every packet has a sequence number associated with
// it. When all the packets for a block are sent out and acks for each
// of them are received, the DataStreamer closes the current block.
//
private class DataStreamer extends Daemon {
private volatile boolean closed = false;
private long lastPacket;
private boolean doSleep;
DataStreamer() throws IOException {
// explicitly invoke the RPC here to avoid an RPC inside waitForWork,
// which might cause a timeout
getDataTransferProtocolVersion();
}
private void waitForWork() throws IOException {
if ( supportClientHeartbeat() ) { // send heart beat
long now = System.currentTimeMillis();
while ((!closed && !hasError && clientRunning
&& dataQueue.size() == 0 &&
(blockStream == null || now - lastPacket < timeoutValue/2))
|| doSleep) {
long timeout = timeoutValue/2 - (now-lastPacket);
timeout = timeout <= 0 ? 1000 : timeout;
try {
dataQueue.wait(timeout);
checkIfLastPacketTimeout();
now = System.currentTimeMillis();
} catch (InterruptedException e) {
}
doSleep = false;
}
} else { // not sending heartbeats
while ((!closed && !hasError && clientRunning
&& dataQueue.size() == 0) || doSleep) {
try {
dataQueue.wait(1000);
} catch (InterruptedException e) {
}
doSleep = false;
}
}
}
public void run() {
while (!closed && clientRunning) {
// if the Responder encountered an error, shutdown Responder
if (hasError && response != null) {
try {
response.close();
response.join();
response = null;
} catch (InterruptedException e) {
}
}
Packet one = null;
synchronized (dataQueue) {
// process IO errors if any
doSleep = processDatanodeError(hasError, false);
try {
// wait for a packet to be sent.
waitForWork();
if (closed || hasError || !clientRunning) {
continue;
}
// get packet to be sent.
if (dataQueue.isEmpty()) {
one = new Packet(); // heartbeat packet
} else {
one = dataQueue.getFirst(); // regular data packet
}
long offsetInBlock = one.offsetInBlock;
// get new block from namenode.
if (blockStream == null) {
LOG.debug("Allocating new block: " + src + " pos: " + lastBlkOffset);
nodes = nextBlockOutputStream(src);
this.setName("DataStreamer for file " + src +
" block " + block);
response = new ResponseProcessor(nodes);
response.start();
}
if (offsetInBlock >= blockSize) {
throw new IOException("BlockSize " + blockSize +
" is smaller than data size. " +
" Offset of packet in block " +
offsetInBlock +
" Aborting file " + src);
}
ByteBuffer buf = one.getBuffer();
// write out data to remote datanode
blockStream.write(buf.array(), buf.position(), buf.remaining());
if (one.lastPacketInBlock) {
blockStream.writeInt(0); // indicate end-of-block
}
blockStream.flush();
lastPacket = System.currentTimeMillis();
if (LOG.isDebugEnabled()) {
LOG.debug("DataStreamer block " + block +
" wrote packet seqno:" + one.seqno +
" size:" + buf.remaining() +
" offsetInBlock:" + one.offsetInBlock +
" lastPacketInBlock:" + one.lastPacketInBlock);
}
// move packet from dataQueue to ackQueue
if (!one.isHeartbeatPacket()) {
dataQueue.removeFirst();
dataQueue.notifyAll();
synchronized (ackQueue) {
ackQueue.addLast(one);
lastPacketSentTime = System.currentTimeMillis();
ackQueue.notifyAll();
}
} else {
synchronized (ackQueue) {
numPendingHeartbeats++;
ackQueue.notifyAll();
}
LOG.info("Sending a heartbeat packet for block " + block);
}
} catch (Throwable e) {
incWriteExpCntToStats();
LOG.warn("DataStreamer Exception: " +
StringUtils.stringifyException(e));
if (e instanceof IOException) {
setLastException((IOException)e);
}
hasError = true;
if (blockStream != null) {
// find the first datanode to which we could not write data.
int possibleError = blockStream.getErrorIndex();
if (possibleError != -1) {
errorIndex = possibleError;
LOG.warn("DataStreamer bad datanode in pipeline:" +
possibleError);
}
}
}
}
if (closed || hasError || !clientRunning) {
continue;
}
// Is this block full?
if (one.lastPacketInBlock) {
synchronized (ackQueue) {
while (!hasError && ackQueue.size() != 0 && clientRunning) {
try {
ackQueue.wait(); // wait for acks to arrive from datanodes
} catch (InterruptedException e) {
}
}
}
LOG.debug("Closing old block " + block);
this.setName("DataStreamer for file " + src);
response.close(); // ignore all errors in Response
try {
response.join();
response = null;
} catch (InterruptedException e) {
}
if (closed || hasError || !clientRunning) {
continue;
}
synchronized (dataQueue) {
try {
blockStream.close();
blockReplyStream.close();
} catch (IOException e) {
}
nodes = null;
response = null;
blockStream = null;
blockReplyStream = null;
}
}
if (progress != null) { progress.progress(); }
// This is used by unit test to trigger race conditions.
if (artificialSlowdown != 0 && clientRunning) {
sleepForUnitTest(artificialSlowdown);
}
}
}
// shutdown thread
void close() {
closed = true;
synchronized (dataQueue) {
dataQueue.notifyAll();
}
synchronized (ackQueue) {
ackQueue.notifyAll();
}
this.interrupt();
}
}
//
// Processes responses from the datanodes. A packet is removed
// from the ackQueue when its response arrives.
//
private class ResponseProcessor extends Thread {
private volatile boolean closed = false;
private DatanodeInfo[] targets = null;
private boolean lastPacketInBlock = false;
ResponseProcessor (DatanodeInfo[] targets) {
this.targets = targets;
}
public void run() {
this.setName("ResponseProcessor for block " + block);
while (!closed && clientRunning && !lastPacketInBlock) {
// process responses from datanodes.
int recordError = 0;
try {
long seqno = 0;
synchronized (ackQueue) {
while (!closed && clientRunning && ackQueue.isEmpty() &&
numPendingHeartbeats == 0) {
try {
ackQueue.wait();
} catch (InterruptedException e) {
// If the thread is being interrupted when waiting for
// packet, we log the exception and treat it as a normal
// exception.
//
LOG.info("ResponseProcessor thread interrupted when " +
"waiting for new packets");
throw e;
}
}
}
if (closed || !clientRunning) {
break;
}
if (!doParallelWrites) {
// verify seqno from datanode
seqno = blockReplyStream.get(0).readLong();
LOG.debug("DFSClient received ack for seqno " + seqno);
if (seqno == Packet.HEART_BEAT_SEQNO && !supportClientHeartbeat()) {
continue;
}
// regular ack
// processes response status from all datanodes.
for (int i = 0; i < targets.length && clientRunning; i++) {
short reply = blockReplyStream.get(0).readShort();
if (reply != DataTransferProtocol.OP_STATUS_SUCCESS) {
recordError = i; // first bad datanode
throw new IOException("Bad response " + reply + " for block "
+ block + " from datanode " + targets[i].getName());
}
}
} else {
// The client is writing to all replicas in parallel. It also
// expects an ack from all replicas.
long lastsn = 0;
assert blockReplyStream.size() > 0;
for (int i = 0; i < blockReplyStream.size(); i++) {
recordError = i; // remember the current slot
seqno = blockReplyStream.get(i).readLong();
if (LOG.isDebugEnabled()) {
LOG.debug("DFSClient for block " + block + " " + seqno);
}
if (i != 0 && seqno != -2 && seqno != lastsn) {
String msg = "Responses from datanodes do not match "
+ " this replica acked " + seqno
+ " but previous replica acked " + lastsn;
LOG.warn(msg);
throw new IOException(msg);
}
short reply = blockReplyStream.get(i).readShort();
if (reply != DataTransferProtocol.OP_STATUS_SUCCESS) {
recordError = i; // first bad datanode
throw new IOException("Bad parallel response " + reply
+ " for block " + block + " from datanode "
+ targets[i].getName());
}
lastsn = seqno;
}
}
assert seqno != -2 :
"Ack for unkown seqno should be a failed ack!";
if (seqno == Packet.HEART_BEAT_SEQNO) { // a heartbeat ack
assert supportClientHeartbeat();
synchronized(ackQueue) {
assert numPendingHeartbeats > 0;
numPendingHeartbeats--;
}
continue;
}
Packet one = null;
synchronized (ackQueue) {
assert !ackQueue.isEmpty();
one = ackQueue.getFirst();
}
if (one.seqno != seqno) {
throw new IOException("Responseprocessor: Expecting seqno " +
" for block " + block +
one.seqno + " but received " + seqno);
}
lastPacketInBlock = one.lastPacketInBlock;
if (lastPacketInBlock) {
if (LOG.isDebugEnabled()) {
LOG.debug("Update pos in file: " + src + " curBlckOffset: "
+ lastBlkOffset + " blockSize: "
+ one.getEndPosInCurrBlk());
}
lastBlkOffset += one.getEndPosInCurrBlk();
}
synchronized (ackQueue) {
assert seqno == lastAckedSeqno + 1;
lastAckedSeqno = seqno;
ackQueue.removeFirst();
ackQueue.notifyAll();
}
} catch (Exception e) {
if (!closed) {
hasError = true;
errorIndex = recordError;
if (e instanceof IOException) {
setLastException((IOException)e);
}
LOG.warn("DFSOutputStream ResponseProcessor exception " +
" for block " + block +
StringUtils.stringifyException(e));
closed = true;
}
}
synchronized (dataQueue) {
dataQueue.notifyAll();
}
synchronized (ackQueue) {
ackQueue.notifyAll();
}
}
}
void close() {
closed = true;
this.interrupt();
}
}
// If this stream has encountered any errors so far, shut down
// threads and mark the stream as closed. Returns true if the caller
// should sleep for a while after returning from this call.
//
private boolean processDatanodeError(boolean hasError, boolean isAppend) {
if (!hasError) {
return false;
}
if (response != null) {
LOG.info("Error Recovery for block " + block +
" waiting for responder to exit. ");
return true;
}
incWriteExpCntToStats();
if (errorIndex >= 0) {
LOG.warn("Error Recovery for block " + block
+ " bad datanode[" + errorIndex + "] "
+ (nodes == null? "nodes == null": nodes[errorIndex].getName()));
}
if (blockStream != null) {
try {
blockStream.close();
blockReplyStream.close();
} catch (IOException e) {
}
}
blockStream = null;
blockReplyStream = null;
// move packets from ack queue to front of the data queue
synchronized (ackQueue) {
if (!ackQueue.isEmpty()) {
LOG.info("First unacked packet in " + block + " starts at "
+ ackQueue.getFirst().offsetInBlock);
dataQueue.addAll(0, ackQueue);
ackQueue.clear();
}
numPendingHeartbeats = 0;
}
boolean success = false;
while (!success && clientRunning) {
DatanodeInfo[] newnodes = null;
if (nodes == null) {
String msg = "Could not get block locations. " +
"Source file \"" + src
+ "\" - Aborting...";
LOG.warn(msg);
setLastException(new IOException(msg));
closed = true;
if (streamer != null) streamer.close();
return false;
}
StringBuilder pipelineMsg = new StringBuilder();
for (int j = 0; j < nodes.length; j++) {
pipelineMsg.append(nodes[j].getName());
if (j < nodes.length - 1) {
pipelineMsg.append(", ");
}
}
// remove bad datanode from list of datanodes.
// If errorIndex was not set (i.e. appends), then do not remove
// any datanodes
//
if (errorIndex < 0) {
newnodes = nodes;
} else {
if (nodes.length <= 1) {
lastException = new IOException("All datanodes " + pipelineMsg +
" are bad. Aborting...");
closed = true;
if (streamer != null) streamer.close();
return false;
}
LOG.warn("Error Recovery for block " + block +
" in pipeline " + pipelineMsg +
": bad datanode " + nodes[errorIndex].getName());
newnodes = new DatanodeInfo[nodes.length-1];
System.arraycopy(nodes, 0, newnodes, 0, errorIndex);
System.arraycopy(nodes, errorIndex+1, newnodes, errorIndex,
newnodes.length-errorIndex);
}
// Tell the primary datanode to do error recovery
// by stamping appropriate generation stamps.
//
LocatedBlock newBlock = null;
ProtocolProxy<ClientDatanodeProtocol> primary = null;
DatanodeInfo primaryNode = null;
try {
// Pick the "least" datanode as the primary datanode to avoid deadlock.
primaryNode = Collections.min(Arrays.asList(newnodes));
/* pipeline recovery needs 3 RPCs to DataNodes and 2 RPCs to the
* NameNode, so rpcTimeout is set to 5 times the client
* socketTimeout
*/
int recoverTimeout = 5*socketTimeout;
primary = createClientDNProtocolProxy(primaryNode, conf,
recoverTimeout);
if (primary.isMethodSupported("recoverBlock", int.class, Block.class,
boolean.class, DatanodeInfo[].class, long.class)) {
// The deadline is up to RPC time out minus one socket timeout
// to be more conservative.
newBlock = primary.getProxy().recoverBlock(namespaceId, block,
isAppend, newnodes,
System.currentTimeMillis() + recoverTimeout - socketTimeout);
} else if (primary.isMethodSupported("recoverBlock", int.class, Block.class, boolean.class, DatanodeInfo[].class)) {
newBlock = primary.getProxy().recoverBlock(
namespaceId, block, isAppend, newnodes);
} else {
newBlock = primary.getProxy().recoverBlock(block, isAppend, newnodes);
}
long nextByteToSend = dataQueue.isEmpty() ?
bytesCurBlock : dataQueue.getFirst().offsetInBlock;
if (nextByteToSend > newBlock.getBlockSize()) {
LOG.warn("Missing bytes! Error Recovery for block " + block +
" end up with " +
newBlock.getBlockSize() + " bytes but client already sent " +
nextByteToSend + " bytes and data queue is " +
(dataQueue.isEmpty() ? "" : "not ") + "empty.");
}
} catch (IOException e) {
incWriteExpCntToStats();
LOG.warn("Failed recovery attempt #" + recoveryErrorCount +
" from primary datanode " + primaryNode, e);
recoveryErrorCount++;
if (recoveryErrorCount > maxRecoveryErrorCount) {
if (nodes.length > 1) {
// if the primary datanode failed, remove it from the list.
// The original bad datanode is left in the list because it is
// conservative to remove only one datanode in one iteration.
for (int j = 0; j < nodes.length; j++) {
if (nodes[j].equals(primaryNode)) {
errorIndex = j; // forget original bad node.
}
}
// remove primary node from list
newnodes = new DatanodeInfo[nodes.length-1];
System.arraycopy(nodes, 0, newnodes, 0, errorIndex);
System.arraycopy(nodes, errorIndex+1, newnodes, errorIndex,
newnodes.length-errorIndex);
nodes = newnodes;
LOG.warn("Error Recovery for block " + block + " failed " +
" because recovery from primary datanode " +
primaryNode + " failed " + recoveryErrorCount +
" times. " + " Pipeline was " + pipelineMsg +
". Marking primary datanode as bad.");
recoveryErrorCount = 0;
errorIndex = -1;
return true; // sleep when we return from here
}
String emsg = "Error Recovery for block " + block + " failed " +
" because recovery from primary datanode " +
primaryNode + " failed " + recoveryErrorCount +
" times. " + " Pipeline was " + pipelineMsg +
". Aborting...";
LOG.warn(emsg);
lastException = new IOException(emsg);
closed = true;
if (streamer != null) streamer.close();
return false; // abort with IOexception
}
LOG.warn("Error Recovery for block " + block + " failed " +
" because recovery from primary datanode " +
primaryNode + " failed " + recoveryErrorCount +
" times. " + " Pipeline was " + pipelineMsg +
". Will retry...");
return true; // sleep when we return from here
} finally {
if (primary != null) {
RPC.stopProxy(primary.getProxy());
}
}
recoveryErrorCount = 0; // block recovery successful
// If the block recovery generated a new generation stamp, use that
// from now on. Also, setup new pipeline
//
if (newBlock != null) {
block = newBlock.getBlock();
nodes = newBlock.getLocations();
}
this.hasError = false;
lastException = null;
errorIndex = 0;
success = createBlockOutputStream(nodes, clientName, true);
}
response = new ResponseProcessor(nodes);
response.start();
return false; // do not sleep, continue processing
}
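// Pipeline-shrink sketch (illustrative nodes): with nodes = [A, B, C] and
// errorIndex = 1, the two System.arraycopy calls above yield
// newnodes = [A, C]; recoverBlock then restamps the block on the survivors
// and createBlockOutputStream rebuilds the streams against them.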
private void isClosed() throws IOException {
if ((closed || !clientRunning) && lastException != null) {
throw lastException;
}
}
//
// returns the list of targets, if any, that are currently being used.
//
DatanodeInfo[] getPipeline() {
synchronized (dataQueue) {
if (nodes == null) {
return null;
}
DatanodeInfo[] value = new DatanodeInfo[nodes.length];
for (int i = 0; i < nodes.length; i++) {
value[i] = nodes[i];
}
return value;
}
}
private DFSOutputStream(String src, long blockSize, Progressable progress,
int bytesPerChecksum, short replication, boolean forceSync,
boolean doParallelWrites, DatanodeInfo[] favoredNodes)
throws IOException {
super(new CRC32(), bytesPerChecksum, 4);
this.forceSync = forceSync;
this.doParallelWrites = doParallelWrites;
this.src = src;
this.blockSize = blockSize;
this.blockReplication = replication;
this.progress = progress;
if (progress != null) {
LOG.debug("Set non-null progress callback on DFSOutputStream "+src);
}
this.favoredNodes = favoredNodes;
if ( bytesPerChecksum < 1 || blockSize % bytesPerChecksum != 0) {
throw new IOException("io.bytes.per.checksum(" + bytesPerChecksum +
") and blockSize(" + blockSize +
") do not match. " + "blockSize should be a " +
"multiple of io.bytes.per.checksum");
}
checksum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
bytesPerChecksum,
new PureJavaCrc32());
}
/**
* Create a new output stream to the given DataNode.
* @see ClientProtocol#create(String, FsPermission, String, boolean, short, long)
*/
DFSOutputStream(String src, int buffersize, Progressable progress,
LocatedBlock lastBlock, FileStatus stat, int bytesPerChecksum)
throws IOException {
this(src, buffersize, progress, lastBlock, stat, bytesPerChecksum, 0);
}
/**
* Create a new output stream to the given DataNode.
* @see ClientProtocol#create(String, FsPermission, String, boolean, short, long)
*/
DFSOutputStream(String src, FsPermission masked, boolean overwrite,
boolean createParent, short replication, long blockSize,
Progressable progress,int buffersize, int bytesPerChecksum,
boolean forceSync, boolean doParallelWrites,
DatanodeInfo[] favoredNodes) throws IOException {
this(src, blockSize, progress, bytesPerChecksum, replication,forceSync,
doParallelWrites, favoredNodes);
computePacketChunkSize(writePacketSize, bytesPerChecksum);
try {
if (namenodeProtocolProxy != null &&
namenodeProtocolProxy.isMethodSupported("create", String.class,
FsPermission.class, String.class, boolean.class, boolean.class,
short.class, long.class)) {
namenode.create(src, masked, clientName, overwrite,
createParent, replication, blockSize);
} else {
namenode.create(src, masked, clientName, overwrite,
replication, blockSize);
}
} catch(RemoteException re) {
incWriteExpCntToStats();
throw re.unwrapRemoteException(AccessControlException.class,
FileAlreadyExistsException.class,
FileNotFoundException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
}
streamer.start();
}
/**
* Create a new output stream to the given DataNode with namespace id.
*/
DFSOutputStream(String src, int buffersize, Progressable progress,
LocatedBlock lastBlock, FileStatus stat,
int bytesPerChecksum, int namespaceId) throws IOException {
this(src, stat.getBlockSize(), progress, bytesPerChecksum,
stat.getReplication(), false, false, null);
initialFileSize = stat.getLen(); // length of file when opened
this.namespaceId = namespaceId;
//
// The last partial block of the file has to be filled.
//
if (lastBlock != null) {
block = lastBlock.getBlock();
long usedInLastBlock = stat.getLen() % blockSize;
int freeInLastBlock = (int)(blockSize - usedInLastBlock);
// calculate the amount of free space in the pre-existing
// last crc chunk
int usedInCksum = (int)(stat.getLen() % bytesPerChecksum);
int freeInCksum = bytesPerChecksum - usedInCksum;
// if there is space in the last block, then we have to
// append to that block; if it is already full, fail
if (freeInLastBlock == blockSize) {
throw new IOException("The last block for file " +
src + " is full.");
}
// indicate that we are appending to an existing block
bytesCurBlock = lastBlock.getBlockSize();
if (usedInCksum > 0 && freeInCksum > 0) {
// if there is space in the last partial chunk, then
// setup in such a way that the next packet will have only
// one chunk that fills up the partial chunk.
//
computePacketChunkSize(0, freeInCksum);
resetChecksumChunk(freeInCksum);
this.appendChunk = true;
} else {
// if the remaining space in the block is smaller than
// the expected size of a packet, then create a
// smaller size packet.
//
computePacketChunkSize(Math.min(writePacketSize, freeInLastBlock),
bytesPerChecksum);
}
// setup pipeline to append to the last block
nodes = lastBlock.getLocations();
errorIndex = -1; // no errors yet.
if (nodes.length < 1) {
throw new IOException("Unable to retrieve blocks locations" +
" for append to last block " + block +
" of file " + src);
}
// keep trying to setup a pipeline until you know all DNs are dead
while (processDatanodeError(true, true)) {
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
}
}
if (lastException != null) {
throw lastException;
}
}
else {
computePacketChunkSize(writePacketSize, bytesPerChecksum);
}
long blockOffset = stat.getLen();
blockOffset -= blockOffset % blockSize;
setOffsets(blockOffset);
streamer.start();
}
private void computePacketChunkSize(int psize, int csize) {
int chunkSize = csize + checksum.getChecksumSize();
int n = DataNode.PKT_HEADER_LEN + SIZE_OF_INTEGER;
chunksPerPacket = Math.max((psize - n + chunkSize-1)/chunkSize, 1);
packetSize = n + chunkSize*chunksPerPacket;
if (LOG.isDebugEnabled()) {
LOG.debug("computePacketChunkSize: src=" + src +
", chunkSize=" + chunkSize +
", chunksPerPacket=" + chunksPerPacket +
", packetSize=" + packetSize);
}
}
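// Worked example (values are the common defaults plus an assumed 21-byte
// packet header, so treat the arithmetic as illustrative):
//   psize = 65536, csize = 512, checksum size = 4
//   chunkSize       = 512 + 4 = 516
//   n               = 21 + 4 = 25
//   chunksPerPacket = max((65536 - 25 + 515) / 516, 1) = 127
//   packetSize      = 25 + 516 * 127 = 65557  (~64K, as the class doc says)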
/**
* Open a DataOutputStream to a DataNode so that it can be written to.
* This happens when a file is created and each time a new block is allocated.
* Must get block ID and the IDs of the destinations from the namenode.
* Returns the list of target datanodes.
*/
private DatanodeInfo[] nextBlockOutputStream(String client) throws IOException {
LocatedBlock lb = null;
boolean retry = false;
DatanodeInfo[] nodes;
ArrayList<DatanodeInfo> excludedNodes = new ArrayList<DatanodeInfo>();
int count = conf.getInt("dfs.client.block.write.retries", 3);
boolean success;
do {
hasError = false;
lastException = null;
errorIndex = 0;
retry = false;
nodes = null;
success = false;
long startTime = System.currentTimeMillis();
DatanodeInfo[] excluded = excludedNodes.toArray(new DatanodeInfo[0]);
lb = locateFollowingBlock(startTime, excluded.length > 0 ? excluded
: null);
block = lb.getBlock();
nodes = lb.getLocations();
//
// Connect to first DataNode in the list.
//
success = createBlockOutputStream(nodes, clientName, false);
if (!success) {
LOG.info("Abandoning block " + block + " for file " + src);
namenode.abandonBlock(block, src, clientName);
if (errorIndex < nodes.length) {
LOG.debug("Excluding datanode " + nodes[errorIndex]);
excludedNodes.add(nodes[errorIndex]);
}
// Connection failed. Let's wait a little bit and retry
retry = true;
}
} while (retry && --count >= 0);
if (!success) {
throw new IOException("Unable to create new block.");
}
return nodes;
}
// For pipelined writes, connects to the first datanode in the pipeline.
// For parallel writes, connects to all specified datanodes.
// Returns true on success, false otherwise.
//
private boolean createBlockOutputStream(DatanodeInfo[] nodes, String client,
boolean recoveryFlag) {
String firstBadLink = "";
if (LOG.isDebugEnabled()) {
for (int i = 0; i < nodes.length; i++) {
LOG.debug("pipeline = " + nodes[i].getName());
}
}
// persist blocks on namenode on next flush
persistBlocks = true;
boolean result = false;
int curNode = 0;
int length = 0;
int pipelineDepth;
if (doParallelWrites) {
length = nodes.length; // connect to all datanodes
pipelineDepth = 1;
} else {
length = 1; // connect to only the first datanode
pipelineDepth = nodes.length;
}
DataOutputStream[] tmpOut = new DataOutputStream[length];
DataInputStream[] replyIn = new DataInputStream[length];
Socket[] sockets = new Socket[length];
try {
for (curNode = 0; curNode < length; curNode++) {
LOG.debug("Connecting to " + nodes[curNode].getName());
InetSocketAddress target = NetUtils.createSocketAddr(nodes[curNode].getName());
Socket s = socketFactory.createSocket();
sockets[curNode] = s;
timeoutValue = socketReadExtentionTimeout * pipelineDepth + socketTimeout;
NetUtils.connect(s, target, timeoutValue, ipTosValue);
s.setSoTimeout(timeoutValue);
s.setSendBufferSize(DEFAULT_DATA_SOCKET_SIZE);
LOG.debug("Send buf size " + s.getSendBufferSize());
long writeTimeout = datanodeWriteExtentionTimeout *
pipelineDepth + datanodeWriteTimeout;
//
// Xmit header info to datanode (see DataXceiver.java)
//
DataOutputStream out = new DataOutputStream(
new BufferedOutputStream(NetUtils.getOutputStream(s, writeTimeout),
DataNode.SMALL_BUFFER_SIZE));
tmpOut[curNode] = out;
DataInputStream brs = new DataInputStream(NetUtils.getInputStream(s));
replyIn[curNode] = brs;
int version = getDataTransferProtocolVersion();
WriteBlockHeader header = new WriteBlockHeader(version,
namespaceId, block.getBlockId(), block.getGenerationStamp(),
pipelineDepth, recoveryFlag, false, null, pipelineDepth - 1,
nodes, client);
header.writeVersionAndOpCode(out);
header.write(out);
checksum.writeHeader(out);
out.flush();
// receive ack for connect
firstBadLink = Text.readString(brs);
if (firstBadLink.length() != 0) {
throw new IOException("Bad connect ack with firstBadLink " +
firstBadLink);
}
}
result = true; // success
blockStream = new MultiDataOutputStream(tmpOut);
blockReplyStream = new MultiDataInputStream(replyIn);
this.s = sockets;
} catch (IOException ie) {
LOG.info("Exception in createBlockOutputStream " + nodes[curNode].getName() + " " +
" for file " + src +
ie);
incWriteExpCntToStats();
// find the datanode that matches
if (firstBadLink.length() != 0) {
for (int i = 0; i < nodes.length; i++) {
if (nodes[i].getName().equals(firstBadLink)) {
errorIndex = i;
break;
}
}
} else {
// if we are doing parallel writes, then record the datanode that is bad
errorIndex = curNode;
}
hasError = true;
setLastException(ie);
blockReplyStream = null;
result = false;
} finally {
if (!result) {
for (int i = 0; i < sockets.length; i++) {
IOUtils.closeSocket(sockets[i]);
}
this.s = null;
}
}
return result;
}
private LocatedBlock locateFollowingBlock(long start,
DatanodeInfo[] excludedNodes
) throws IOException {
int retries = conf.getInt("dfs.client.block.write.locateFollowingBlock.retries", 5);
long sleeptime = 400;
while (true) {
long localstart = System.currentTimeMillis();
while (true) {
try {
VersionedLocatedBlock loc = null;
if (namenodeProtocolProxy != null
&& namenodeProtocolProxy.isMethodSupported(
"addBlockAndFetchMetaInfo", String.class, String.class,
DatanodeInfo[].class, DatanodeInfo[].class, long.class,
Block.class)) {
loc = namenode.addBlockAndFetchMetaInfo(src, clientName,
excludedNodes, favoredNodes, this.lastBlkOffset, getLastBlock());
} else if (namenodeProtocolProxy != null
&& namenodeProtocolProxy.isMethodSupported(
"addBlockAndFetchMetaInfo", String.class, String.class,
DatanodeInfo[].class, DatanodeInfo[].class, long.class)) {
loc = namenode.addBlockAndFetchMetaInfo(src, clientName,
excludedNodes, favoredNodes, this.lastBlkOffset);
} else if (namenodeProtocolProxy != null
&& namenodeProtocolProxy.isMethodSupported(
"addBlockAndFetchMetaInfo", String.class, String.class,
DatanodeInfo[].class, long.class)) {
loc = namenode.addBlockAndFetchMetaInfo(src, clientName,
excludedNodes, this.lastBlkOffset);
} else if (namenodeProtocolProxy != null
&& namenodeProtocolProxy.isMethodSupported(
"addBlockAndFetchMetaInfo", String.class, String.class,
DatanodeInfo[].class)) {
loc = namenode.addBlockAndFetchMetaInfo(src, clientName,
excludedNodes);
} else if (namenodeProtocolProxy != null
&& namenodeProtocolProxy.isMethodSupported(
"addBlockAndFetchVersion", String.class, String.class,
DatanodeInfo[].class)) {
loc = namenode.addBlockAndFetchVersion(src, clientName,
excludedNodes);
} else if (namenodeProtocolProxy != null
&& namenodeProtocolProxy.isMethodSupported("addBlock",
String.class, String.class, DatanodeInfo[].class)) {
return namenode.addBlock(src, clientName, excludedNodes);
} else {
return namenode.addBlock(src, clientName);
}
updateDataTransferProtocolVersionIfNeeded(loc.getDataProtocolVersion());
if (loc instanceof LocatedBlockWithMetaInfo) {
LocatedBlockWithMetaInfo metaLoc = (LocatedBlockWithMetaInfo)loc;
this.namespaceId = metaLoc.getNamespaceID();
getNewNameNodeIfNeeded(metaLoc.getMethodFingerPrint());
}
return loc;
} catch (RemoteException e) {
IOException ue =
e.unwrapRemoteException(FileNotFoundException.class,
AccessControlException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
if (ue != e) {
throw ue; // no need to retry these exceptions
}
if (NotReplicatedYetException.class.getName().
equals(e.getClassName())) {
if (retries == 0) {
throw e;
} else {
--retries;
LOG.info(StringUtils.stringifyException(e));
if (System.currentTimeMillis() - localstart > 5000) {
LOG.info("Waiting for replication for "
+ (System.currentTimeMillis() - localstart) / 1000
+ " seconds");
}
try {
LOG.warn("NotReplicatedYetException sleeping " + src
+ " retries left " + retries);
Thread.sleep(sleeptime);
sleeptime *= 2;
} catch (InterruptedException ie) {
}
}
} else {
throw e;
}
}
}
}
}
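// Retry sketch: NotReplicatedYetException is retried with doubling sleeps
// (400ms, 800ms, 1600ms, ...), so the default 5 retries give the namenode
// roughly 400 * (2^5 - 1) = 12400 ms to finish replicating the previous
// block before the client gives up.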
@Override
protected void incMetrics(int len) {
metrics.incWriteOps();
metrics.incWriteSize(len);
}
// @see FSOutputSummer#writeChunk()
@Override
protected synchronized void writeChunk(byte[] b, int offset, int len, byte[] checksum)
throws IOException {
checkOpen();
isClosed();
int cklen = checksum.length;
int bytesPerChecksum = this.checksum.getBytesPerChecksum();
if (len > bytesPerChecksum) {
throw new IOException("writeChunk() buffer size is " + len +
" is larger than supported bytesPerChecksum " +
bytesPerChecksum);
}
if (checksum.length != this.checksum.getChecksumSize()) {
throw new IOException("writeChunk() checksum size is supposed to be " +
this.checksum.getChecksumSize() +
" but found to be " + checksum.length);
}
synchronized (dataQueue) {
// If queue is full, then wait till we can create enough space
while (!closed && dataQueue.size() + ackQueue.size() > maxPackets) {
try {
dataQueue.wait(packetTimeout);
checkIfLastPacketTimeout();
} catch (InterruptedException e) {
}
}
isClosed();
if (currentPacket == null) {
currentPacket = new Packet(packetSize, chunksPerPacket, bytesCurBlock);
if (LOG.isDebugEnabled()) {
LOG.debug("DFSClient writeChunk allocating new packet seqno=" +
currentPacket.seqno +
", src=" + src +
", packetSize=" + packetSize +
", chunksPerPacket=" + chunksPerPacket +
", bytesCurBlock=" + bytesCurBlock +
", forceSync=" + forceSync +
", doParallelWrites=" + doParallelWrites +
", len=" + len +
", blocksize=" + blockSize);
}
}
currentPacket.writeChecksum(checksum, 0, cklen);
currentPacket.writeData(b, offset, len);
currentPacket.numChunks++;
bytesCurBlock += len;
// If packet is full, enqueue it for transmission
if (currentPacket.numChunks == currentPacket.maxChunks ||
bytesCurBlock == blockSize) {
if (LOG.isDebugEnabled()) {
LOG.debug("DFSClient writeChunk packet full seqno=" +
currentPacket.seqno +
", src=" + src +
", bytesCurBlock=" + bytesCurBlock +
", blockSize=" + blockSize +
", appendChunk=" + appendChunk);
}
//
// if we allocated a new packet because we encountered a block
// boundary, reset bytesCurBlock.
//
if (bytesCurBlock == blockSize) {
currentPacket.lastPacketInBlock = true;
bytesCurBlock = 0;
lastFlushOffset = 0;
}
enqueueCurrentPacket();
// If this was the first write after reopening a file, then the above
// write filled up any partial chunk. Tell the summer to generate full
// crc chunks from now on.
if (appendChunk) {
appendChunk = false;
resetChecksumChunk(bytesPerChecksum);
}
int psize = Math.min((int)(blockSize-bytesCurBlock), writePacketSize);
computePacketChunkSize(psize, bytesPerChecksum);
}
}
//LOG.debug("DFSClient writeChunk done length " + len +
// " checksum length " + cklen);
}
private synchronized void enqueueCurrentPacket() {
synchronized (dataQueue) {
if (currentPacket == null) return;
dataQueue.addLast(currentPacket);
dataQueue.notifyAll();
lastQueuedSeqno = currentPacket.seqno;
currentPacket = null;
}
}
/**
* All data is written out to datanodes. It is not guaranteed
* that data has been flushed to persistent store on the
* datanode. Block allocations are persisted on namenode.
*/
public void sync() throws IOException {
long start = System.currentTimeMillis();
try {
long toWaitFor;
synchronized (this) {
/* Record the current blockOffset. It might be changed inside
* flushBuffer(), where a partial checksum chunk might be flushed.
* After the flush, bytesCurBlock is reset to its previous value so
* that any partial checksum chunk is sent now and again in the
* next packet.
*/
long saveOffset = bytesCurBlock;
Packet oldCurrentPacket = currentPacket;
// flush checksum buffer, but keep checksum buffer intact
flushBuffer(true);
// bytesCurBlock potentially incremented if there was buffered data
if (LOG.isDebugEnabled()) {
LOG.debug("DFSClient flush() : saveOffset " + saveOffset +
" bytesCurBlock " + bytesCurBlock +
" lastFlushOffset " + lastFlushOffset);
}
// Flush only if we haven't already flushed till this offset.
if (lastFlushOffset != bytesCurBlock) {
assert bytesCurBlock > lastFlushOffset;
// record the valid offset of this flush
lastFlushOffset = bytesCurBlock;
enqueueCurrentPacket();
} else {
// just discard the current packet since it has already been sent.
if (oldCurrentPacket == null && currentPacket != null) {
// If we didn't previously have a packet queued, and now we do,
// but we don't plan on sending it, then we should not
// skip a sequence number for it!
currentSeqno--;
}
currentPacket = null;
}
// Restore state of stream. Record the last flush offset
// of the last full chunk that was flushed.
//
bytesCurBlock = saveOffset;
toWaitFor = lastQueuedSeqno;
}
waitForAckedSeqno(toWaitFor);
// If any new blocks were allocated since the last flush,
// then persist block locations on namenode.
//
boolean willPersist;
synchronized (this) {
willPersist = persistBlocks;
persistBlocks = false;
}
if (willPersist) {
namenode.fsync(src, clientName);
}
long timeval = System.currentTimeMillis() - start;
metrics.incSyncTime(timeval);
} catch (IOException e) {
lastException = new IOException("IOException flush:", e);
closed = true;
closeThreads();
throw e;
}
}
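// Illustrative caller pattern (sketch; `out` and `record` are assumptions):
// sync() makes the written bytes visible to new readers and persists block
// allocations, but it does not guarantee that datanodes have flushed to disk.
//
//   out.write(record);
//   out.sync();   // readers opening the file can now see `record`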
private Block getLastBlock() {
return this.block;
}
/**
* Returns the number of replicas of current block. This can be different
* from the designated replication factor of the file because the NameNode
* does not replicate the block to which a client is currently writing.
* The client continues to write to a block even if a few datanodes in the
* write pipeline have failed. If the current block is full and the next
* block is not yet allocated, then this API will return 0 because there are
* no replicas in the pipeline.
*/
public int getNumCurrentReplicas() throws IOException {
synchronized(dataQueue) {
if (nodes == null) {
return blockReplication;
}
return nodes.length;
}
}
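// Monitoring sketch (caller code is an assumption): an application that must
// not drop below two live replicas while writing can poll the pipeline width.
//
//   DFSOutputStream dfsOut = (DFSOutputStream) out.getWrappedStream();
//   if (dfsOut.getNumCurrentReplicas() < 2) {
//     // close and re-create the file to force a fresh, full pipeline
//   }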
/**
* Waits till all existing data is flushed and confirmations
* received from datanodes.
*/
private void flushInternal() throws IOException {
isClosed();
checkOpen();
long toWaitFor;
synchronized (this) {
enqueueCurrentPacket();
toWaitFor = lastQueuedSeqno;
}
waitForAckedSeqno(toWaitFor);
}
private void waitForAckedSeqno(long seqnumToWaitFor) throws IOException {
boolean interrupted = false;
synchronized (ackQueue) {
while (!closed) {
isClosed();
if (lastAckedSeqno >= seqnumToWaitFor) {
break;
}
try {
ackQueue.wait();
} catch (InterruptedException ie) {
interrupted = true;
}
}
}
if (interrupted) {
Thread.currentThread().interrupt();
}
isClosed();
}
/**
* Closes this output stream and releases any system
* resources associated with this stream.
*/
@Override
public void close() throws IOException {
if (closed) {
IOException e = lastException;
if (e == null)
return;
else
throw e;
}
try {
closeInternal();
leasechecker.remove(src);
if (s != null) {
for (int i = 0; i < s.length; i++) {
s[i].close();
}
s = null;
}
} catch (IOException e) {
lastException = e;
throw e;
}
}
/**
* Harsh abort method that should only be used from tests: it
* prevents pipeline recovery when, e.g., a DN shuts down.
*/
void abortForTests() throws IOException {
streamer.close();
response.close();
closed = true;
}
/**
* Aborts this output stream and releases any system
* resources associated with this stream.
*/
synchronized void abort() throws IOException {
if (closed) {
return;
}
setLastException(new IOException("Lease timeout of " +
(hdfsTimeout/1000) + " seconds expired."));
closeThreads();
}
// shutdown datastreamer and responseprocessor threads.
private void closeThreads() throws IOException {
try {
if (streamer != null) {
streamer.close();
streamer.join();
}
// shutdown response after streamer has exited.
if (response != null) {
response.close();
response.join();
response = null;
}
} catch (InterruptedException e) {
throw new IOException("Failed to shutdown response thread");
}
}
/**
* Closes this output stream and releases any system
* resources associated with this stream.
*/
private synchronized void closeInternal() throws IOException {
checkOpen();
isClosed();
try {
flushBuffer(); // flush from all upper layers
// Mark that this packet is the last packet in block.
// If there are no outstanding packets and the last packet
// was not the last one in the current block, then create a
// packet with empty payload.
synchronized (dataQueue) {
if (currentPacket == null && bytesCurBlock != 0) {
currentPacket = new Packet(packetSize, chunksPerPacket,
bytesCurBlock);
}
if (currentPacket != null) {
currentPacket.lastPacketInBlock = true;
}
}
flushInternal(); // flush all data to Datanodes
isClosed(); // check to see if flushInternal had any exceptions
closed = true; // allow closeThreads() to shut down threads
closeThreads();
synchronized (dataQueue) {
if (blockStream != null) {
blockStream.writeInt(0); // indicate end-of-block to datanode
blockStream.close();
blockReplyStream.close();
}
if (s != null) {
for (int i = 0; i < s.length; i++) {
s[i].close();
}
s = null;
}
}
streamer = null;
blockStream = null;
blockReplyStream = null;
closeFile(src, lastBlkOffset, getLastBlock());
} finally {
closed = true;
}
}
void setArtificialSlowdown(long period) {
artificialSlowdown = period;
}
synchronized void setChunksPerPacket(int value) {
chunksPerPacket = Math.min(chunksPerPacket, value);
packetSize = DataNode.PKT_HEADER_LEN + SIZE_OF_INTEGER +
(checksum.getBytesPerChecksum() +
checksum.getChecksumSize()) * chunksPerPacket;
}
synchronized void setTestFilename(String newname) {
src = newname;
}
/**
* Returns the size of a file as it was when this stream was opened
*/
long getInitialLen() {
return initialFileSize;
}
}
void reportChecksumFailure(String file, Block blk, DatanodeInfo dn) {
DatanodeInfo [] dnArr = { dn };
LocatedBlock [] lblocks = { new LocatedBlock(blk, dnArr) };
reportChecksumFailure(file, lblocks);
}
// just reports checksum failure and ignores any exception during the report.
void reportChecksumFailure(String file, LocatedBlock lblocks[]) {
try {
reportBadBlocks(lblocks);
} catch (IOException ie) {
LOG.info("Found corruption while reading " + file
+ ". Error repairing corrupt blocks. Bad blocks remain. "
+ StringUtils.stringifyException(ie));
}
}
/**
* Get the data transfer protocol version supported in the cluster
* assuming all the datanodes have the same version.
*
* @return the data transfer protocol version supported in the cluster
*/
int getDataTransferProtocolVersion() throws IOException {
synchronized (dataTransferVersion) {
if (dataTransferVersion == -1) {
// Get the version number from NN
try {
int remoteDataTransferVersion = namenode.getDataTransferProtocolVersion();
updateDataTransferProtocolVersionIfNeeded(remoteDataTransferVersion);
} catch (RemoteException re) {
IOException ioe = re.unwrapRemoteException(IOException.class);
if (ioe.getMessage().startsWith(IOException.class.getName() + ": " +
NoSuchMethodException.class.getName())) {
dataTransferVersion = 14; // last version not supporting this RPC
} else {
throw ioe;
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Data Transfer Protocal Version is "+ dataTransferVersion);
}
}
return dataTransferVersion;
}
}
void updateDataTransferProtocolVersionIfNeeded(int remoteDataTransferVersion) {
int newDataTransferVersion = 0;
if (remoteDataTransferVersion < DataTransferProtocol.DATA_TRANSFER_VERSION) {
// client is newer than server
newDataTransferVersion = remoteDataTransferVersion;
} else {
// client is older or the same as server
newDataTransferVersion = DataTransferProtocol.DATA_TRANSFER_VERSION;
}
synchronized (dataTransferVersion) {
if (dataTransferVersion != newDataTransferVersion) {
dataTransferVersion = newDataTransferVersion;
}
}
}
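// Negotiation sketch: the effective wire version is
//   min(remoteDataTransferVersion, DataTransferProtocol.DATA_TRANSFER_VERSION)
// so, illustratively, a client built for version 17 talking to a
// version-15 cluster settles on 15 and never emits the newer packet fields.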
/**
* If stats object is not null, increment the read exception count
*/
void incReadExpCntToStats() {
if (stats != null) {
stats.incrementCntReadException();
}
}
/**
* If stats object is not null, increment the write exception count
*/
void incWriteExpCntToStats() {
if (stats != null) {
stats.incrementCntWriteException();
}
}
/**
* If stats object is not null, increment the files read count
*/
void incFileReadToStats() {
if (stats != null) {
stats.incrementFilesRead();
}
}
/**
* Determine whether the input address is in the same rack as local machine
*/
boolean isInLocalRack(InetAddress addr) {
if (dnsToSwitchMapping == null || this.localhostNetworkLocation == null) {
return false;
}
ArrayList<String> tempList = new ArrayList<String>();
tempList.add(addr.getHostName());
List<String> retList = dnsToSwitchMapping.resolve(tempList);
if (retList != null && retList.size() > 0) {
return retList.get(0).equals(this.localhostNetworkLocation);
} else {
return false;
}
}
public LocatedBlockWithFileName getBlockInfo(final long blockId)
throws IOException {
return namenode.getBlockInfo(blockId);
}
static void sleepForUnitTest(long artificialSlowdown) {
// This is used by unit test to trigger race conditions.
if (artificialSlowdown > 0) {
LOG.debug("Sleeping for artificial slowdown of " +
artificialSlowdown + "ms");
try {
Thread.sleep(artificialSlowdown);
} catch (InterruptedException e) {}
}
}
/** {@inheritDoc} */
public String toString() {
return getClass().getSimpleName() + "[clientName=" + clientName
+ ", ugi=" + ugi + "]";
}
}