// org.apache.hadoop.fs.FileContext Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.TreeSet;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Options.CreateOpts;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.permission.FsPermission;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.RpcClientException;
import org.apache.hadoop.ipc.RpcServerException;
import org.apache.hadoop.ipc.UnexpectedServerException;
import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.security.AccessControlException;
/**
* The FileContext class provides an interface to the application writer for
* using the Hadoop file system.
* It provides a set of methods for the usual operation: create, open,
* list, etc
*
*
* *** Path Names ***
*
*
* The Hadoop file system supports a URI name space and URI names.
* It offers a forest of file systems that can be referenced using fully
* qualified URIs.
* Two common Hadoop file system implementations are
* <ul>
* <li> the local file system: file:///path
* <li> the hdfs file system: hdfs://nnAddress:nnPort/path
* </ul>
*
*
* While URI names are very flexible, they require knowing the name or address
* of the server. For convenience one often wants to access the default system
* in one's environment without knowing its name/address. This has an
* additional benefit that it allows one to change one's default fs
* (e.g. admin moves application from cluster1 to cluster2).
*
*
* To facilitate this, Hadoop supports a notion of a default file system.
* The user can set his default file system, although this is
* typically set up for you in your environment via your default config.
* A default file system implies a default scheme and authority; slash-relative
* names (such as /foo/bar) are resolved relative to that default FS.
* Similarly a user can also have working-directory-relative names (i.e. names
* not starting with a slash). While the working directory is generally in the
* same default FS, the wd can be in a different FS.
*
* Hence Hadoop path names can be one of:
* <ul>
* <li> fully qualified URI: scheme://authority/path
* <li> slash relative names: /path relative to the default file system
* <li> wd-relative names: path relative to the working dir
* </ul>
* Relative paths with scheme (scheme:foo/bar) are illegal.
*
*
* ****The Role of the FileContext and configuration defaults****
*
* The FileContext provides file namespace context for resolving file names;
* it also contains the umask for permissions. In that sense it is like the
* per-process file-related state in a Unix system.
* These two properties
* <ul>
* <li> default file system (i.e. your slash)
* <li> umask
* </ul>
* in general, are obtained from the default configuration file
* in your environment (see {@link Configuration}).
*
* No other configuration parameters are obtained from the default config as
* far as the file context layer is concerned. All file system instances
* (i.e. deployments of file systems) have default properties; we call these
* server side (SS) defaults. Operations like create allow one to select many
* properties: either pass them in as explicit parameters or use
* the SS properties.
*
* The file system related SS defaults are
* <ul>
* <li> the home directory (default is "/user/userName")
* <li> the initial wd (only for local fs)
* <li> replication factor
* <li> block size
* <li> buffer size
* <li> bytesPerChecksum (if used).
* </ul>
*
*
*
* *** Usage Model for the FileContext class ***
*
* Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
* Unspecified values come from core-defaults.xml in the release jar.
* <ul>
* <li> myFContext = FileContext.getFileContext(); // uses the default config
* // which has your default FS
* <li> myFContext.create(path, ...);
* <li> myFContext.setWorkingDirectory(path);
* <li> myFContext.open(path, ...);
* </ul>
*
* Example 2: Get a FileContext with a specific URI as the default FS
* <ul>
* <li> myFContext = FileContext.getFileContext(URI)
* <li> myFContext.create(path, ...);
* <li> ...
* </ul>
*
* Example 3: FileContext with local file system as the default
* <ul>
* <li> myFContext = FileContext.getLocalFSFileContext()
* <li> myFContext.create(path, ...);
* <li> ...
* </ul>
*
* Example 4: Use a specific config, ignoring $HADOOP_CONFIG
* Generally you should not need to use a config unless you are doing
* the following:
* <ul>
* <li> configX = someConfigSomeOnePassedToYou.
* <li> myFContext = getFileContext(configX); // configX is not changed,
* // is passed down
* <li> myFContext.create(path, ...);
* <li> ...
* </ul>
*
*
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */
public final class FileContext {
public static final Log LOG = LogFactory.getLog(FileContext.class);
public static final FsPermission DEFAULT_PERM = FsPermission.getDefault();
volatile private static FileContext localFsSingleton = null;
/**
* List of files that should be deleted on JVM shutdown.
*/
static final Map> DELETE_ON_EXIT =
new IdentityHashMap>();
/** JVM shutdown hook thread. */
static final FileContextFinalizer FINALIZER =
new FileContextFinalizer();
private static final PathFilter DEFAULT_FILTER = new PathFilter() {
public boolean accept(final Path file) {
return true;
}
};
/**
 * The state that defines a FileContext:
 * 1) defaultFS (the file system that slash-relative names resolve against)
 * 2) wd (the working directory used for wd-relative names)
 * 3) umask (applied to permissions on create and mkdir)
 */
private final AbstractFileSystem defaultFS; //default FS for this FileContext.
private Path workingDir; // Fully qualified
private FsPermission umask;
private final Configuration conf;
/**
 * Builds a FileContext on top of the given default file system.
 *
 * @param defFs the default file system (the "slash") of this context
 * @param theUmask the umask of this context; if null the umask is read
 *          from {@code aConf}
 * @param aConf configuration kept by this context and handed to lower layers
 */
private FileContext(final AbstractFileSystem defFs,
    final FsPermission theUmask, final Configuration aConf) {
  defaultFS = defFs;
  // Honor the umask supplied by the factory method instead of silently
  // recomputing it; fall back to the config only when none was given.
  umask = (theUmask != null) ? theUmask : FsPermission.getUMask(aConf);
  conf = aConf;
  /*
   * Init the wd.
   * WorkingDir is implemented at the FileContext layer
   * NOT at the AbstractFileSystem layer.
   * If the DefaultFS, such as localFilesystem has a notion of
   * builtin WD, we use that as the initial WD.
   * Otherwise the WD is initialized to the home directory.
   */
  workingDir = defaultFS.getInitialWorkingDirectory();
  if (workingDir == null) {
    workingDir = defaultFS.getHomeDirectory();
  }
  util = new Util(); // for the inner class
}
/*
 * Remove the relative part of a path, returning an "absolute" one:
 * a wd-relative path ("foo/bar") is prefixed with the working directory,
 * i.e. it becomes "<wd>/foo/bar".
 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path
 * ("/foo/bar") is returned unchanged.
 *
 * Applications that use FileContext should use #makeQualified() since
 * they really want a fully qualified URI.
 * Hence this method is not called makeAbsolute() and
 * has been deliberately declared private.
 */
private Path fixRelativePart(Path p) {
  return p.isUriPathAbsolute() ? p : new Path(workingDir, p);
}
/**
* Delete all the paths that were marked as delete-on-exit.
*/
static void processDeleteOnExit() {
synchronized (DELETE_ON_EXIT) {
Set>> set = DELETE_ON_EXIT.entrySet();
for (Entry> entry : set) {
FileContext fc = entry.getKey();
Set paths = entry.getValue();
for (Path path : paths) {
try {
fc.delete(path, true);
} catch (IOException e) {
LOG.warn("Ignoring failure to deleteOnExit for path " + path);
}
}
}
DELETE_ON_EXIT.clear();
}
}
/**
 * Rejects path names that carry a scheme but have a relative path part.
 *
 * @param path the path to be checked
 * @throws HadoopIllegalArgumentException if {@code path} has a scheme and
 *           its path part is not absolute
 */
private static void checkNotSchemeWithRelative(final Path path) {
  // Legal when there is no scheme at all, or when the path part is absolute.
  if (!path.toUri().isAbsolute() || path.isUriPathAbsolute()) {
    return;
  }
  throw new HadoopIllegalArgumentException(
      "Unsupported name: has scheme but relative path-part");
}
/**
 * Get the file system of the supplied path.
 *
 * @param absOrFqPath absolute or fully qualified path
 * @return the default file system if its {@code checkPath} accepts the
 *         path, otherwise the file system looked up from the path's URI
 *
 * @throws UnsupportedFileSystemException If the file system for
 *           {@code absOrFqPath} is not supported.
 * @throws HadoopIllegalArgumentException if the path has a scheme with a
 *           relative path part, or is relative without a scheme
 */
private AbstractFileSystem getFSofPath(final Path absOrFqPath)
    throws UnsupportedFileSystemException {
  checkNotSchemeWithRelative(absOrFqPath);
  final boolean relativeWithoutScheme =
      !absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null;
  if (relativeWithoutScheme) {
    throw new HadoopIllegalArgumentException(
        "FileContext Bug: path is relative");
  }

  try {
    // Is it the default FS for this FileContext?
    defaultFS.checkPath(absOrFqPath);
    return defaultFS;
  } catch (Exception notDefaultFs) { // it is different FileSystem
    return AbstractFileSystem.get(absOrFqPath.toUri(), conf);
  }
}
/**
* Protected Static Factory methods for getting FileContexts
* that take an AbstractFileSystem as input. To be used for testing.
*/
/**
 * Create a FileContext with the specified FS as default using the specified
 * config. The umask of the new context is read from the config.
 *
 * @param defFS the file system to use as the default FS
 * @param aConf the configuration to use
 * @return new FileContext with the specified FS as default.
 */
public static FileContext getFileContext(final AbstractFileSystem defFS,
    final Configuration aConf) {
  final FsPermission confUmask = FsPermission.getUMask(aConf);
  return new FileContext(defFS, confUmask, aConf);
}
/**
 * Create a FileContext for the specified file system using a newly created
 * default config.
 *
 * @param defaultFS the file system to use as the default FS
 * @return a FileContext with the specified AbstractFileSystem
 *         as the default FS.
 */
protected static FileContext getFileContext(
    final AbstractFileSystem defaultFS) {
  final Configuration defaultConf = new Configuration();
  return getFileContext(defaultFS, defaultConf);
}
/**
* Static Factory methods for getting a FileContext.
* Note new file contexts are created for each call.
* The only singleton is the local FS context using the default config.
*
* Methods that use the default config: the default config read from the
* $HADOOP_CONFIG/core.xml,
* Unspecified key-values for config are defaulted from core-defaults.xml
* in the release jar.
*
* The keys relevant to the FileContext layer are extracted at time of
* construction. Changes to the config after the call are ignored
* by the FileContext layer.
* The conf is passed to lower layers like AbstractFileSystem and HDFS which
* pick up their own config variables.
*/
/**
 * Create a FileContext using the default config read from the
 * $HADOOP_CONFIG/core.xml. Unspecified key-values for config are defaulted
 * from core-defaults.xml in the release jar.
 *
 * @return a new FileContext built from a newly created default config
 * @throws UnsupportedFileSystemException If the file system from the default
 *           configuration is not supported
 */
public static FileContext getFileContext()
    throws UnsupportedFileSystemException {
  final Configuration defaultConf = new Configuration();
  return getFileContext(defaultConf);
}
/**
 * Returns the cached FileContext for the local file system, creating it
 * with the default config on first use.
 *
 * @return a FileContext for the local file system using the default config.
 * @throws UnsupportedFileSystemException If the file system for
 *           {@link FsConstants#LOCAL_FS_URI} is not supported.
 */
public static FileContext getLocalFSFileContext()
    throws UnsupportedFileSystemException {
  if (localFsSingleton != null) {
    return localFsSingleton;
  }
  localFsSingleton = getFileContext(FsConstants.LOCAL_FS_URI);
  return localFsSingleton;
}
/**
 * Create a FileContext for the specified URI using a newly created default
 * config.
 *
 * @param defaultFsUri URI of the file system to use as the default FS
 * @return a FileContext with the specified URI as the default FS.
 *
 * @throws UnsupportedFileSystemException If the file system for
 *           {@code defaultFsUri} is not supported
 */
public static FileContext getFileContext(final URI defaultFsUri)
    throws UnsupportedFileSystemException {
  final Configuration defaultConf = new Configuration();
  return getFileContext(defaultFsUri, defaultConf);
}
/**
 * Create a FileContext for the specified default URI using the specified
 * config.
 *
 * @param defaultFsUri URI of the file system to use as the default FS
 * @param aConf the configuration to use
 * @return new FileContext for the specified uri
 * @throws UnsupportedFileSystemException If the file system with the
 *           specified URI is not supported
 */
public static FileContext getFileContext(final URI defaultFsUri,
    final Configuration aConf) throws UnsupportedFileSystemException {
  final AbstractFileSystem defaultAfs =
      AbstractFileSystem.get(defaultFsUri, aConf);
  return getFileContext(defaultAfs, aConf);
}
/**
 * Create a FileContext using the passed config. The default FS is taken
 * from the config's FS_DEFAULT_NAME_KEY entry, falling back to
 * FS_DEFAULT_NAME_DEFAULT. Generally it is better to use
 * {@link #getFileContext(URI, Configuration)} instead of this one.
 *
 * @param aConf the configuration to use
 * @return new FileContext
 * @throws UnsupportedFileSystemException If file system in the config
 *           is not supported
 */
public static FileContext getFileContext(final Configuration aConf)
    throws UnsupportedFileSystemException {
  final String defaultFsName =
      aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT);
  return getFileContext(URI.create(defaultFsName), aConf);
}
/**
 * Create a FileContext for the local file system using the specified
 * config. Unlike the no-argument variant, the result is not cached.
 *
 * @param aConf - from which the FileContext is configured
 * @return a FileContext for the local file system using the specified config.
 *
 * @throws UnsupportedFileSystemException If default file system in the config
 *           is not supported
 */
public static FileContext getLocalFSFileContext(final Configuration aConf)
    throws UnsupportedFileSystemException {
  final URI localFsUri = FsConstants.LOCAL_FS_URI;
  return getFileContext(localFsUri, aConf);
}
/*
 * Exposes the default file system of this context.
 * This method is needed for tests.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable /* return type will change to AFS once
                                HADOOP-6223 is completed */
public AbstractFileSystem getDefaultFileSystem() {
  return this.defaultFS;
}
/**
 * Set the working directory for wd-relative names (such as "foo/bar").
 * The working directory feature is provided by simply prefixing relative
 * names with the working dir. Note this is different from Unix where the wd
 * is actually set to the inode. Hence setWorkingDirectory does not follow
 * symlinks etc. This works better in a distributed environment that has
 * multiple independent roots. {@link #getWorkingDirectory()} should return
 * what setWorkingDirectory() set.
 *
 * newWDir can be one of:
 * <ul>
 * <li> relative path: "foo/bar";
 * <li> absolute without scheme: "/foo/bar"
 * <li> fully qualified with scheme: "xx://auth/foo/bar"
 * </ul>
 *
 * Illegal WDs:
 * <ul>
 * <li> relative with scheme: "xx:foo/bar"
 * <li> non existent directory
 * </ul>
 *
 * @param newWDir new working directory
 * @throws FileNotFoundException if {@code newWDir} refers to a file
 * @throws IOException if the status of the new working directory cannot
 *           be obtained
 */
public void setWorkingDirectory(final Path newWDir) throws IOException {
  checkNotSchemeWithRelative(newWDir);
  /* wd is stored as a fully qualified path, so the candidate is built
   * against the current wd before its status is looked up.
   */
  final Path candidate = new Path(workingDir, newWDir);
  if (getFileStatus(candidate).isFile()) {
    throw new FileNotFoundException("Cannot setWD to a file");
  }
  workingDir = candidate;
}
/**
 * Gets the working directory for wd-relative names (such as "foo/bar").
 *
 * @return the current working directory of this FileContext
 */
public Path getWorkingDirectory() {
  return this.workingDir;
}
/**
 * Gets the umask currently held by this FileContext.
 *
 * @return the umask of this FileContext
 */
public FsPermission getUMask() {
  return this.umask;
}
/**
 * Replaces the umask of this FileContext with the supplied one.
 *
 * @param newUmask the new umask
 */
public void setUMask(final FsPermission newUmask) {
  this.umask = newUmask;
}
/**
 * Make the path fully qualified if it isn't.
 * A fully-qualified path has scheme and authority specified and an absolute
 * path.
 * The default file system and working dir of this FileContext are used to
 * qualify the path.
 *
 * @param path the path to qualify
 * @return qualified path
 */
public Path makeQualified(final Path path) {
  return path.makeQualified(defaultFS.getUri(), workingDir);
}
/**
 * Create or overwrite file on indicated path and returns an output stream for
 * writing into the file.
 *
 * @param f the file name to open
 * @param createFlag gives the semantics of create: overwrite, append etc.
 * @param opts file creation options; see {@link Options.CreateOpts}.
 *          <ul>
 *          <li>Progress - to report progress on the operation - default null
 *          <li>Permission - umask is applied against permission: default is
 *          FsPermission.getDefault()
 *          <li>CreateParent - create missing parent path; default is to not
 *          create parents
 *          <li>The defaults for the following are SS defaults of the file
 *          server implementing the target path. Not all parameters make sense
 *          for all kinds of file system - e.g. localFS ignores Blocksize,
 *          replication, checksum
 *          <ul>
 *          <li>BufferSize - buffersize used in FSDataOutputStream
 *          <li>Blocksize - block size for file blocks
 *          <li>ReplicationFactor - replication for blocks
 *          <li>BytesPerChecksum - bytes per checksum
 *          </ul>
 *          </ul>
 *
 * @return {@link FSDataOutputStream} for created file
 *
 * @throws AccessControlException If access is denied
 * @throws FileAlreadyExistsException If file {@code f} already exists
 * @throws FileNotFoundException If parent of {@code f} does not exist
 *           and {@code createParent} is false
 * @throws ParentNotDirectoryException If parent of {@code f} is not a
 *           directory.
 * @throws UnsupportedFileSystemException If file system for {@code f} is
 *           not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 *
 * RuntimeExceptions:
 * @throws InvalidPathException If path {@code f} is not valid
 */
// NOTE(review): createFlag is a raw EnumSet in this copy; the element type
// was presumably lost with the other generic arguments - confirm upstream.
public FSDataOutputStream create(final Path f,
    final EnumSet createFlag, Options.CreateOpts... opts)
    throws AccessControlException, FileAlreadyExistsException,
    FileNotFoundException, ParentNotDirectoryException,
    UnsupportedFileSystemException, IOException {
  final Path absF = fixRelativePart(f);

  // If one of the options is a permission, extract it & apply umask.
  // If not, start from the default permission and apply umask.
  final CreateOpts.Perms permOpt =
      (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts);
  FsPermission permission = (permOpt != null) ? permOpt.getValue() :
      FsPermission.getDefault();
  permission = permission.applyUMask(umask);

  // Hand the umask-adjusted permission down in place of the caller's one.
  final CreateOpts[] updatedOpts =
      CreateOpts.setOpt(CreateOpts.perms(permission), opts);
  return new FSLinkResolver<FSDataOutputStream>() {
    @Override
    public FSDataOutputStream next(final AbstractFileSystem fs, final Path p)
        throws IOException {
      return fs.create(p, createFlag, updatedOpts);
    }
  }.resolve(this, absF);
}
/**
 * Make (create) a directory and, if requested, all the non-existent parents.
 *
 * @param dir - the dir to make
 * @param permission - permissions is set permission&amp;~umask; if null,
 *          FsPermission.getDefault() is used before the umask is applied
 * @param createParent - if true then missing parent dirs are created if false
 *          then parent must exist
 *
 * @throws AccessControlException If access is denied
 * @throws FileAlreadyExistsException If directory {@code dir} already
 *           exists
 * @throws FileNotFoundException If parent of {@code dir} does not exist
 *           and {@code createParent} is false
 * @throws ParentNotDirectoryException If parent of {@code dir} is not a
 *           directory
 * @throws UnsupportedFileSystemException If file system for {@code dir}
 *           is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 *
 * RuntimeExceptions:
 * @throws InvalidPathException If path {@code dir} is not valid
 */
public void mkdir(final Path dir, final FsPermission permission,
    final boolean createParent) throws AccessControlException,
    FileAlreadyExistsException, FileNotFoundException,
    ParentNotDirectoryException, UnsupportedFileSystemException,
    IOException {
  final Path absDir = fixRelativePart(dir);
  final FsPermission absPerms = (permission == null ?
      FsPermission.getDefault() : permission).applyUMask(umask);
  new FSLinkResolver<Void>() {
    @Override
    public Void next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      fs.mkdir(p, absPerms, createParent);
      return null;
    }
  }.resolve(this, absDir);
}
/**
 * Delete a file or directory.
 *
 * @param f the path to delete.
 * @param recursive if path is a directory and set to
 *          true, the directory is deleted else throws an exception. In
 *          case of a file the recursive can be set to either true or false.
 * @return the value returned by the delete call of the file system that
 *         {@code f} resolves to
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If {@code f} does not exist
 * @throws UnsupportedFileSystemException If file system for {@code f} is
 *           not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 *
 * RuntimeExceptions:
 * @throws InvalidPathException If path {@code f} is invalid
 */
public boolean delete(final Path f, final boolean recursive)
    throws AccessControlException, FileNotFoundException,
    UnsupportedFileSystemException, IOException {
  final Path absF = fixRelativePart(f);
  return new FSLinkResolver<Boolean>() {
    @Override
    public Boolean next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return Boolean.valueOf(fs.delete(p, recursive));
    }
  }.resolve(this, absF);
}
/**
 * Opens an FSDataInputStream at the indicated Path using
 * default buffersize.
 *
 * @param f the file name to open
 * @return an input stream for the file that {@code f} resolves to
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If file {@code f} does not exist
 * @throws UnsupportedFileSystemException If file system for {@code f}
 *           is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public FSDataInputStream open(final Path f) throws AccessControlException,
    FileNotFoundException, UnsupportedFileSystemException, IOException {
  final Path absF = fixRelativePart(f);
  return new FSLinkResolver<FSDataInputStream>() {
    @Override
    public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return fs.open(p);
    }
  }.resolve(this, absF);
}
/**
 * Opens an FSDataInputStream at the indicated Path.
 *
 * @param f the file name to open
 * @param bufferSize the size of the buffer to be used.
 * @return an input stream for the file that {@code f} resolves to
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If file {@code f} does not exist
 * @throws UnsupportedFileSystemException If file system for {@code f} is
 *           not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public FSDataInputStream open(final Path f, final int bufferSize)
    throws AccessControlException, FileNotFoundException,
    UnsupportedFileSystemException, IOException {
  final Path absF = fixRelativePart(f);
  return new FSLinkResolver<FSDataInputStream>() {
    @Override
    public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return fs.open(p, bufferSize);
    }
  }.resolve(this, absF);
}
/**
 * Set replication for an existing file.
 *
 * @param f file name
 * @param replication new replication
 *
 * @return true if successful
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If file {@code f} does not exist
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public boolean setReplication(final Path f, final short replication)
    throws AccessControlException, FileNotFoundException,
    IOException {
  final Path absF = fixRelativePart(f);
  return new FSLinkResolver<Boolean>() {
    @Override
    public Boolean next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return Boolean.valueOf(fs.setReplication(p, replication));
    }
  }.resolve(this, absF);
}
/**
 * Renames Path src to Path dst
 * <ul>
 * <li>Fails if src is a file and dst is a directory.
 * <li>Fails if src is a directory and dst is a file.
 * <li>Fails if the parent of dst does not exist or is a file.
 * </ul>
 * <p>
 * If OVERWRITE option is not passed as an argument, rename fails if the dst
 * already exists.
 * <p>
 * If OVERWRITE option is passed as an argument, rename overwrites the dst if
 * it is a file or an empty directory. Rename fails if dst is a non-empty
 * directory.
 * <p>
 * Note that atomicity of rename is dependent on the file system
 * implementation. Please refer to the file system documentation for details.
 * <p>
 * Source and destination must be on the same file system; otherwise an
 * IOException is thrown.
 *
 * @param src path to be renamed
 * @param dst new path after rename
 * @param options rename options, passed through to the file system
 *
 * @throws AccessControlException If access is denied
 * @throws FileAlreadyExistsException If {@code dst} already exists and
 *           {@code options} has {@link Rename#OVERWRITE} option
 *           false.
 * @throws FileNotFoundException If {@code src} does not exist
 * @throws ParentNotDirectoryException If parent of {@code dst} is not a
 *           directory
 * @throws UnsupportedFileSystemException If file system for {@code src}
 *           and {@code dst} is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public void rename(final Path src, final Path dst,
    final Options.Rename... options) throws AccessControlException,
    FileAlreadyExistsException, FileNotFoundException,
    ParentNotDirectoryException, UnsupportedFileSystemException,
    IOException {
  final Path absSrc = fixRelativePart(src);
  final Path absDst = fixRelativePart(dst);
  final AbstractFileSystem srcFS = getFSofPath(absSrc);
  final AbstractFileSystem dstFS = getFSofPath(absDst);
  if (!srcFS.getUri().equals(dstFS.getUri())) {
    throw new IOException("Renames across AbstractFileSystems not supported");
  }
  try {
    srcFS.rename(absSrc, absDst, options);
  } catch (UnresolvedLinkException e) {
    /* We do not know whether the source or the destination path
     * was unresolved. Resolve the source path up until the final
     * path component, then fully resolve the destination.
     */
    final Path source = resolveIntermediate(absSrc);
    new FSLinkResolver<Void>() {
      @Override
      public Void next(final AbstractFileSystem fs, final Path p)
          throws IOException, UnresolvedLinkException {
        fs.rename(source, p, options);
        return null;
      }
    }.resolve(this, absDst);
  }
}
/**
 * Set permission of a path.
 *
 * @param f the path whose permission is set
 * @param permission - the new absolute permission (umask is not applied)
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If {@code f} does not exist
 * @throws UnsupportedFileSystemException If file system for {@code f}
 *           is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public void setPermission(final Path f, final FsPermission permission)
    throws AccessControlException, FileNotFoundException,
    UnsupportedFileSystemException, IOException {
  final Path absF = fixRelativePart(f);
  new FSLinkResolver<Void>() {
    @Override
    public Void next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      fs.setPermission(p, permission);
      return null;
    }
  }.resolve(this, absF);
}
/**
 * Set owner of a path (i.e. a file or a directory). The parameters username
 * and groupname cannot both be null.
 *
 * @param f The path
 * @param username If it is null, the original username remains unchanged.
 * @param groupname If it is null, the original groupname remains unchanged.
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If {@code f} does not exist
 * @throws UnsupportedFileSystemException If file system for {@code f} is
 *           not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 *
 * RuntimeExceptions:
 * @throws HadoopIllegalArgumentException If {@code username} or
 *           {@code groupname} is invalid, or if both are null.
 */
public void setOwner(final Path f, final String username,
    final String groupname) throws AccessControlException,
    UnsupportedFileSystemException, FileNotFoundException,
    IOException {
  // Reject the no-op request before touching any file system.
  if ((username == null) && (groupname == null)) {
    throw new HadoopIllegalArgumentException(
        "username and groupname cannot both be null");
  }
  final Path absF = fixRelativePart(f);
  new FSLinkResolver<Void>() {
    @Override
    public Void next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      fs.setOwner(p, username, groupname);
      return null;
    }
  }.resolve(this, absF);
}
/**
 * Set the modification time and access time of a file.
 *
 * @param f The path
 * @param mtime Set the modification time of this file.
 *          The number of milliseconds since epoch (Jan 1, 1970).
 *          A value of -1 means that this call should not set modification
 *          time.
 * @param atime Set the access time of this file.
 *          The number of milliseconds since Jan 1, 1970.
 *          A value of -1 means that this call should not set access time.
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If {@code f} does not exist
 * @throws UnsupportedFileSystemException If file system for {@code f} is
 *           not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public void setTimes(final Path f, final long mtime, final long atime)
    throws AccessControlException, FileNotFoundException,
    UnsupportedFileSystemException, IOException {
  final Path absF = fixRelativePart(f);
  new FSLinkResolver<Void>() {
    @Override
    public Void next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      fs.setTimes(p, mtime, atime);
      return null;
    }
  }.resolve(this, absF);
}
/**
 * Get the checksum of a file.
 *
 * @param f file path
 *
 * @return The file checksum. The default return value is null,
 *         which indicates that no checksum algorithm is implemented
 *         in the corresponding FileSystem.
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If {@code f} does not exist
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public FileChecksum getFileChecksum(final Path f)
    throws AccessControlException, FileNotFoundException,
    IOException {
  final Path absF = fixRelativePart(f);
  return new FSLinkResolver<FileChecksum>() {
    @Override
    public FileChecksum next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return fs.getFileChecksum(p);
    }
  }.resolve(this, absF);
}
/**
 * Set the verify checksum flag for the file system denoted by the path.
 * This is only applicable if the corresponding FileSystem supports
 * checksum. By default doesn't do anything.
 *
 * @param verifyChecksum the flag value forwarded to the file system
 * @param f set the verifyChecksum for the Filesystem containing this path
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If <code>f</code> does not exist
 * @throws UnsupportedFileSystemException If file system for <code>f</code>
 *           is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public void setVerifyChecksum(final boolean verifyChecksum, final Path f)
    throws AccessControlException, FileNotFoundException,
    UnsupportedFileSystemException, IOException {
  // Step 1: normalise the caller's path.
  final Path normalized = fixRelativePart(f);
  // Step 2: run it through resolve() to obtain the path whose file system
  // should receive the flag.
  final Path resolved = resolve(normalized);
  // Step 3: forward the flag to that file system.
  getFSofPath(resolved).setVerifyChecksum(verifyChecksum);
}
/**
 * Return a file status object that represents the path.
 *
 * @param f The path we want information from
 *
 * @return a FileStatus object
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If <code>f</code> does not exist
 * @throws UnsupportedFileSystemException If file system for <code>f</code>
 *           is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public FileStatus getFileStatus(final Path f) throws AccessControlException,
    FileNotFoundException, UnsupportedFileSystemException, IOException {
  final Path absF = fixRelativePart(f);
  // Typed resolver: resolve() returns whatever next() produced, as a
  // FileStatus rather than Object.
  return new FSLinkResolver<FileStatus>() {
    @Override
    public FileStatus next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return fs.getFileStatus(p);
    }
  }.resolve(this, absF);
}
/**
 * Qualify a symlink target that carries neither scheme nor authority.
 * Targets that already have a scheme and/or an authority are handed back
 * untouched.
 *
 * @param pathFS The AbstractFileSystem of the path
 * @param pathWithLink Path that contains the symlink
 * @param target The symlink's absolute target
 * @return Fully qualified version of the target.
 */
private Path qualifySymlinkTarget(final AbstractFileSystem pathFS,
    Path pathWithLink, Path target) {
  final URI targetUri = target.toUri();
  final String scheme = targetUri.getScheme();
  final String auth = targetUri.getAuthority();

  // Unqualified target: makeQualified fills in the scheme and authority of
  // pathFS, working relative to the directory that holds the link.
  if (scheme == null && auth == null) {
    return target.makeQualified(pathFS.getUri(), pathWithLink.getParent());
  }

  // A fully qualified target is expected to live on pathFS, since resolve
  // updates the file system of a path that contains links each time a link
  // is encountered. Only checked when assertions are enabled.
  if (scheme != null && auth != null) {
    assert scheme.equals(pathFS.getUri().getScheme());
    assert auth.equals(pathFS.getUri().getAuthority());
  }
  return target;
}
/**
 * Return a file status object that represents the path. If the path
 * refers to a symlink then the FileStatus of the symlink is returned.
 * The behavior is equivalent to #getFileStatus() if the underlying
 * file system does not support symbolic links.
 *
 * @param f The path we want information from.
 * @return A FileStatus object
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If <code>f</code> does not exist
 * @throws UnsupportedFileSystemException If file system for <code>f</code>
 *           is not supported
 * @throws IOException If an I/O error occurred
 */
public FileStatus getFileLinkStatus(final Path f)
    throws AccessControlException, FileNotFoundException,
    UnsupportedFileSystemException, IOException {
  final Path absF = fixRelativePart(f);
  return new FSLinkResolver<FileStatus>() {
    @Override
    public FileStatus next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      FileStatus fi = fs.getFileLinkStatus(p);
      if (fi.isSymlink()) {
        // Rewrite the stored target so that an unqualified target is
        // returned qualified against the file system that holds the link.
        fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink()));
      }
      return fi;
    }
  }.resolve(this, absF);
}
/**
 * Returns the target of the given symbolic link as it was specified
 * when the link was created. Links in the path leading up to the
 * final path component are resolved transparently.
 *
 * @param f the path to return the target of
 * @return The un-interpreted target of the symbolic link.
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If path <code>f</code> does not exist
 * @throws UnsupportedFileSystemException If file system for <code>f</code>
 *           is not supported
 * @throws IOException If the given path does not refer to a symlink
 *           or an I/O error occurred
 */
public Path getLinkTarget(final Path f) throws AccessControlException,
    FileNotFoundException, UnsupportedFileSystemException, IOException {
  final Path absF = fixRelativePart(f);
  return new FSLinkResolver<Path>() {
    @Override
    public Path next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      // Unlike getFileLinkStatus, the target is returned exactly as stored;
      // it is not passed through qualifySymlinkTarget.
      FileStatus fi = fs.getFileLinkStatus(p);
      return fi.getSymlink();
    }
  }.resolve(this, absF);
}
/**
 * Return blockLocation of the given file for the given offset and len.
 * For a nonexistent file or regions, null will be returned.
 *
 * This call is most helpful with DFS, where it returns
 * hostnames of machines that contain the given file.
 *
 * @param f get blocklocations of this file
 * @param start position (byte offset)
 * @param len (in bytes)
 *
 * @return block locations for given file at specified offset of len
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If <code>f</code> does not exist
 * @throws UnsupportedFileSystemException If file system for <code>f</code>
 *           is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 *
 * RuntimeExceptions:
 * @throws InvalidPathException If path <code>f</code> is invalid
 */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Evolving
public BlockLocation[] getFileBlockLocations(final Path f, final long start,
    final long len) throws AccessControlException, FileNotFoundException,
    UnsupportedFileSystemException, IOException {
  final Path absF = fixRelativePart(f);
  // Typed resolver so that resolve() yields BlockLocation[] rather than
  // Object.
  return new FSLinkResolver<BlockLocation[]>() {
    @Override
    public BlockLocation[] next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return fs.getFileBlockLocations(p, start, len);
    }
  }.resolve(this, absF);
}
/**
 * Returns a status object describing the use and capacity of the
 * file system denoted by the Path argument <code>f</code>.
 * If the file system has multiple partitions, the
 * use and capacity of the partition pointed to by the specified
 * path is reflected.
 *
 * @param f Path for which status should be obtained. null means the
 *          root partition of the default file system.
 *
 * @return a FsStatus object
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If <code>f</code> does not exist
 * @throws UnsupportedFileSystemException If file system for <code>f</code>
 *           is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public FsStatus getFsStatus(final Path f) throws AccessControlException,
    FileNotFoundException, UnsupportedFileSystemException, IOException {
  // No path given: answer for the default file system, without any link
  // resolution.
  if (f == null) {
    return defaultFS.getFsStatus();
  }
  final Path absF = fixRelativePart(f);
  return new FSLinkResolver<FsStatus>() {
    @Override
    public FsStatus next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return fs.getFsStatus(p);
    }
  }.resolve(this, absF);
}
/**
 * Creates a symbolic link to an existing file. An exception is thrown if
 * the symlink exists, the user does not have permission to create symlink,
 * or the underlying file system does not support symlinks.
 *
 * Symlink permissions are ignored, access to a symlink is determined by
 * the permissions of the symlink target.
 *
 * Symlinks in paths leading up to the final path component are resolved
 * transparently. If the final path component refers to a symlink some
 * functions operate on the symlink itself, these are:
 * <ul>
 * <li>delete(f) and deleteOnExit(f) - Deletes the symlink.</li>
 * <li>rename(src, dst) - If src refers to a symlink, the symlink is
 *   renamed. If dst refers to a symlink, the symlink is over-written.</li>
 * <li>getLinkTarget(f) - Returns the target of the symlink.</li>
 * <li>getFileLinkStatus(f) - Returns a FileStatus object describing
 *   the symlink.</li>
 * </ul>
 * Some functions, create() and mkdir(), expect the final path component
 * does not exist. If they are given a path that refers to a symlink that
 * does exist they behave as if the path referred to an existing file or
 * directory. All other functions fully resolve, ie follow, the symlink.
 * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory,
 * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations,
 * getFsStatus, getFileStatus, exists, and listStatus.
 *
 * Symlink targets are stored as given to createSymlink, assuming the
 * underlying file system is capable of storing a fully qualified URI.
 * Dangling symlinks are permitted. FileContext supports four types of
 * symlink targets, and resolves them as follows
 * <pre>
 * Given a path referring to a symlink of form:
 *
 *   &lt;---X---&gt;
 *   fs://host/A/B/link
 *   &lt;-----Y-----&gt;
 *
 * In this path X is the scheme and authority that identify the file system,
 * and Y is the path leading up to the final path component "link". If Y is
 * a symlink itself then let Y' be the target of Y and X' be the scheme and
 * authority of Y'. Symlink targets may be:
 *
 * 1. Fully qualified URIs
 *
 * fs://hostX/A/B/file  Resolved according to the target file system.
 *
 * 2. Partially qualified URIs (eg scheme but no host)
 *
 * fs:///A/B/file  Resolved according to the target file system. Eg resolving
 *                 a symlink to hdfs:///A results in an exception because
 *                 HDFS URIs must be fully qualified, while a symlink to
 *                 file:///A will not since Hadoop's local file systems
 *                 require partially qualified URIs.
 *
 * 3. Relative paths
 *
 * path  Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
 *       is "../B/file" then [Y'][path] is hdfs://host/B/file
 *
 * 4. Absolute paths
 *
 * path  Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path
 *       is "/file" then [X'][path] is hdfs://host/file
 * </pre>
 *
 * @param target the target of the symbolic link
 * @param link the path to be created that points to target
 * @param createParent if true then missing parent dirs are created if
 *                     false then parent must exist
 *
 * @throws AccessControlException If access is denied
 * @throws FileAlreadyExistsException If file <code>link</code> already
 *           exists
 * @throws FileNotFoundException If <code>target</code> does not exist
 * @throws ParentNotDirectoryException If parent of <code>link</code> is not
 *           a directory.
 * @throws UnsupportedFileSystemException If file system for
 *           <code>target</code> or <code>link</code> is not supported
 * @throws IOException If an I/O error occurred
 */
public void createSymlink(final Path target, final Path link,
    final boolean createParent) throws AccessControlException,
    FileAlreadyExistsException, FileNotFoundException,
    ParentNotDirectoryException, UnsupportedFileSystemException,
    IOException {
  // Only the link path is normalised and resolved; the target is passed to
  // the file system exactly as the caller supplied it.
  final Path nonRelLink = fixRelativePart(link);
  new FSLinkResolver<Void>() {
    @Override
    public Void next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      fs.createSymlink(target, p, createParent);
      return null;
    }
  }.resolve(this, nonRelLink);
}
/**
* List the statuses of the files/directories in the given path if the path is
* a directory.
*
* @param f is the path
*
* @return an iterator that traverses statuses of the files/directories
* in the given path
*
* @throws AccessControlException If access is denied
* @throws FileNotFoundException If f
does not exist
* @throws UnsupportedFileSystemException If file system for f
is
* not supported
* @throws IOException If an I/O error occurred
*
* Exceptions applicable to file systems accessed over RPC:
* @throws RpcClientException If an exception occurred in the RPC client
* @throws RpcServerException If an exception occurred in the RPC server
* @throws UnexpectedServerException If server implementation throws
* undeclared exception to RPC server
*/
public RemoteIterator listStatus(final Path f) throws
AccessControlException, FileNotFoundException,
UnsupportedFileSystemException, IOException {
final Path absF = fixRelativePart(f);
return new FSLinkResolver>() {
public RemoteIterator next(
final AbstractFileSystem fs, final Path p)
throws IOException, UnresolvedLinkException {
return fs.listStatusIterator(p);
}
}.resolve(this, absF);
}
/**
* List the statuses of the files/directories in the given path if the path is
* a directory.
* Return the file's status and block locations If the path is a file.
*
* If a returned status is a file, it contains the file's block locations.
*
* @param f is the path
*
* @return an iterator that traverses statuses of the files/directories
* in the given path
* If any IO exception (for example the input directory gets deleted while
* listing is being executed), next() or hasNext() of the returned iterator
* may throw a RuntimeException with the io exception as the cause.
*
* @throws AccessControlException If access is denied
* @throws FileNotFoundException If f
does not exist
* @throws UnsupportedFileSystemException If file system for f
is
* not supported
* @throws IOException If an I/O error occurred
*
* Exceptions applicable to file systems accessed over RPC:
* @throws RpcClientException If an exception occurred in the RPC client
* @throws RpcServerException If an exception occurred in the RPC server
* @throws UnexpectedServerException If server implementation throws
* undeclared exception to RPC server
*/
public RemoteIterator listLocatedStatus(
final Path f) throws
AccessControlException, FileNotFoundException,
UnsupportedFileSystemException, IOException {
final Path absF = fixRelativePart(f);
return new FSLinkResolver>() {
public RemoteIterator next(
final AbstractFileSystem fs, final Path p)
throws IOException, UnresolvedLinkException {
return fs.listLocatedStatus(p);
}
}.resolve(this, absF);
}
/**
 * Mark a path to be deleted on JVM shutdown.
 *
 * The path is recorded in the set kept for this FileContext inside
 * DELETE_ON_EXIT. A path that does not exist at call time is not recorded.
 *
 * @param f the existing path to delete.
 *
 * @return true if deleteOnExit is successful, otherwise false.
 *
 * @throws AccessControlException If access is denied
 * @throws UnsupportedFileSystemException If file system for <code>f</code>
 *           is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public boolean deleteOnExit(Path f) throws AccessControlException,
    IOException {
  if (!this.util().exists(f)) {
    return false;
  }
  synchronized (DELETE_ON_EXIT) {
    // The shutdown hook is registered only while the registry is still
    // empty and FINALIZER is not already running.
    if (DELETE_ON_EXIT.isEmpty() && !FINALIZER.isAlive()) {
      Runtime.getRuntime().addShutdownHook(FINALIZER);
    }

    // Lazily create the per-context set of paths.
    Set<Path> set = DELETE_ON_EXIT.get(this);
    if (set == null) {
      set = new TreeSet<Path>();
      DELETE_ON_EXIT.put(this, set);
    }
    set.add(f);
  }
  return true;
}
/** The Util instance that {@link #util()} hands out. */
private final Util util;

/**
 * Accessor for the utility/library methods built on top of this
 * FileContext.
 *
 * @return the Util instance held by this FileContext
 */
public Util util() {
  return this.util;
}
/**
* Utility/library methods built over the basic FileContext methods.
* Since these are library functions, the operations are not atomic
* and some of them may partially complete if other threads are making
* changes to the same part of the name space.
*/
public class Util {
/**
 * Does the file exist?
 * Note: Avoid using this method if you already have FileStatus in hand.
 * Instead reuse the FileStatus.
 *
 * @param f the file or dir to be checked
 *
 * @return true if a status could be obtained for <code>f</code>, false if
 *         the lookup reported FileNotFoundException
 *
 * @throws AccessControlException If access is denied
 * @throws IOException If an I/O error occurred
 * @throws UnsupportedFileSystemException If file system for <code>f</code>
 *           is not supported
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public boolean exists(final Path f) throws AccessControlException,
    UnsupportedFileSystemException, IOException {
  final FileStatus status;
  try {
    status = FileContext.this.getFileStatus(f);
  } catch (FileNotFoundException e) {
    // A missing path is the "does not exist" answer, not an error.
    return false;
  }
  assert status != null;
  return true;
}
/**
 * Return a list of file status objects that corresponds to supplied paths
 * excluding those non-existent paths.
 *
 * @param paths list of paths we want information from
 *
 * @return an array of FileStatus objects, one per path that exists, in the
 *         order the paths were supplied; null if <code>paths</code> is null
 *
 * @throws AccessControlException If access is denied
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
private FileStatus[] getFileStatus(Path[] paths)
    throws AccessControlException, IOException {
  if (paths == null) {
    return null;
  }
  final List<FileStatus> results = new ArrayList<FileStatus>(paths.length);
  for (Path path : paths) {
    try {
      results.add(FileContext.this.getFileStatus(path));
    } catch (FileNotFoundException ignored) {
      // Non-existent paths are deliberately left out of the result.
    }
  }
  return results.toArray(new FileStatus[results.size()]);
}
/**
 * Return the {@link ContentSummary} of path f.
 *
 * A file yields a summary of its own length, one file and no directories.
 * Anything else is listed and its entries are summed, descending
 * recursively into sub-directories; the path itself counts as one
 * directory.
 *
 * @param f path
 *
 * @return the {@link ContentSummary} of path f.
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If <code>f</code> does not exist
 * @throws UnsupportedFileSystemException If file system for
 *           <code>f</code> is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public ContentSummary getContentSummary(Path f)
    throws AccessControlException, FileNotFoundException,
    UnsupportedFileSystemException, IOException {
  FileStatus status = FileContext.this.getFileStatus(f);
  if (status.isFile()) {
    return new ContentSummary(status.getLen(), 1, 0);
  }
  // Running totals: {length, file count, directory count}. The directory
  // count starts at 1 to account for f itself.
  long[] summary = {0, 0, 1};
  RemoteIterator<FileStatus> statusIterator =
      FileContext.this.listStatus(f);
  while (statusIterator.hasNext()) {
    FileStatus s = statusIterator.next();
    ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) :
        new ContentSummary(s.getLen(), 1, 0);
    summary[0] += c.getLength();
    summary[1] += c.getFileCount();
    summary[2] += c.getDirectoryCount();
  }
  return new ContentSummary(summary[0], summary[1], summary[2]);
}
/**
 * Convenience overload that lists the given paths using DEFAULT_FILTER.
 * See {@link #listStatus(Path[], PathFilter)}
 */
public FileStatus[] listStatus(Path[] files) throws AccessControlException,
    FileNotFoundException, IOException {
  final FileStatus[] statuses = listStatus(files, DEFAULT_FILTER);
  return statuses;
}
/**
 * Filter files/directories in the given path using the user-supplied path
 * filter.
 *
 * @param f is the path name
 * @param filter is the user-supplied path filter
 *
 * @return an array of FileStatus objects for the files under the given path
 *         after applying the filter
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If <code>f</code> does not exist
 * @throws UnsupportedFileSystemException If file system for
 *           <code>f</code> is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public FileStatus[] listStatus(Path f, PathFilter filter)
    throws AccessControlException, FileNotFoundException,
    UnsupportedFileSystemException, IOException {
  // Declared as ArrayList (not List) because the private accumulator
  // overload takes an ArrayList parameter.
  final ArrayList<FileStatus> results = new ArrayList<FileStatus>();
  listStatus(results, f, filter);
  return results.toArray(new FileStatus[results.size()]);
}
/**
 * Filter files/directories in the given list of paths using user-supplied
 * path filter.
 *
 * @param files is a list of paths
 * @param filter is the filter
 *
 * @return a list of statuses for the files under the given paths after
 *         applying the filter
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If a file in <code>files</code> does not
 *           exist
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public FileStatus[] listStatus(Path[] files, PathFilter filter)
    throws AccessControlException, FileNotFoundException, IOException {
  // Declared as ArrayList (not List) because the private accumulator
  // overload takes an ArrayList parameter.
  final ArrayList<FileStatus> results = new ArrayList<FileStatus>();
  for (Path file : files) {
    listStatus(results, file, filter);
  }
  return results.toArray(new FileStatus[results.size()]);
}
/*
 * Filter files/directories in the given path using the user-supplied path
 * filter. Results are added to the given array <code>results</code>.
 *
 * A null listing adds nothing. An entry is kept when the filter accepts
 * its path.
 */
private void listStatus(ArrayList<FileStatus> results, Path f,
    PathFilter filter) throws AccessControlException,
    FileNotFoundException, IOException {
  final FileStatus[] listing = listStatus(f);
  if (listing == null) {
    return;
  }
  for (FileStatus entry : listing) {
    if (filter.accept(entry.getPath())) {
      results.add(entry);
    }
  }
}
/**
 * List the statuses of the files/directories in the given path
 * if the path is a directory.
 *
 * @param f is the path
 *
 * @return an array that contains statuses of the files/directories
 *         in the given path
 *
 * @throws AccessControlException If access is denied
 * @throws FileNotFoundException If <code>f</code> does not exist
 * @throws UnsupportedFileSystemException If file system for <code>f</code>
 *           is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public FileStatus[] listStatus(final Path f) throws AccessControlException,
    FileNotFoundException, UnsupportedFileSystemException,
    IOException {
  final Path absF = fixRelativePart(f);
  return new FSLinkResolver<FileStatus[]>() {
    @Override
    public FileStatus[] next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return fs.listStatus(p);
    }
    // Inside Util, "this" is the Util instance, so the enclosing
    // FileContext is named explicitly.
  }.resolve(FileContext.this, absF);
}
/**
* List the statuses and block locations of the files in the given path.
*
* If the path is a directory,
* if recursive is false, returns files in the directory;
* if recursive is true, return files in the subtree rooted at the path.
* The subtree is traversed in the depth-first order.
* If the path is a file, return the file's status and block locations.
* Files across symbolic links are also returned.
*
* @param f is the path
* @param recursive if the subdirectories need to be traversed recursively
*
* @return an iterator that traverses statuses of the files
* If any IO exception (for example a sub-directory gets deleted while
* listing is being executed), next() or hasNext() of the returned iterator
* may throw a RuntimeException with the IO exception as the cause.
*
* @throws AccessControlException If access is denied
* @throws FileNotFoundException If f
does not exist
* @throws UnsupportedFileSystemException If file system for f
* is not supported
* @throws IOException If an I/O error occurred
*
* Exceptions applicable to file systems accessed over RPC:
* @throws RpcClientException If an exception occurred in the RPC client
* @throws RpcServerException If an exception occurred in the RPC server
* @throws UnexpectedServerException If server implementation throws
* undeclared exception to RPC server
*/
public RemoteIterator listFiles(
final Path f, final boolean recursive) throws AccessControlException,
FileNotFoundException, UnsupportedFileSystemException,
IOException {
return new RemoteIterator() {
private Stack> itors =
new Stack>();
RemoteIterator curItor = listLocatedStatus(f);
LocatedFileStatus curFile;
/**
* Returns true if the iterator has more files.
*
* @return true if the iterator has more files.
* @throws AccessControlException if not allowed to access next
* file's status or locations
* @throws FileNotFoundException if next file does not exist any more
* @throws UnsupportedFileSystemException if next file's
* fs is unsupported
* @throws IOException for all other IO errors
* for example, NameNode is not avaialbe or
* NameNode throws IOException due to an error
* while getting the status or block locations
*/
@Override
public boolean hasNext() throws IOException {
while (curFile == null) {
if (curItor.hasNext()) {
handleFileStat(curItor.next());
} else if (!itors.empty()) {
curItor = itors.pop();
} else {
return false;
}
}
return true;
}
/**
* Process the input stat.
* If it is a file, return the file stat.
* If it is a directory, traverse the directory if recursive is true;
* ignore it if recursive is false.
* If it is a symlink, resolve the symlink first and then process it
* depending on if it is a file or directory.
* @param stat input status
* @throws AccessControlException if access is denied
* @throws FileNotFoundException if file is not found
* @throws UnsupportedFileSystemException if fs is not supported
* @throws IOException for all other IO errors
*/
private void handleFileStat(LocatedFileStatus stat)
throws IOException {
if (stat.isFile()) { // file
curFile = stat;
} else if (stat.isSymlink()) { // symbolic link
// resolve symbolic link
FileStatus symstat = FileContext.this.getFileStatus(
stat.getSymlink());
if (symstat.isFile() || (recursive && symstat.isDirectory())) {
itors.push(curItor);
curItor = listLocatedStatus(stat.getPath());
}
} else if (recursive) { // directory
itors.push(curItor);
curItor = listLocatedStatus(stat.getPath());
}
}
/**
* Returns the next file's status with its block locations
*
* @throws AccessControlException if not allowed to access next
* file's status or locations
* @throws FileNotFoundException if next file does not exist any more
* @throws UnsupportedFileSystemException if next file's
* fs is unsupported
* @throws IOException for all other IO errors
* for example, NameNode is not avaialbe or
* NameNode throws IOException due to an error
* while getting the status or block locations
*/
@Override
public LocatedFileStatus next() throws IOException {
if (hasNext()) {
LocatedFileStatus result = curFile;
curFile = null;
return result;
}
throw new java.util.NoSuchElementException("No more entry in " + f);
}
};
}
/**
 * <p>Return the files that match the given pattern, filtered through
 * DEFAULT_FILTER (the original documentation describes these as the files
 * that match filePattern and are not checksum files). Results are sorted
 * by their names.
 *
 * <p>
 * A filename pattern is composed of <i>regular</i> characters and
 * <i>special pattern matching</i> characters, which are:
 *
 * <dl>
 *   <dt> <tt> ? </tt>
 *   <dd> Matches any single character.
 *
 *   <dt> <tt> * </tt>
 *   <dd> Matches zero or more characters.
 *
 *   <dt> <tt> [<i>abc</i>] </tt>
 *   <dd> Matches a single character from character set
 *        <tt>{<i>a,b,c</i>}</tt>.
 *
 *   <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
 *   <dd> Matches a single character from the character range
 *        <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be
 *        lexicographically less than or equal to character
 *        <tt><i>b</i></tt>.
 *
 *   <dt> <tt> [^<i>a</i>] </tt>
 *   <dd> Matches a single char that is not from character set or range
 *        <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
 *        immediately to the right of the opening bracket.
 *
 *   <dt> <tt> \<i>c</i> </tt>
 *   <dd> Removes (escapes) any special meaning of character <i>c</i>.
 *
 *   <dt> <tt> {ab,cd} </tt>
 *   <dd> Matches a string from the string set <tt>{<i>ab, cd</i>}</tt>
 *
 *   <dt> <tt> {ab,c{de,fh}} </tt>
 *   <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt>
 * </dl>
 *
 * @param pathPattern a regular expression specifying a path pattern
 *
 * @return an array of paths that match the path pattern
 *
 * @throws AccessControlException If access is denied
 * @throws UnsupportedFileSystemException If file system for
 *           <code>pathPattern</code> is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public FileStatus[] globStatus(Path pathPattern)
    throws AccessControlException, UnsupportedFileSystemException,
    IOException {
  // Delegates to the two-argument overload with DEFAULT_FILTER.
  final FileStatus[] matches = globStatus(pathPattern, DEFAULT_FILTER);
  return matches;
}
/**
 * Return an array of FileStatus objects whose path names match pathPattern
 * and is accepted by the user-supplied path filter. Results are sorted by
 * their path names.
 * Return null if pathPattern has no glob and the path does not exist.
 * Return an empty array if pathPattern has a glob and no path matches it.
 *
 * When the pattern expands into several file patterns, each one is globbed
 * separately and the results are concatenated in expansion order. An
 * expanded pattern that yields no result (null) contributes nothing.
 *
 * @param pathPattern regular expression specifying the path pattern
 * @param filter user-supplied path filter
 *
 * @return an array of FileStatus objects
 *
 * @throws AccessControlException If access is denied
 * @throws UnsupportedFileSystemException If file system for
 *           <code>pathPattern</code> is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 */
public FileStatus[] globStatus(final Path pathPattern,
    final PathFilter filter) throws AccessControlException,
    UnsupportedFileSystemException, IOException {
  final Path absPathPattern = fixRelativePart(pathPattern);
  URI uri = getFSofPath(absPathPattern).getUri();

  String filename = pathPattern.toUri().getPath();

  List<String> filePatterns = GlobExpander.expand(filename);
  if (filePatterns.size() == 1) {
    // Single pattern: hand over only the path component; scheme and
    // authority travel separately in uri.
    return globStatusInternal(uri, new Path(absPathPattern.toUri()
        .getPath()), filter);
  } else {
    List<FileStatus> results = new ArrayList<FileStatus>();
    for (String iFilePattern : filePatterns) {
      Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern));
      FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter);
      // globStatusInternal returns null for a glob-free pattern whose path
      // does not exist; skip it instead of failing on the iteration below.
      if (files == null) {
        continue;
      }
      for (FileStatus file : files) {
        results.add(file);
      }
    }
    return results.toArray(new FileStatus[results.size()]);
  }
}
/**
 * Glob a single, already expanded path pattern against one file system.
 *
 * The result is post-processed as follows: a null result becomes an empty
 * array when a glob was seen; an empty result becomes null when no glob was
 * seen; a non-empty result is sorted with Arrays.sort.
 *
 * @param uri for all the inPathPattern
 * @param inPathPattern - without the scheme & authority (take from uri);
 *        asserted to be absolute
 * @param filter the path filter to apply
 *        (NOTE(review): its point of use is in the span missing from this
 *        copy of the source - confirm against the upstream file)
 *
 * @return an array of FileStatus objects, or null when the pattern has no
 *         glob and nothing was found
 *
 * @throws AccessControlException If access is denied
 * @throws IOException If an I/O error occurred
 */
private FileStatus[] globStatusInternal(final URI uri,
final Path inPathPattern, final PathFilter filter)
throws AccessControlException, IOException
{
Path[] parents = new Path[1];
int level = 0;
// The pattern must be a bare absolute path: no scheme, no authority.
assert(inPathPattern.toUri().getScheme() == null &&
inPathPattern.toUri().getAuthority() == null &&
inPathPattern.isUriPathAbsolute());
String filename = inPathPattern.toUri().getPath();
// path has no components at all (empty or just the separator): qualify
// it against uri and look it up directly
if ("".equals(filename) || Path.SEPARATOR.equals(filename)) {
Path p = inPathPattern.makeQualified(uri, null);
return getFileStatus(new Path[]{p});
}
// path has at least one component
String[] components = filename.split(Path.SEPARATOR);
// Path is absolute, first component is "/" hence first component
// is the uri root
parents[0] = new Path(new Path(uri), new Path("/"));
level = 1;
// glob the paths that match the parent path, ie. [0, components.length-1]
// hasGlob is a one-element array so the callee can report a glob back
// (presumably set by globPathsLevel - its body is not fully visible here)
boolean[] hasGlob = new boolean[]{false};
Path[] relParentPaths =
globPathsLevel(parents, components, level, hasGlob);
FileStatus[] results;
if (relParentPaths == null || relParentPaths.length == 0) {
results = null;
} else {
// fix the paths to be abs
Path[] parentPaths = new Path [relParentPaths.length];
// NOTE(review): the next statement is truncated in this copy of the
// source. The loop header breaks off after "i" and runs straight into the
// declaration of filteredPaths, and "fp" (used below) is never declared in
// the visible text. The missing span must be restored from the upstream
// file before this method can compile.
for(int i=0; i filteredPaths =
new ArrayList(parentPaths.length);
for (int i = 0; i < parentPaths.length; i++) {
// append the last pattern component to each parent path
parentPaths[i] = new Path(parentPaths[i],
components[components.length - 1]);
if (fp.accept(parentPaths[i])) {
filteredPaths.add(parentPaths[i]);
}
}
// get all their statuses
results = getFileStatus(
filteredPaths.toArray(new Path[filteredPaths.size()]));
}
}
// Decide if the pathPattern contains a glob or not
if (results == null) {
if (hasGlob[0]) {
// glob with no match: empty array rather than null
results = new FileStatus[0];
}
} else {
if (results.length == 0) {
if (!hasGlob[0]) {
// no glob and nothing found: report null
results = null;
}
} else {
Arrays.sort(results);
}
}
return results;
}
/*
 * Expands the pattern components from index <code>level</code> up to, but not
 * including, the last component, starting from the given parent paths.
 *
 * A component containing a glob is expanded by listing the current parents
 * through a GlobFilter, and hasGlob[0] is set to true. A plain component is
 * appended to every current parent; this overwrites the entries of the
 * array in place.
 *
 * Returns the paths that remain once the last component is reached, or null
 * as soon as no candidate parents are left.
 */
private Path[] globPathsLevel(Path[] parents, String[] filePattern,
    int level, boolean[] hasGlob) throws AccessControlException,
    FileNotFoundException, IOException {
  final int lastIndex = filePattern.length - 1;
  Path[] candidates = parents;
  // One pass per component; stops once the last component is reached.
  for (int depth = level; depth != lastIndex; depth++) {
    if (candidates == null || candidates.length == 0) {
      return null;
    }
    GlobFilter componentFilter = new GlobFilter(filePattern[depth]);
    if (!componentFilter.hasPattern()) {
      // Literal component: extend each candidate in place.
      for (int idx = 0; idx < candidates.length; idx++) {
        candidates[idx] = new Path(candidates[idx], filePattern[depth]);
      }
    } else {
      // Glob component: replace the candidates by the matching children.
      candidates = FileUtil.stat2Paths(listStatus(candidates, componentFilter));
      hasGlob[0] = true;
    }
  }
  return candidates;
}
/**
 * Copies <code>src</code> to <code>dst</code>, keeping the source and
 * refusing to overwrite. Shorthand for
 * {@link #copy(Path, Path, boolean, boolean)} with both flags set to
 * <code>false</code>.
 */
public boolean copy(final Path src, final Path dst)
    throws AccessControlException, FileAlreadyExistsException,
    FileNotFoundException, ParentNotDirectoryException,
    UnsupportedFileSystemException, IOException {
  final boolean deleteSource = false;
  final boolean overwrite = false;
  return copy(src, dst, deleteSource, overwrite);
}
/**
 * Copy from src to dst, optionally deleting src and overwriting dst.
 * A directory is copied recursively, entry by entry.
 *
 * @param src path to copy from
 * @param dst path to copy to
 * @param deleteSource delete src (recursively) after the copy if true
 * @param overwrite overwrite dst if true; throw IOException if dst exists
 *          and overwrite is false.
 *
 * @return true if copy is successful and src was not to be deleted;
 *         otherwise the result of deleting src
 *
 * @throws AccessControlException If access is denied
 * @throws FileAlreadyExistsException If <code>dst</code> already exists
 * @throws FileNotFoundException If <code>src</code> does not exist
 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not
 *           a directory
 * @throws UnsupportedFileSystemException If file system for
 *           <code>src</code> or <code>dst</code> is not supported
 * @throws IOException If an I/O error occurred
 *
 * Exceptions applicable to file systems accessed over RPC:
 * @throws RpcClientException If an exception occurred in the RPC client
 * @throws RpcServerException If an exception occurred in the RPC server
 * @throws UnexpectedServerException If server implementation throws
 *           undeclared exception to RPC server
 *
 * RuntimeExceptions:
 * @throws InvalidPathException If path <code>dst</code> is invalid
 */
public boolean copy(final Path src, final Path dst, boolean deleteSource,
    boolean overwrite) throws AccessControlException,
    FileAlreadyExistsException, FileNotFoundException,
    ParentNotDirectoryException, UnsupportedFileSystemException,
    IOException {
  checkNotSchemeWithRelative(src);
  checkNotSchemeWithRelative(dst);
  Path qSrc = makeQualified(src);
  Path qDst = makeQualified(dst);
  checkDest(qSrc.getName(), qDst, overwrite);
  FileStatus fs = FileContext.this.getFileStatus(qSrc);
  if (fs.isDirectory()) {
    // Refuse to copy a directory into itself or into one of its descendants.
    checkDependencies(qSrc, qDst);
    mkdir(qDst, FsPermission.getDefault(), true);
    FileStatus[] contents = listStatus(qSrc);
    for (FileStatus content : contents) {
      copy(makeQualified(content.getPath()), makeQualified(new Path(qDst,
          content.getPath().getName())), deleteSource, overwrite);
    }
  } else {
    InputStream in = null;
    OutputStream out = null;
    boolean copied = false;
    try {
      in = open(qSrc);
      out = create(qDst, EnumSet.of(CreateFlag.OVERWRITE));
      // The trailing "true" asks copyBytes to close both streams itself
      // (per the IOUtils API).
      IOUtils.copyBytes(in, out, conf, true);
      copied = true;
    } finally {
      // Clean up on every failure, unchecked exceptions included, so that a
      // stream opened before the failure is not leaked.
      if (!copied) {
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
      }
    }
  }
  if (deleteSource) {
    return delete(qSrc, true);
  } else {
    return true;
  }
}
}
/**
 * Check if copying srcName to dst would overwrite an existing
 * file or directory.
 *
 * If dst is an existing directory and srcName is given, the check is
 * repeated once for dst/srcName with srcName set to null.
 *
 * @param srcName File or directory to be copied; null on the recursive call.
 * @param dst Destination to copy srcName to.
 * @param overwrite Whether it's ok to overwrite an existing file.
 * @throws AccessControlException If access is denied.
 * @throws IOException If dst is an existing directory and srcName is null,
 *           or dst is an existing file and the overwrite option is not
 *           passed.
 */
private void checkDest(String srcName, Path dst, boolean overwrite)
    throws AccessControlException, IOException {
  try {
    FileStatus dstFs = getFileStatus(dst);
    if (dstFs.isDirectory()) {
      if (null == srcName) {
        throw new IOException("Target " + dst + " is a directory");
      }
      // Recurse to check if dst/srcName exists.
      checkDest(null, new Path(dst, srcName), overwrite);
    } else if (!overwrite) {
      // dst itself is the existing file, so report dst. The previous message
      // built new Path(dst, srcName), which names a different path and is
      // given a null child on the recursive call.
      throw new IOException("Target " + dst + " already exists");
    }
  } catch (FileNotFoundException ignored) {
    // dst does not exist - OK to copy.
  }
}
//
// Rejects a copy whose destination is the source itself or lies underneath
// the source. Only applies when isSameFS reports both paths as belonging to
// the same file system; the comparison is done on the string form of the
// paths, each terminated with a separator.
//
private static void checkDependencies(Path qualSrc, Path qualDst)
    throws IOException {
  if (!isSameFS(qualSrc, qualDst)) {
    return;
  }
  final String srcPrefix = qualSrc.toString() + Path.SEPARATOR;
  final String dstPrefix = qualDst.toString() + Path.SEPARATOR;
  if (!dstPrefix.startsWith(srcPrefix)) {
    return;
  }
  // Equal length after a prefix match means both strings are identical.
  if (srcPrefix.length() == dstPrefix.length()) {
    throw new IOException("Cannot copy " + qualSrc + " to itself.");
  }
  throw new IOException("Cannot copy " + qualSrc +
      " to its subdirectory " + qualDst);
}
/**
 * Are qualPath1 and qualPath2 of the same file system?
 *
 * Two paths are treated as being on the same file system when their URIs
 * have equal schemes and equal authorities (both absent counts as equal).
 *
 * @param qualPath1 - fully qualified path; its URI scheme must not be null
 * @param qualPath2 - fully qualified path
 * @return true if scheme and authority of both paths match, false otherwise
 */
private static boolean isSameFS(Path qualPath1, Path qualPath2) {
  URI srcUri = qualPath1.toUri();
  URI dstUri = qualPath2.toUri();
  if (!srcUri.getScheme().equals(dstUri.getScheme())) {
    return false;
  }
  // The previous expression negated the authority comparison, so two paths
  // on the same authority were reported as being on different file systems.
  String srcAuthority = srcUri.getAuthority();
  String dstAuthority = dstUri.getAuthority();
  if (srcAuthority == null) {
    return dstAuthority == null;
  }
  return srcAuthority.equals(dstAuthority);
}
/**
 * Thread whose only job is to call processDeleteOnExit(), i.e. to delete
 * all the paths in deleteOnExit. Meant to be run on JVM shutdown; the
 * registration of this thread is not part of this class.
 */
static class FileContextFinalizer extends Thread {
  /** Runs the delete-on-exit processing while holding this thread's monitor. */
  @Override
  public synchronized void run() {
    processDeleteOnExit();
  }
}
/**
 * Resolves all symbolic links in the specified path.
 *
 * The path is resolved by asking the file system for the status of the path
 * (<code>getFileStatus</code>) and following every unresolved link reported
 * along the way.
 *
 * @param f path to resolve
 * @return the path carried by the resulting file status
 * @throws IOException If resolution fails or too many links are encountered
 */
protected Path resolve(final Path f) throws IOException {
  // The explicit type argument is required: with a raw FSLinkResolver the
  // result of resolve(...) has no getPath() method.
  return new FSLinkResolver<FileStatus>() {
    @Override
    public FileStatus next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return fs.getFileStatus(p);
    }
  }.resolve(this, f).getPath();
}
/**
 * Resolves all symbolic links in the specified path leading up
 * to, but not including the final path component.
 *
 * Differs from {@link #resolve(Path)} only in querying the file system with
 * <code>getFileLinkStatus</code> instead of <code>getFileStatus</code>.
 *
 * @param f path to resolve
 * @return the new path object.
 * @throws IOException If resolution fails or too many links are encountered
 */
protected Path resolveIntermediate(final Path f) throws IOException {
  // The explicit type argument is required: with a raw FSLinkResolver the
  // result of resolve(...) has no getPath() method.
  return new FSLinkResolver<FileStatus>() {
    @Override
    public FileStatus next(final AbstractFileSystem fs, final Path p)
        throws IOException, UnresolvedLinkException {
      return fs.getFileLinkStatus(p);
    }
  }.resolve(this, f).getPath();
}
/**
 * Class used to perform an operation on and resolve symlinks in a
 * path. The operation may potentially span multiple file systems.
 *
 * @param <T> result type of the operation implemented by {@link #next}
 */
protected abstract class FSLinkResolver<T> {
  // The maximum number of symbolic link components in a path
  private static final int MAX_PATH_LINKS = 32;

  /**
   * Generic helper function overridden on instantiation to perform a
   * specific operation on the given file system using the given path
   * which may result in an UnresolvedLinkException.
   * @param fs AbstractFileSystem to perform the operation on.
   * @param p Path given the file system.
   * @return Generic type determined by the specific implementation.
   * @throws UnresolvedLinkException If symbolic link <code>path</code> could
   *           not be resolved
   * @throws IOException an I/O error occurred
   */
  public abstract T next(final AbstractFileSystem fs, final Path p)
      throws IOException, UnresolvedLinkException;

  /**
   * Performs the operation specified by the next function, calling it
   * repeatedly until all symlinks in the given path are resolved.
   * @param fc FileContext used to access file systems.
   * @param p The path to resolve symlinks in.
   * @return Generic type determined by the implementation of next.
   * @throws IOException If next fails with an I/O error, or if the number
   *           of unresolved links encountered exceeds the limit
   */
  public T resolve(final FileContext fc, Path p) throws IOException {
    int count = 0;
    final Path first = p;
    // NB: More than one AbstractFileSystem can match a scheme, eg
    // "file" resolves to LocalFs but could have come by RawLocalFs.
    AbstractFileSystem fs = fc.getFSofPath(p);
    // Loop until all symlinks are resolved or the limit is reached
    while (true) {
      try {
        return next(fs, p);
      } catch (UnresolvedLinkException e) {
        // Post-increment comparison: MAX_PATH_LINKS + 1 links are followed
        // before the next unresolved link is reported as a likely cycle.
        if (count++ > MAX_PATH_LINKS) {
          throw new IOException("Possible cyclic loop while " +
              "following symbolic link " + first);
        }
        // Resolve the first unresolved path component
        p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p));
        fs = fc.getFSofPath(p);
      }
    }
  }
}
}