org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme Maven / Gradle / Ivy
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package org.apache.hadoop.fs.viewfs;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_MOUNTTABLE_LOADER_IMPL;
import static org.apache.hadoop.fs.viewfs.Constants.DEFAULT_MOUNT_TABLE_CONFIG_LOADER_IMPL;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsConstants;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.util.ReflectionUtils;
* This class is extended from the ViewFileSystem for the overloaded
* scheme file system. Mount link configurations and in-memory mount table
* building behaviors are inherited from ViewFileSystem. Unlike
* ViewFileSystem scheme (viewfs://), the users would be able to use
* any scheme.
* To use this class, the following configurations need to be added in
* core-site.xml file.
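* 1) fs.{@literal <scheme>}.impl
* = org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme
* 2) fs.viewfs.overload.scheme.target.{@literal <scheme>}.impl
* = {@literal <hadoop compatible file system implementation class name for the <scheme>>}
* Here {@literal <scheme>} can be any scheme, but with that scheme there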
* should be a hadoop compatible file system available. Second configuration
* value should be the respective scheme's file system implementation class.
* Example: if scheme is configured with "hdfs", then the 2nd configuration
* class name will be org.apache.hadoop.hdfs.DistributedFileSystem.
* if scheme is configured with "s3a", then the 2nd configuration class name
* will be org.apache.hadoop.fs.s3a.S3AFileSystem.
* Use Case 1:
* ===========
* If users want some of their existing cluster (hdfs://Cluster)
* data to mount with other hdfs and object store clusters(hdfs://NN1,
* o3fs://bucket1.volume1/, s3a://bucket1/)
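* fs.viewfs.mounttable.Cluster.link./user = hdfs://NN1/user
* fs.viewfs.mounttable.Cluster.link./data = o3fs://bucket1.volume1/data
* fs.viewfs.mounttable.Cluster.link./backup = s3a://bucket1/backup/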
* Op1: Create file hdfs://Cluster/user/fileA will go to hdfs://NN1/user/fileA
* Op2: Create file hdfs://Cluster/data/datafile will go to
* o3fs://bucket1.volume1/data/datafile
* Op3: Create file hdfs://Cluster/backup/ will go to
* s3a://bucket1/backup/
* Use Case 2:
* ===========
* If users want some of their existing cluster (s3a://bucketA/)
* data to mount with other hdfs and object store clusters
* (hdfs://NN1, o3fs://bucket1.volume1/)
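* fs.viewfs.mounttable.bucketA.link./user = hdfs://NN1/user
* fs.viewfs.mounttable.bucketA.link./data = o3fs://bucket1.volume1/data
* fs.viewfs.mounttable.bucketA.link./salesDB = s3a://bucketA/salesDB/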
* Op1: Create file s3a://bucketA/user/fileA will go to hdfs://NN1/user/fileA
* Op2: Create file s3a://bucketA/data/datafile will go to
* o3fs://bucket1.volume1/data/datafile
* Op3: Create file s3a://bucketA/salesDB/dbfile will go to
* s3a://bucketA/salesDB/dbfile
* Note:
* (1) In ViewFileSystemOverloadScheme, by default the mount links will be
* represented as non-symlinks. If you want to change this behavior, please see
* {@link ViewFileSystem#listStatus(Path)}
* (2) In ViewFileSystemOverloadScheme, only the initialized uri's hostname will
* be considered as the mount table name. When the passed uri has hostname:port,
* it will simply ignore the port number and only hostname will be considered as
* the mount table name.
* (3) If there are no mount links configured with the initializing uri's
* hostname as the mount table name, then it will automatically consider the
* current uri as the fallback (ex:
* {@literal fs.viewfs.mounttable.<mountTableName>.linkFallback}) target fs uri.
// Audience is limited to the projects listed in the annotation.
@InterfaceAudience.LimitedPrivate({ "MapReduce", "HBase", "Hive" })
public class ViewFileSystemOverloadScheme extends ViewFileSystem {
// URI this file system was initialized with; assigned in initialize() and
// read by getScheme() to report the overloaded scheme.
private URI myUri;
// Value returned by supportAutoAddingFallbackOnNoMounts(); defaults to true
// and is changed through setSupportAutoAddingFallbackOnNoMounts(boolean).
private boolean supportAutoAddingFallbackOnNoMounts = true;
// No-argument constructor.
// NOTE(review): the constructor body is not visible here -- presumably it
// only delegates to the superclass constructor; confirm.
public ViewFileSystemOverloadScheme() throws IOException {
public String getScheme() {
return myUri.getScheme();
* By default returns false as ViewFileSystemOverloadScheme supports auto
* adding fallback on no mounts.
public boolean supportAutoAddingFallbackOnNoMounts() {
return this.supportAutoAddingFallbackOnNoMounts;
* Sets whether to add fallback automatically when no mount points found.
* @param addAutoFallbackOnNoMounts addAutoFallbackOnNoMounts.
public void setSupportAutoAddingFallbackOnNoMounts(
boolean addAutoFallbackOnNoMounts) {
this.supportAutoAddingFallbackOnNoMounts = addAutoFallbackOnNoMounts;
// Initializes this file system for the given uri: remembers the uri, asks a
// MountTableConfigLoader to load the mount-table config path when one is set,
// and finally delegates to the superclass initialize.
public void initialize(URI theUri, Configuration conf) throws IOException {
// Keep the initializing uri; getScheme() later reports its scheme.
this.myUri = theUri;
if (LOG.isDebugEnabled()) {
LOG.debug("Initializing the ViewFileSystemOverloadScheme with the uri: "
+ theUri);
// NOTE(review): the right-hand side of this assignment is not visible in this
// copy; judging by the log message further down it is presumably the value of
// the fs.viewfs.mounttable.path setting -- confirm.
String mountTableConfigPath =
// NOTE(review): the statements the next two comments describe are not visible
// in this copy; they appear to set overload-scheme specific defaults -- confirm.
/* The default value to false in ViewFSOverloadScheme */
/* the default value to true in ViewFSOverloadScheme */
// With a mount-table config path present, obtain the loader and have it load
// that path using conf.
if (null != mountTableConfigPath) {
MountTableConfigLoader loader = getMountTableConfigLoader(conf);
loader.load(mountTableConfigPath, conf);
} else {
// TODO: Should we fail here.?
if (LOG.isDebugEnabled()) {
// NOTE(review): the two literals below concatenate to "Proceedingwith" (no
// separating space) and "avaialable" is misspelled; worth fixing in the message.
"Missing configuration for fs.viewfs.mounttable.path. Proceeding"
+ "with core-site.xml mount-table information if avaialable.");
// Hand over to the superclass with the same uri and conf.
super.initialize(theUri, conf);
// Resolves the MountTableConfigLoader implementation class and instantiates it
// with conf; a missing class or a failed instantiation surfaces as a
// RuntimeException.
private MountTableConfigLoader getMountTableConfigLoader(
final Configuration conf) {
// NOTE(review): the generic type and the initializer of clazz are damaged or
// missing in this copy; presumably the class is read from the
// fs.viewfs.mounttable.config.loader.impl setting named in the error message
// below -- confirm.
Class extends MountTableConfigLoader> clazz =
if (clazz == null) {
throw new RuntimeException(
// NOTE(review): the argument supplying the %s value is not visible here.
String.format("Errors on getting mount table loader class. "
+ "The fs.viewfs.mounttable.config.loader.impl conf is %s. ",
// Instantiate the loader through ReflectionUtils, passing conf along.
try {
MountTableConfigLoader mountTableConfigLoader =
ReflectionUtils.newInstance(clazz, conf);
return mountTableConfigLoader;
} catch (Exception e) {
// Any instantiation failure is rethrown unchecked with its cause preserved.
throw new RuntimeException(e);
* This method is overridden because in ViewFileSystemOverloadScheme if
* overloaded scheme matches with mounted target fs scheme, file system
* should be created without going into {@literal fs..impl} based
* resolution. Otherwise it will end up in an infinite loop as the target
* will be resolved again to ViewFileSystemOverloadScheme as
* {@literal fs..impl} points to ViewFileSystemOverloadScheme.
* So, below method will initialize the
* {@literal}.
* Other schemes can follow fs.newInstance
protected FsGetter fsGetter() {
return new ChildFsGetter(getScheme());
* This class checks whether the rootScheme is the same as the URI scheme. If
* both are the same, then it will initialize file systems by using the
* configured {@literal fs.viewfs.overload.scheme.target.<scheme>.impl} class.
// FsGetter variant that compares each target uri's scheme with rootScheme to
// decide how the target file system is created.
static class ChildFsGetter extends FsGetter {
// Scheme to compare target uri schemes against; fsGetter() passes the
// enclosing file system's getScheme() value.
private final String rootScheme;
// Stores the scheme used for the comparisons in getNewInstance() and get().
ChildFsGetter(String rootScheme) {
this.rootScheme = rootScheme;
public FileSystem getNewInstance(URI uri, Configuration conf)
throws IOException {
if (uri.getScheme().equals(this.rootScheme)) {
if (LOG.isDebugEnabled()) {
"The file system initialized uri scheme is matching with the "
+ "given target uri scheme. The target uri is: " + uri);
* Avoid looping when target fs scheme is matching to overloaded scheme.
return createFileSystem(uri, conf);
} else {
return FileSystem.newInstance(uri, conf);
* When ViewFileSystemOverloadScheme scheme and target uri scheme are
* matching, it will not take advantage of FileSystem cache as it will
* create instance directly. For caching needs please set
* "fs.viewfs.enable.inner.cache" to true.
public FileSystem get(URI uri, Configuration conf) throws IOException {
if (uri.getScheme().equals(this.rootScheme)) {
// Avoid looping when target fs scheme is matching to overloaded
// scheme.
if (LOG.isDebugEnabled()) {
"The file system initialized uri scheme is matching with the "
+ "given target uri scheme. So, the target file system "
+ "instances will not be cached. To cache fs instances, "
+ "please set fs.viewfs.enable.inner.cache to true. "
+ "The target uri is: " + uri);
return createFileSystem(uri, conf);
} else {
return FileSystem.get(uri, conf);
// Creates and initializes a file system for the uri directly from a configured
// implementation class; getNewInstance() and get() call this when the uri
// scheme equals the root scheme.
private FileSystem createFileSystem(URI uri, Configuration conf)
throws IOException {
// NOTE(review): the String.format arguments are not visible in this copy;
// presumably a key pattern is formatted with uri.getScheme() -- confirm.
final String fsImplConf = String.format(
// Look up the implementation class under that key; null is passed as the
// default, so the null check below handles a key that is not configured.
Class> clazz = conf.getClass(fsImplConf, null);
if (clazz == null) {
throw new UnsupportedFileSystemException(
String.format("%s=null: %s: %s", fsImplConf,
"No overload scheme fs configured", uri.getScheme()));
// Instantiate via newInstance(), then initialize with the target uri and conf.
FileSystem fs = (FileSystem) newInstance(clazz, uri, conf);
fs.initialize(uri, conf);
return fs;
private T newInstance(Class theClass, URI uri, Configuration conf) {
T result;
try {
Constructor meth = theClass.getConstructor();
result = meth.newInstance();
} catch (InvocationTargetException e) {
Throwable cause = e.getCause();
if (cause instanceof RuntimeException) {
throw (RuntimeException) cause;
} else {
throw new RuntimeException(cause);
} catch (Exception e) {
throw new RuntimeException(e);
return result;
* This is an admin only API to give access to its child raw file system, if
* the path is link. If the given path is an internal directory(path is from
* mount paths tree), it will initialize the file system of given path uri
* directly. If path cannot be resolved to any internal directory or link, it
* will throw NotInMountpointException. Please note, this API will not return
* chrooted file system. Instead, this API will get actual raw file system
* instances.
* @param path - fs uri path
* @param conf - configuration
* @throws IOException raised on errors performing I/O.
* @return file system.
public FileSystem getRawFileSystem(Path path, Configuration conf)
throws IOException {
InodeTree.ResolveResult res;
try {
res = fsState.resolve(getUriPath(path), true);
return res.isInternalDir() ? fsGetter().get(path.toUri(), conf)
: ((ChRootedFileSystem) res.targetFileSystem).getMyFs();
} catch (FileNotFoundException e) {
// No link configured with passed path.
throw new NotInMountpointException(path,
"No link found for the given path.");
* Gets the mount path info, which contains the target file system and
* remaining path to pass to the target file system.
* @param path the path.
* @param conf configuration.
* @return mount path info.
* @throws IOException raised on errors performing I/O.
public MountPathInfo getMountPathInfo(Path path,
Configuration conf) throws IOException {
InodeTree.ResolveResult res;
try {
res = fsState.resolve(getUriPath(path), true);
FileSystem fs = res.isInternalDir() ?
(fsState.getRootFallbackLink() != null ?
fsState.getRootFallbackLink().getTargetFileSystem() :
fsGetter().get(path.toUri(), conf)) :
if (fs instanceof ChRootedFileSystem) {
ChRootedFileSystem chFs = (ChRootedFileSystem) fs;
return new MountPathInfo<>(chFs.fullPath(res.remainingPath),
return new MountPathInfo(res.remainingPath, fs);
} catch (FileNotFoundException e) {
// No link configured with passed path.
throw new NotInMountpointException(path,
"No link found for the given path.");
* A class to maintain the target file system and a path to pass to the target
* file system.
public static class MountPathInfo {
private Path pathOnTarget;
private T targetFs;
public MountPathInfo(Path pathOnTarget, T targetFs) {
this.pathOnTarget = pathOnTarget;
this.targetFs = targetFs;
public Path getPathOnTarget() {
return this.pathOnTarget;
public T getTargetFs() {
return this.targetFs;
* @return Gets the fallback file system configured. Usually, this will be the
* default cluster.
public FileSystem getFallbackFileSystem() {
if (fsState.getRootFallbackLink() == null) {
return null;
try {
return ((ChRootedFileSystem) fsState.getRootFallbackLink()
} catch (IOException ex) {
LOG.error("Could not get fallback filesystem ");
return null;
public URI canonicalizeUri(URI uri) {
return super.canonicalizeUri(uri);