All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.fs.ftp.FTPFileSystem Maven / Gradle / Ivy

There is a newer version: 3.4.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.fs.ftp;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.net.URI;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPClient;
import org.apache.commons.net.ftp.FTPFile;
import org.apache.commons.net.ftp.FTPReply;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.ParentNotDirectoryException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * 

* A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. *

*/ @InterfaceAudience.Public @InterfaceStability.Stable public class FTPFileSystem extends FileSystem { public static final Logger LOG = LoggerFactory .getLogger(FTPFileSystem.class); public static final int DEFAULT_BUFFER_SIZE = 1024 * 1024; public static final int DEFAULT_BLOCK_SIZE = 4 * 1024; public static final long DEFAULT_TIMEOUT = 0; public static final String FS_FTP_USER_PREFIX = "fs.ftp.user."; public static final String FS_FTP_HOST = "fs.ftp.host"; public static final String FS_FTP_HOST_PORT = "fs.ftp.host.port"; public static final String FS_FTP_PASSWORD_PREFIX = "fs.ftp.password."; public static final String FS_FTP_DATA_CONNECTION_MODE = "fs.ftp.data.connection.mode"; public static final String FS_FTP_TRANSFER_MODE = "fs.ftp.transfer.mode"; public static final String E_SAME_DIRECTORY_ONLY = "only same directory renames are supported"; public static final String FS_FTP_TIMEOUT = "fs.ftp.timeout"; private URI uri; /** * Return the protocol scheme for the FileSystem. *

* * @return ftp */ @Override public String getScheme() { return "ftp"; } /** * Get the default port for this FTPFileSystem. * * @return the default port */ @Override protected int getDefaultPort() { return FTP.DEFAULT_PORT; } @Override public void initialize(URI uri, Configuration conf) throws IOException { // get super.initialize(uri, conf); // get host information from uri (overrides info in conf) String host = uri.getHost(); host = (host == null) ? conf.get(FS_FTP_HOST, null) : host; if (host == null) { throw new IOException("Invalid host specified"); } conf.set(FS_FTP_HOST, host); // get port information from uri, (overrides info in conf) int port = uri.getPort(); port = (port == -1) ? FTP.DEFAULT_PORT : port; conf.setInt(FS_FTP_HOST_PORT, port); // get user/password information from URI (overrides info in conf) String userAndPassword = uri.getUserInfo(); if (userAndPassword == null) { userAndPassword = (conf.get(FS_FTP_USER_PREFIX + host, null) + ":" + conf .get(FS_FTP_PASSWORD_PREFIX + host, null)); } String[] userPasswdInfo = userAndPassword.split(":"); Preconditions.checkState(userPasswdInfo.length > 1, "Invalid username / password"); conf.set(FS_FTP_USER_PREFIX + host, userPasswdInfo[0]); conf.set(FS_FTP_PASSWORD_PREFIX + host, userPasswdInfo[1]); setConf(conf); this.uri = uri; } /** * Connect to the FTP server using configuration parameters * * * @return An FTPClient instance * @throws IOException */ private FTPClient connect() throws IOException { FTPClient client = null; Configuration conf = getConf(); String host = conf.get(FS_FTP_HOST); int port = conf.getInt(FS_FTP_HOST_PORT, FTP.DEFAULT_PORT); String user = conf.get(FS_FTP_USER_PREFIX + host); String password = conf.get(FS_FTP_PASSWORD_PREFIX + host); client = new FTPClient(); client.connect(host, port); int reply = client.getReplyCode(); if (!FTPReply.isPositiveCompletion(reply)) { throw NetUtils.wrapException(host, port, NetUtils.UNKNOWN_HOST, 0, new ConnectException("Server response " + reply)); } else if (client.login(user, password)) { client.setFileTransferMode(getTransferMode(conf)); client.setFileType(FTP.BINARY_FILE_TYPE); client.setBufferSize(DEFAULT_BUFFER_SIZE); setTimeout(client, conf); setDataConnectionMode(client, conf); } else { throw new IOException("Login failed on server - " + host + ", port - " + port + " as user '" + user + "'"); } return client; } /** * Set the FTPClient's timeout based on configuration. * FS_FTP_TIMEOUT is set as timeout (defaults to DEFAULT_TIMEOUT). */ @VisibleForTesting void setTimeout(FTPClient client, Configuration conf) { long timeout = conf.getLong(FS_FTP_TIMEOUT, DEFAULT_TIMEOUT); client.setControlKeepAliveTimeout(timeout); } /** * Set FTP's transfer mode based on configuration. Valid values are * STREAM_TRANSFER_MODE, BLOCK_TRANSFER_MODE and COMPRESSED_TRANSFER_MODE. *

* Defaults to BLOCK_TRANSFER_MODE. * * @param conf * @return */ @VisibleForTesting int getTransferMode(Configuration conf) { final String mode = conf.get(FS_FTP_TRANSFER_MODE); // FTP default is STREAM_TRANSFER_MODE, but Hadoop FTPFS's default is // FTP.BLOCK_TRANSFER_MODE historically. int ret = FTP.BLOCK_TRANSFER_MODE; if (mode == null) { return ret; } final String upper = mode.toUpperCase(); if (upper.equals("STREAM_TRANSFER_MODE")) { ret = FTP.STREAM_TRANSFER_MODE; } else if (upper.equals("COMPRESSED_TRANSFER_MODE")) { ret = FTP.COMPRESSED_TRANSFER_MODE; } else { if (!upper.equals("BLOCK_TRANSFER_MODE")) { LOG.warn("Cannot parse the value for " + FS_FTP_TRANSFER_MODE + ": " + mode + ". Using default."); } } return ret; } /** * Set the FTPClient's data connection mode based on configuration. Valid * values are ACTIVE_LOCAL_DATA_CONNECTION_MODE, * PASSIVE_LOCAL_DATA_CONNECTION_MODE and PASSIVE_REMOTE_DATA_CONNECTION_MODE. *

* Defaults to ACTIVE_LOCAL_DATA_CONNECTION_MODE. * * @param client * @param conf * @throws IOException */ @VisibleForTesting void setDataConnectionMode(FTPClient client, Configuration conf) throws IOException { final String mode = conf.get(FS_FTP_DATA_CONNECTION_MODE); if (mode == null) { return; } final String upper = mode.toUpperCase(); if (upper.equals("PASSIVE_LOCAL_DATA_CONNECTION_MODE")) { client.enterLocalPassiveMode(); } else if (upper.equals("PASSIVE_REMOTE_DATA_CONNECTION_MODE")) { client.enterRemotePassiveMode(); } else { if (!upper.equals("ACTIVE_LOCAL_DATA_CONNECTION_MODE")) { LOG.warn("Cannot parse the value for " + FS_FTP_DATA_CONNECTION_MODE + ": " + mode + ". Using default."); } } } /** * Logout and disconnect the given FTPClient. * * * @param client * @throws IOException */ private void disconnect(FTPClient client) throws IOException { if (client != null) { if (!client.isConnected()) { throw new FTPException("Client not connected"); } boolean logoutSuccess = client.logout(); client.disconnect(); if (!logoutSuccess) { LOG.warn("Logout failed while disconnecting, error code - " + client.getReplyCode()); } } } /** * Resolve against given working directory. * * * @param workDir * @param path * @return */ private Path makeAbsolute(Path workDir, Path path) { if (path.isAbsolute()) { return path; } return new Path(workDir, path); } @Override public FSDataInputStream open(Path file, int bufferSize) throws IOException { FTPClient client = connect(); Path workDir = new Path(client.printWorkingDirectory()); Path absolute = makeAbsolute(workDir, file); FileStatus fileStat = getFileStatus(client, absolute); if (fileStat.isDirectory()) { disconnect(client); throw new FileNotFoundException("Path " + file + " is a directory."); } client.allocate(bufferSize); Path parent = absolute.getParent(); // Change to parent directory on the // server. Only then can we read the // file // on the server by opening up an InputStream. As a side effect the working // directory on the server is changed to the parent directory of the file. // The FTP client connection is closed when close() is called on the // FSDataInputStream. client.changeWorkingDirectory(parent.toUri().getPath()); InputStream is = client.retrieveFileStream(file.getName()); FSDataInputStream fis = new FSDataInputStream(new FTPInputStream(is, client, statistics)); if (!FTPReply.isPositivePreliminary(client.getReplyCode())) { // The ftpClient is an inconsistent state. Must close the stream // which in turn will logout and disconnect from FTP server fis.close(); throw new IOException("Unable to open file: " + file + ", Aborting"); } return fis; } /** * A stream obtained via this call must be closed before using other APIs of * this class or else the invocation will block. */ @Override public FSDataOutputStream create(Path file, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { final FTPClient client = connect(); Path workDir = new Path(client.printWorkingDirectory()); Path absolute = makeAbsolute(workDir, file); FileStatus status; try { status = getFileStatus(client, file); } catch (FileNotFoundException fnfe) { status = null; } if (status != null) { if (overwrite && !status.isDirectory()) { delete(client, file, false); } else { disconnect(client); throw new FileAlreadyExistsException("File already exists: " + file); } } Path parent = absolute.getParent(); if (parent == null || !mkdirs(client, parent, FsPermission.getDirDefault())) { parent = (parent == null) ? new Path("/") : parent; disconnect(client); throw new IOException("create(): Mkdirs failed to create: " + parent); } client.allocate(bufferSize); // Change to parent directory on the server. Only then can we write to the // file on the server by opening up an OutputStream. As a side effect the // working directory on the server is changed to the parent directory of the // file. The FTP client connection is closed when close() is called on the // FSDataOutputStream. client.changeWorkingDirectory(parent.toUri().getPath()); FSDataOutputStream fos = new FSDataOutputStream(client.storeFileStream(file .getName()), statistics) { @Override public void close() throws IOException { super.close(); if (!client.isConnected()) { throw new FTPException("Client not connected"); } boolean cmdCompleted = client.completePendingCommand(); disconnect(client); if (!cmdCompleted) { throw new FTPException("Could not complete transfer, Reply Code - " + client.getReplyCode()); } } }; if (!FTPReply.isPositivePreliminary(client.getReplyCode())) { // The ftpClient is an inconsistent state. Must close the stream // which in turn will logout and disconnect from FTP server fos.close(); throw new IOException("Unable to create file: " + file + ", Aborting"); } return fos; } /** This optional operation is not yet supported. */ @Override public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException { throw new UnsupportedOperationException("Append is not supported " + "by FTPFileSystem"); } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. * @throws IOException on IO problems other than FileNotFoundException */ private boolean exists(FTPClient client, Path file) throws IOException { try { getFileStatus(client, file); return true; } catch (FileNotFoundException fnfe) { return false; } } @Override public boolean delete(Path file, boolean recursive) throws IOException { FTPClient client = connect(); try { boolean success = delete(client, file, recursive); return success; } finally { disconnect(client); } } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. */ private boolean delete(FTPClient client, Path file, boolean recursive) throws IOException { Path workDir = new Path(client.printWorkingDirectory()); Path absolute = makeAbsolute(workDir, file); String pathName = absolute.toUri().getPath(); try { FileStatus fileStat = getFileStatus(client, absolute); if (fileStat.isFile()) { return client.deleteFile(pathName); } } catch (FileNotFoundException e) { //the file is not there return false; } FileStatus[] dirEntries = listStatus(client, absolute); if (dirEntries != null && dirEntries.length > 0 && !(recursive)) { throw new IOException("Directory: " + file + " is not empty."); } for (FileStatus dirEntry : dirEntries) { delete(client, new Path(absolute, dirEntry.getPath()), recursive); } return client.removeDirectory(pathName); } @VisibleForTesting FsAction getFsAction(int accessGroup, FTPFile ftpFile) { FsAction action = FsAction.NONE; if (ftpFile.hasPermission(accessGroup, FTPFile.READ_PERMISSION)) { action = action.or(FsAction.READ); } if (ftpFile.hasPermission(accessGroup, FTPFile.WRITE_PERMISSION)) { action = action.or(FsAction.WRITE); } if (ftpFile.hasPermission(accessGroup, FTPFile.EXECUTE_PERMISSION)) { action = action.or(FsAction.EXECUTE); } return action; } private FsPermission getPermissions(FTPFile ftpFile) { FsAction user, group, others; user = getFsAction(FTPFile.USER_ACCESS, ftpFile); group = getFsAction(FTPFile.GROUP_ACCESS, ftpFile); others = getFsAction(FTPFile.WORLD_ACCESS, ftpFile); return new FsPermission(user, group, others); } @Override public URI getUri() { return uri; } @Override public FileStatus[] listStatus(Path file) throws IOException { FTPClient client = connect(); try { FileStatus[] stats = listStatus(client, file); return stats; } finally { disconnect(client); } } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. */ private FileStatus[] listStatus(FTPClient client, Path file) throws IOException { Path workDir = new Path(client.printWorkingDirectory()); Path absolute = makeAbsolute(workDir, file); FileStatus fileStat = getFileStatus(client, absolute); if (fileStat.isFile()) { return new FileStatus[] { fileStat }; } FTPFile[] ftpFiles = client.listFiles(absolute.toUri().getPath()); FileStatus[] fileStats = new FileStatus[ftpFiles.length]; for (int i = 0; i < ftpFiles.length; i++) { fileStats[i] = getFileStatus(ftpFiles[i], absolute); } return fileStats; } @Override public FileStatus getFileStatus(Path file) throws IOException { FTPClient client = connect(); try { FileStatus status = getFileStatus(client, file); return status; } finally { disconnect(client); } } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. */ private FileStatus getFileStatus(FTPClient client, Path file) throws IOException { FileStatus fileStat = null; Path workDir = new Path(client.printWorkingDirectory()); Path absolute = makeAbsolute(workDir, file); Path parentPath = absolute.getParent(); if (parentPath == null) { // root dir long length = -1; // Length of root dir on server not known boolean isDir = true; int blockReplication = 1; long blockSize = DEFAULT_BLOCK_SIZE; // Block Size not known. long modTime = -1; // Modification time of root dir not known. Path root = new Path("/"); return new FileStatus(length, isDir, blockReplication, blockSize, modTime, this.makeQualified(root)); } String pathName = parentPath.toUri().getPath(); FTPFile[] ftpFiles = client.listFiles(pathName); if (ftpFiles != null) { for (FTPFile ftpFile : ftpFiles) { if (ftpFile.getName().equals(file.getName())) { // file found in dir fileStat = getFileStatus(ftpFile, parentPath); break; } } if (fileStat == null) { throw new FileNotFoundException("File " + file + " does not exist."); } } else { throw new FileNotFoundException("File " + file + " does not exist."); } return fileStat; } /** * Convert the file information in FTPFile to a {@link FileStatus} object. * * * @param ftpFile * @param parentPath * @return FileStatus */ private FileStatus getFileStatus(FTPFile ftpFile, Path parentPath) { long length = ftpFile.getSize(); boolean isDir = ftpFile.isDirectory(); int blockReplication = 1; // Using default block size since there is no way in FTP client to know of // block sizes on server. The assumption could be less than ideal. long blockSize = DEFAULT_BLOCK_SIZE; long modTime = ftpFile.getTimestamp().getTimeInMillis(); long accessTime = 0; FsPermission permission = getPermissions(ftpFile); String user = ftpFile.getUser(); String group = ftpFile.getGroup(); Path filePath = new Path(parentPath, ftpFile.getName()); return new FileStatus(length, isDir, blockReplication, blockSize, modTime, accessTime, permission, user, group, this.makeQualified(filePath)); } @Override public boolean mkdirs(Path file, FsPermission permission) throws IOException { FTPClient client = connect(); try { boolean success = mkdirs(client, file, permission); return success; } finally { disconnect(client); } } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. */ private boolean mkdirs(FTPClient client, Path file, FsPermission permission) throws IOException { boolean created = true; Path workDir = new Path(client.printWorkingDirectory()); Path absolute = makeAbsolute(workDir, file); String pathName = absolute.getName(); if (!exists(client, absolute)) { Path parent = absolute.getParent(); created = (parent == null || mkdirs(client, parent, FsPermission .getDirDefault())); if (created) { String parentDir = parent.toUri().getPath(); client.changeWorkingDirectory(parentDir); created = created && client.makeDirectory(pathName); } } else if (isFile(client, absolute)) { throw new ParentNotDirectoryException(String.format( "Can't make directory for path %s since it is a file.", absolute)); } return created; } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. */ private boolean isFile(FTPClient client, Path file) { try { return getFileStatus(client, file).isFile(); } catch (FileNotFoundException e) { return false; // file does not exist } catch (IOException ioe) { throw new FTPException("File check failed", ioe); } } /* * Assuming that parent of both source and destination is the same. Is the * assumption correct or it is suppose to work like 'move' ? */ @Override public boolean rename(Path src, Path dst) throws IOException { FTPClient client = connect(); try { boolean success = rename(client, src, dst); return success; } finally { disconnect(client); } } /** * Probe for a path being a parent of another * @param parent parent path * @param child possible child path * @return true if the parent's path matches the start of the child's */ private boolean isParentOf(Path parent, Path child) { URI parentURI = parent.toUri(); String parentPath = parentURI.getPath(); if (!parentPath.endsWith("/")) { parentPath += "/"; } URI childURI = child.toUri(); String childPath = childURI.getPath(); return childPath.startsWith(parentPath); } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. * * @param client * @param src * @param dst * @return * @throws IOException */ @SuppressWarnings("deprecation") private boolean rename(FTPClient client, Path src, Path dst) throws IOException { Path workDir = new Path(client.printWorkingDirectory()); Path absoluteSrc = makeAbsolute(workDir, src); Path absoluteDst = makeAbsolute(workDir, dst); if (!exists(client, absoluteSrc)) { throw new FileNotFoundException("Source path " + src + " does not exist"); } if (isDirectory(absoluteDst)) { // destination is a directory: rename goes underneath it with the // source name absoluteDst = new Path(absoluteDst, absoluteSrc.getName()); } if (exists(client, absoluteDst)) { throw new FileAlreadyExistsException("Destination path " + dst + " already exists"); } String parentSrc = absoluteSrc.getParent().toUri().toString(); String parentDst = absoluteDst.getParent().toUri().toString(); if (isParentOf(absoluteSrc, absoluteDst)) { throw new IOException("Cannot rename " + absoluteSrc + " under itself" + " : "+ absoluteDst); } if (!parentSrc.equals(parentDst)) { throw new IOException("Cannot rename source: " + absoluteSrc + " to " + absoluteDst + " -"+ E_SAME_DIRECTORY_ONLY); } String from = absoluteSrc.getName(); String to = absoluteDst.getName(); client.changeWorkingDirectory(parentSrc); boolean renamed = client.rename(from, to); return renamed; } @Override public Path getWorkingDirectory() { // Return home directory always since we do not maintain state. return getHomeDirectory(); } @Override public Path getHomeDirectory() { FTPClient client = null; try { client = connect(); Path homeDir = new Path(client.printWorkingDirectory()); return homeDir; } catch (IOException ioe) { throw new FTPException("Failed to get home directory", ioe); } finally { try { disconnect(client); } catch (IOException ioe) { throw new FTPException("Failed to disconnect", ioe); } } } @Override public void setWorkingDirectory(Path newDir) { // we do not maintain the working directory state } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy