// org.apache.hadoop.fs.shell.CopyCommands Maven / Gradle / Ivy
// The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.shell;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIsDirectoryException;
import org.apache.hadoop.io.IOUtils;
/** Various commands for copying files. */
@InterfaceAudience.Private
@InterfaceStability.Evolving
class CopyCommands {
/**
 * Registers the copy-related shell commands declared in this class with
 * the given factory, binding each nested command class to its
 * dash-prefixed command name.
 *
 * @param factory the factory that receives the command classes
 */
public static void registerCommands(CommandFactory factory) {
factory.addClass(Merge.class, "-getmerge");
factory.addClass(Cp.class, "-cp");
factory.addClass(CopyFromLocal.class, "-copyFromLocal");
factory.addClass(CopyToLocal.class, "-copyToLocal");
factory.addClass(Get.class, "-get");
factory.addClass(Put.class, "-put");
factory.addClass(AppendToFile.class, "-appendToFile");
}
/**
 * Merges multiple files together into a single destination file.
 *
 * <p>Source paths are only collected while the arguments are walked; the
 * destination is created and written afterwards, and only if no path error
 * was recorded. With {@code -nl} a newline is written after each source.
 */
public static class Merge extends FsCommand {
  public static final String NAME = "getmerge";
  public static final String USAGE = "[-nl] [-skip-empty-file] "
      + "<src> <localdst>";
  public static final String DESCRIPTION =
      "Get all the files in the directories that "
      + "match the source file pattern and merge and sort them to only "
      + "one file on local fs. <src> is kept.\n"
      + "-nl: Add a newline character at the end of each file.\n"
      + "-skip-empty-file: Do not add new line character for empty file.";

  /** Merge target, built from the last command-line argument. */
  protected PathData dst = null;
  /** Text written after each source when -nl is set; null otherwise. */
  protected String delimiter = null;
  /** When true, zero-length sources do not get a delimiter. */
  private boolean skipEmptyFileDelimiter;
  /** Files queued by {@link #processPath(PathData)} for the merge. */
  protected List<PathData> srcs = null;

  /**
   * Parses the flags, takes the last remaining argument as the destination
   * and resets the list of queued sources.
   *
   * @param args command-line arguments; the destination is removed from it
   * @throws PathIsDirectoryException if the destination is an existing
   *     directory
   * @throws IOException if the destination is not a valid URI
   */
  @Override
  protected void processOptions(LinkedList<String> args) throws IOException {
    try {
      CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "nl",
          "skip-empty-file");
      cf.parse(args);
      delimiter = cf.getOpt("nl") ? "\n" : null;
      skipEmptyFileDelimiter = cf.getOpt("skip-empty-file");
      dst = new PathData(new URI(args.removeLast()), getConf());
      if (dst.exists && dst.stat.isDirectory()) {
        throw new PathIsDirectoryException(dst.toString());
      }
      srcs = new LinkedList<>();
    } catch (URISyntaxException e) {
      throw new IOException("unexpected URISyntaxException", e);
    }
  }

  /**
   * Collects the source files via the superclass, then streams each of
   * them into the destination in collection order.
   *
   * @param items the expanded source arguments
   * @throws IOException if creating, reading or writing a stream fails
   */
  @Override
  protected void processArguments(LinkedList<PathData> items)
      throws IOException {
    super.processArguments(items);
    if (exitCode != 0) { // check for error collecting paths
      return;
    }
    // try-with-resources keeps a failure from the copy loop as the primary
    // exception instead of letting a close() failure replace it.
    try (FSDataOutputStream out = dst.fs.create(dst.path)) {
      for (PathData src : srcs) {
        if (src.stat.getLen() != 0) {
          // Always do sequential reads.
          try (FSDataInputStream in = src.openForSequentialIO()) {
            IOUtils.copyBytes(in, out, getConf(), false);
            writeDelimiter(out);
          }
        } else if (!skipEmptyFileDelimiter) {
          writeDelimiter(out);
        }
      }
    }
  }

  /** Writes the delimiter as UTF-8 bytes if one is configured. */
  private void writeDelimiter(FSDataOutputStream out) throws IOException {
    if (delimiter != null) {
      out.write(delimiter.getBytes(StandardCharsets.UTF_8));
    }
  }

  @Override
  protected void processNonexistentPath(PathData item) throws IOException {
    exitCode = 1; // flag that a path is bad
    super.processNonexistentPath(item);
  }

  // this command is handled a bit differently than others. the paths
  // are batched up instead of actually being processed. this avoids
  // unnecessarily streaming into the merge file and then encountering
  // a path error that should abort the merge
  @Override
  protected void processPath(PathData src) throws IOException {
    // for directories, recurse one level to get its files, else skip it
    if (src.stat.isDirectory()) {
      if (getDepth() == 0) {
        recursePath(src);
      } // skip subdirs
    } else {
      srcs.add(src);
    }
  }

  @Override
  protected boolean isSorted() {
    //Sort the children for merge
    return true;
  }
}
/**
 * Copies files that match the source pattern to a destination.
 * Recursion is always enabled for this command.
 */
static class Cp extends CopyCommandWithMultiThread {
  public static final String NAME = "cp";
  public static final String USAGE =
      "[-f] [-p | -p[topax]] [-d] [-t <thread count>]"
      + " [-q <thread pool queue size>] <src> ... <dst>";
  public static final String DESCRIPTION =
      "Copy files that match the file pattern <src> to a destination."
      + " When copying multiple files, the destination must be a "
      + "directory.\nFlags :\n"
      + " -p[topax] : Preserve file attributes [topx] (timestamps, "
      + "ownership, permission, ACL, XAttr). If -p is specified with "
      + "no arg, then preserves timestamps, ownership, permission. "
      + "If -pa is specified, then preserves permission also because "
      + "ACL is a super-set of permission. Determination of whether raw "
      + "namespace extended attributes are preserved is independent of "
      + "the -p flag.\n"
      + " -f : Overwrite the destination if it already exists.\n"
      + " -d : Skip creation of temporary file(._COPYING_).\n"
      + " -t <thread count> : Number of threads to be used, "
      + "default is 1.\n"
      + " -q <thread pool queue size> : Thread pool queue size to be "
      + "used, default is 1024.\n";

  /**
   * Strips the preserve option first (its suffix letters are not a plain
   * flag), then parses the remaining flags and resolves the destination.
   *
   * @param args command-line arguments, consumed in place
   * @throws IOException if the destination cannot be resolved
   */
  @Override
  protected void processOptions(LinkedList<String> args) throws IOException {
    popPreserveOption(args);
    CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "f", "d");
    cf.addOptionWithValue("t");
    cf.addOptionWithValue("q");
    cf.parse(args);
    setDirectWrite(cf.getOpt("d"));
    setOverwrite(cf.getOpt("f"));
    setThreadCount(cf.getOptValue("t"));
    setThreadPoolQueueSize(cf.getOptValue("q"));
    // should have a -r option
    setRecursive(true);
    getRemoteDestination(args);
  }

  /**
   * Removes the first argument starting with {@code -p} that appears before
   * a {@code --} marker and applies it: a bare {@code -p} enables the
   * default preserve set, otherwise every following character is mapped to
   * a {@link FileAttribute} and preserved individually.
   *
   * @param args command-line arguments; the matched option is removed
   */
  private void popPreserveOption(List<String> args) {
    for (Iterator<String> iter = args.iterator(); iter.hasNext(); ) {
      String cur = iter.next();
      if (cur.equals("--")) {
        // stop parsing arguments when you see --
        break;
      } else if (cur.startsWith("-p")) {
        iter.remove();
        if (cur.length() == 2) {
          setPreserve(true);
        } else {
          String attributes = cur.substring(2);
          for (int index = 0; index < attributes.length(); index++) {
            preserve(FileAttribute.getAttribute(attributes.charAt(index)));
          }
        }
        // only the first preserve option is consumed
        return;
      }
    }
  }
}
/**
 * Copy files that match the source pattern to a local destination.
 * Checksum verification is on unless {@code -ignoreCrc} is given, and
 * recursion is always enabled.
 */
public static class Get extends CopyCommandWithMultiThread {
  public static final String NAME = "get";
  public static final String USAGE =
      "[-f] [-p] [-crc] [-ignoreCrc] [-t <thread count>]"
      + " [-q <thread pool queue size>] <src> ... <localdst>";
  public static final String DESCRIPTION =
      "Copy files that match the file pattern <src> to the local name. "
      + "<src> is kept.\nWhen copying multiple files, the destination"
      + " must be a directory.\nFlags:\n"
      + " -p : Preserves timestamps, ownership and the mode.\n"
      + " -f : Overwrites the destination if it already exists.\n"
      + " -crc : write CRC checksums for the files downloaded.\n"
      + " -ignoreCrc : Skip CRC checks on the file(s) downloaded.\n"
      + " -t <thread count> : Number of threads to be used,"
      + " default is 1.\n"
      + " -q <thread pool queue size> : Thread pool queue size to be"
      + " used, default is 1024.\n";

  /**
   * Parses the flags, applies them to this command and resolves the local
   * destination from the remaining arguments.
   *
   * @param args command-line arguments, consumed in place
   * @throws IOException if the destination cannot be resolved
   */
  @Override
  protected void processOptions(LinkedList<String> args) throws IOException {
    CommandFormat cf =
        new CommandFormat(1, Integer.MAX_VALUE, "crc", "ignoreCrc", "p", "f");
    cf.addOptionWithValue("t");
    cf.addOptionWithValue("q");
    cf.parse(args);
    setWriteChecksum(cf.getOpt("crc"));
    setVerifyChecksum(!cf.getOpt("ignoreCrc"));
    setPreserve(cf.getOpt("p"));
    setOverwrite(cf.getOpt("f"));
    setThreadCount(cf.getOptValue("t"));
    setThreadPoolQueueSize(cf.getOptValue("q"));
    setRecursive(true);
    getLocalDestination(args);
  }
}
/**
 * Copy local files to a remote filesystem. A single source argument of
 * {@code -} copies standard input to the target instead.
 */
public static class Put extends CopyCommandWithMultiThread {
  public static final String NAME = "put";
  public static final String USAGE =
      "[-f] [-p] [-l] [-d] [-t <thread count>] [-q <thread pool queue size>]"
      + " <localsrc> ... <dst>";
  public static final String DESCRIPTION =
      "Copy files from the local file system " +
      "into fs. Copying fails if the file already " +
      "exists, unless the -f flag is given.\n" +
      "Flags:\n" +
      " -p : Preserves timestamps, ownership and the mode.\n" +
      " -f : Overwrites the destination if it already exists.\n" +
      " -t <thread count> : Number of threads to be used, default is 1.\n" +
      " -q <thread pool queue size> : Thread pool queue size to be used, " +
      "default is 1024.\n" +
      " -l : Allow DataNode to lazily persist the file to disk. Forces " +
      "replication factor of 1. This flag will result in reduced " +
      "durability. Use with care.\n" +
      " -d : Skip creation of temporary file(._COPYING_).\n";

  /**
   * Parses the flags, applies them to this command and resolves the remote
   * destination from the remaining arguments.
   *
   * @param args command-line arguments, consumed in place
   * @throws IOException if the destination cannot be resolved
   */
  @Override
  protected void processOptions(LinkedList<String> args) throws IOException {
    CommandFormat cf =
        new CommandFormat(1, Integer.MAX_VALUE, "f", "p", "l", "d");
    cf.addOptionWithValue("t");
    cf.addOptionWithValue("q");
    cf.parse(args);
    setThreadCount(cf.getOptValue("t"));
    setThreadPoolQueueSize(cf.getOptValue("q"));
    setOverwrite(cf.getOpt("f"));
    setPreserve(cf.getOpt("p"));
    setLazyPersist(cf.getOpt("l"));
    setDirectWrite(cf.getOpt("d"));
    getRemoteDestination(args);
    // should have a -r option
    setRecursive(true);
  }

  // commands operating on local paths have no need for glob expansion
  /**
   * Wraps the argument in a single {@link PathData} without glob expansion.
   * If the argument is not a valid URI it is handed to PathData as a plain
   * string instead.
   *
   * @param arg one source argument
   * @return a one-element list holding the path
   * @throws IOException if the PathData cannot be constructed
   */
  @Override
  protected List<PathData> expandArgument(String arg) throws IOException {
    List<PathData> items = new LinkedList<>();
    try {
      items.add(new PathData(new URI(arg), getConf()));
    } catch (URISyntaxException e) {
      items.add(new PathData(arg, getConf()));
    }
    return items;
  }

  /**
   * Copies standard input to the target when the only source is {@code -};
   * otherwise defers to the superclass.
   *
   * @param args the expanded source arguments
   * @throws IOException if the copy fails
   */
  @Override
  protected void processArguments(LinkedList<PathData> args)
      throws IOException {
    // NOTE: this logic should be better, mimics previous implementation
    if (args.size() == 1 && args.get(0).toString().equals("-")) {
      copyStreamToTarget(System.in, getTargetPath(args.get(0)));
      return;
    }
    super.processArguments(args);
  }
}
/**
 * Alias of {@link Put} registered under its own name; it inherits all
 * behavior from Put and reuses Put's usage text.
 */
public static class CopyFromLocal extends Put {
public static final String NAME = "copyFromLocal";
public static final String USAGE = Put.USAGE;
public static final String DESCRIPTION = "Identical to the -put command.";
}
/**
 * Alias of {@link Get} registered under its own name; it inherits all
 * behavior from Get and reuses Get's usage text.
 */
public static class CopyToLocal extends Get {
public static final String NAME = "copyToLocal";
public static final String USAGE = Get.USAGE;
public static final String DESCRIPTION = "Identical to the -get command.";
}
/**
 * Append the contents of one or more local files to a remote
 * file. A source of {@code -} reads from standard input and must be the
 * only source.
 */
public static class AppendToFile extends CommandWithDestination {
  public static final String NAME = "appendToFile";
  public static final String USAGE = "[-n] <localsrc> ... <dst>";
  public static final String DESCRIPTION =
      "Appends the contents of all the given local files to the " +
      "given dst file. The dst file will be created if it does " +
      "not exist. If <localsrc> is -, then the input is read " +
      "from stdin. Option -n represents that use NEW_BLOCK create flag to append file.";

  /** Buffer size handed to the stream copy calls. */
  private static final int DEFAULT_IO_LENGTH = 1024 * 1024;

  /** Set while expanding arguments when a source of "-" is seen. */
  boolean readStdin = false;
  /** Mirrors the -n flag; selects the two-argument append call. */
  private boolean appendToNewBlock = false;

  public boolean isAppendToNewBlock() {
    return appendToNewBlock;
  }

  public void setAppendToNewBlock(boolean appendToNewBlock) {
    this.appendToNewBlock = appendToNewBlock;
  }

  // commands operating on local paths have no need for glob expansion
  /**
   * Turns one source argument into at most one {@link PathData}. The
   * argument {@code -} yields no item and only records that stdin is to
   * be read.
   *
   * @param arg one source argument
   * @return a list with zero or one path
   * @throws IOException if the argument is not a valid URI (except on
   *     Windows, where it is handed to PathData as a plain string)
   */
  @Override
  protected List<PathData> expandArgument(String arg) throws IOException {
    List<PathData> items = new LinkedList<>();
    if (arg.equals("-")) {
      readStdin = true;
    } else {
      try {
        items.add(new PathData(new URI(arg), getConf()));
      } catch (URISyntaxException e) {
        if (Path.WINDOWS) {
          // Unlike URI, PathData knows how to parse Windows drive-letter paths.
          items.add(new PathData(arg, getConf()));
        } else {
          // keep the original exception as the cause for diagnostics
          throw new IOException(
              "Unexpected URISyntaxException: " + e.toString(), e);
        }
      }
    }
    return items;
  }

  /**
   * Parses the -n flag and resolves the remote destination.
   *
   * @param args command-line arguments, consumed in place
   * @throws IOException if fewer than two arguments are given or the
   *     destination cannot be resolved
   */
  @Override
  protected void processOptions(LinkedList<String> args)
      throws IOException {
    if (args.size() < 2) {
      throw new IOException("missing destination argument");
    }
    CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "n");
    cf.parse(args);
    appendToNewBlock = cf.getOpt("n");
    getRemoteDestination(args);
    super.processOptions(args);
  }

  /**
   * Creates the destination if it does not exist, then appends stdin or
   * each source file to it in argument order.
   *
   * @param args the expanded source files (empty when reading stdin)
   * @throws IOException if stdin is combined with file sources, or if any
   *     create, append, read or write operation fails
   */
  @Override
  protected void processArguments(LinkedList<PathData> args)
      throws IOException {
    // Reject an invalid stdin/file mix before the destination is touched,
    // so a usage error does not leave a newly created empty file behind.
    if (readStdin && !args.isEmpty()) {
      throw new IOException(
          "stdin (-) must be the sole input argument when present");
    }
    if (!dst.exists) {
      dst.fs.create(dst.path, false).close();
    }
    try (FSDataOutputStream fos = appendToNewBlock ?
        dst.fs.append(dst.path, true) : dst.fs.append(dst.path)) {
      if (readStdin) {
        IOUtils.copyBytes(System.in, fos, DEFAULT_IO_LENGTH);
      }
      // Read in each input file and write to the target.
      for (PathData source : args) {
        InputStream is = Files.newInputStream(source.toFile().toPath());
        try {
          IOUtils.copyBytes(is, fos, DEFAULT_IO_LENGTH);
        } finally {
          // same close helper as before, now scoped to the single file
          IOUtils.closeStream(is);
        }
      }
    }
  }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy