htsjdk.samtools.util.IOUtil Maven / Gradle / Ivy
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools.util;
import htsjdk.samtools.Defaults;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableHTTPStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.nio.DeleteOnExitPathHook;
import htsjdk.tribble.Tribble;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.FileSystemNotFoundException;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
import java.util.Stack;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.Deflater;
import java.util.zip.GZIPInputStream;
/**
* Miscellaneous stateless static IO-oriented methods.
* Also used for utility methods that wrap or aggregate functionality in Java IO.
*/
public class IOUtil {
/**
 * @deprecated Use {@link Defaults#NON_ZERO_BUFFER_SIZE} instead.
 */
@Deprecated
public static final int STANDARD_BUFFER_SIZE = Defaults.NON_ZERO_BUFFER_SIZE;
/** 1024 * 1024 * 1024 bytes. */
public static final long ONE_GB = 1024 * 1024 * 1024;
/** Twice {@link #ONE_GB}. */
public static final long TWO_GBS = 2 * ONE_GB;
/** Five times {@link #ONE_GB}; the default free-space threshold used by the three-argument temp file/path factories. */
public static final long FIVE_GBS = 5 * ONE_GB;
/**
 * @deprecated since June 2019 Use {@link FileExtensions#VCF} instead.
 */
@Deprecated
public static final String VCF_FILE_EXTENSION = FileExtensions.VCF;
/**
 * @deprecated since June 2019 Use {@link FileExtensions#VCF_INDEX} instead.
 */
@Deprecated
public static final String VCF_INDEX_EXTENSION = FileExtensions.VCF_INDEX;
/**
 * @deprecated since June 2019 Use {@link FileExtensions#BCF} instead.
 */
@Deprecated
public static final String BCF_FILE_EXTENSION = FileExtensions.BCF;
/**
 * @deprecated since June 2019 Use {@link FileExtensions#COMPRESSED_VCF} instead.
 */
@Deprecated
public static final String COMPRESSED_VCF_FILE_EXTENSION = FileExtensions.COMPRESSED_VCF;
/**
 * @deprecated since June 2019 Use {@link FileExtensions#COMPRESSED_VCF_INDEX} instead.
 */
@Deprecated
public static final String COMPRESSED_VCF_INDEX_EXTENSION = FileExtensions.COMPRESSED_VCF_INDEX;
/**
 * Possible extensions for VCF files and related formats, as a list.
 * @deprecated since June 2019 Use {@link FileExtensions#VCF_LIST} instead.
 */
@Deprecated
public static final List VCF_EXTENSIONS_LIST = FileExtensions.VCF_LIST;
/**
 * Possible extensions for VCF files and related formats, as an array copied from
 * {@link FileExtensions#VCF_LIST} when this class is initialised.
 * @deprecated since June 2019 Use {@link FileExtensions#VCF_LIST} instead.
 */
@Deprecated
public static final String[] VCF_EXTENSIONS = FileExtensions.VCF_LIST.toArray(new String[0]);
/**
 * @deprecated since June 2019 Use {@link FileExtensions#INTERVAL_LIST} instead.
 */
@Deprecated
public static final String INTERVAL_LIST_FILE_EXTENSION = FileExtensions.INTERVAL_LIST;
/**
 * @deprecated since June 2019 Use {@link FileExtensions#SAM} instead.
 */
@Deprecated
public static final String SAM_FILE_EXTENSION = FileExtensions.SAM;
/**
 * @deprecated since June 2019 Use {@link FileExtensions#DICT} instead.
 */
@Deprecated
public static final String DICT_FILE_EXTENSION = FileExtensions.DICT;
/**
 * @deprecated since June 2019 Use {@link FileExtensions#BLOCK_COMPRESSED} instead.
 */
@Deprecated
public static final Set BLOCK_COMPRESSED_EXTENSIONS = FileExtensions.BLOCK_COMPRESSED;
/** number of bytes that will be read for the GZIP-header in the function {@link #isGZIPInputStream(InputStream)} */
public static final int GZIP_HEADER_READ_LENGTH = 8000;
/** Shared empty option array, returned by {@code getAppendOpenOption(false)}. */
private static final OpenOption[] EMPTY_OPEN_OPTIONS = new OpenOption[0];
/**
 * Compression level passed to the GZIP output streams this class creates. Starts at
 * {@link Defaults#COMPRESSION_LEVEL} and is changed through {@link #setCompressionLevel(int)}.
 */
private static int compressionLevel = Defaults.COMPRESSION_LEVEL;
/**
 * Changes the compression level used for GZIP output streams opened by this class after the call.
 *
 * @param compressionLevel requested level, between {@link Deflater#NO_COMPRESSION} and
 *                         {@link Deflater#BEST_COMPRESSION} inclusive
 * @throws IllegalArgumentException if the level lies outside that range
 */
public static void setCompressionLevel(final int compressionLevel) {
    final boolean withinRange =
            compressionLevel >= Deflater.NO_COMPRESSION && compressionLevel <= Deflater.BEST_COMPRESSION;
    if (!withinRange) {
        throw new IllegalArgumentException("Invalid compression level: " + compressionLevel);
    }
    IOUtil.compressionLevel = compressionLevel;
}
/**
 * @return the compression level currently applied to GZIP output streams opened by this class
 */
public static int getCompressionLevel() {
    return IOUtil.compressionLevel;
}
/**
 * Ensures the given stream is a {@link BufferedInputStream}, wrapping it only when it is not one already.
 *
 * @param stream stream to be wrapped
 * @return {@code stream} itself when it already is a BufferedInputStream, otherwise a new
 *         BufferedInputStream around it sized with {@link Defaults#NON_ZERO_BUFFER_SIZE}
 */
public static BufferedInputStream toBufferedStream(final InputStream stream) {
    return stream instanceof BufferedInputStream
            ? (BufferedInputStream) stream
            : new BufferedInputStream(stream, Defaults.NON_ZERO_BUFFER_SIZE);
}
/**
 * Copies exactly {@code bytes} bytes from the input stream to the output stream through an
 * intermediate buffer. Neither stream is closed or flushed by this method.
 *
 * @param in    stream to read from
 * @param out   stream to write to
 * @param bytes number of bytes to transfer; nothing is copied when this is zero or negative
 * @throws RuntimeIOException if reading or writing fails, or if {@code in} is exhausted before
 *                            {@code bytes} bytes have been transferred
 */
public static void transferByStream(final InputStream in, final OutputStream out, final long bytes) {
    final byte[] buffer = new byte[Defaults.NON_ZERO_BUFFER_SIZE];
    long remaining = bytes;
    try {
        while (remaining > 0) {
            final int read = in.read(buffer, 0, (int) Math.min(buffer.length, remaining));
            if (read < 0) {
                // read() reports end-of-stream as -1; that value must never reach write() as a
                // length, nor be subtracted from the remaining count.
                throw new IOException("Unexpected end of stream with " + remaining + " of " + bytes +
                        " bytes still to transfer.");
            }
            out.write(buffer, 0, read);
            remaining -= read;
        }
    } catch (final IOException ioe) {
        throw new RuntimeIOException(ioe);
    }
}
/**
 * Buffers the stream according to the globally configured buffer size.
 *
 * @return a BufferedOutputStream around {@code os} when {@link Defaults#BUFFER_SIZE} is positive,
 *         otherwise {@code os} itself
 */
public static OutputStream maybeBufferOutputStream(final OutputStream os) {
    final int configuredSize = Defaults.BUFFER_SIZE;
    return maybeBufferOutputStream(os, configuredSize);
}
/**
 * Optionally adds buffering to an output stream.
 *
 * @param os         stream to wrap
 * @param bufferSize buffer size in bytes; zero or negative disables buffering
 * @return a BufferedOutputStream around {@code os} when {@code bufferSize > 0}, otherwise {@code os} itself
 */
public static OutputStream maybeBufferOutputStream(final OutputStream os, final int bufferSize) {
    return bufferSize > 0 ? new BufferedOutputStream(os, bufferSize) : os;
}
/**
 * Optionally adds buffering to a seekable stream.
 *
 * @param stream     stream to wrap
 * @param bufferSize buffer size; zero or negative disables buffering
 * @return a {@link SeekableBufferedStream} around {@code stream} when {@code bufferSize > 0},
 *         otherwise {@code stream} itself
 */
public static SeekableStream maybeBufferedSeekableStream(final SeekableStream stream, final int bufferSize) {
    if (bufferSize > 0) {
        return new SeekableBufferedStream(stream, bufferSize);
    }
    return stream;
}
/**
 * Optionally buffers a seekable stream, using {@link Defaults#BUFFER_SIZE} as the buffer size.
 *
 * @param stream stream to wrap
 * @return the buffered stream, or {@code stream} itself when the default buffer size is not positive
 */
public static SeekableStream maybeBufferedSeekableStream(final SeekableStream stream) {
    final int configuredSize = Defaults.BUFFER_SIZE;
    return maybeBufferedSeekableStream(stream, configuredSize);
}
/**
 * Opens a file as a seekable stream and buffers it according to the default buffer size.
 *
 * @param file file to open
 * @return a (possibly buffered) seekable stream over the file
 * @throws RuntimeIOException if the file cannot be found
 */
public static SeekableStream maybeBufferedSeekableStream(final File file) {
    final SeekableStream unbuffered;
    try {
        unbuffered = new SeekableFileStream(file);
    } catch (final FileNotFoundException e) {
        throw new RuntimeIOException(e);
    }
    return maybeBufferedSeekableStream(unbuffered);
}
/**
 * Wraps a URL in a {@link SeekableHTTPStream} and buffers it according to the default buffer size.
 *
 * @param url location to read from
 * @return a (possibly buffered) seekable stream over the URL
 */
public static SeekableStream maybeBufferedSeekableStream(final URL url) {
    final SeekableStream unbuffered = new SeekableHTTPStream(url);
    return maybeBufferedSeekableStream(unbuffered);
}
/**
 * Buffers the stream according to the globally configured buffer size.
 *
 * @return a BufferedInputStream around {@code is} when {@link Defaults#BUFFER_SIZE} is positive,
 *         otherwise {@code is} itself
 */
public static InputStream maybeBufferInputStream(final InputStream is) {
    final int configuredSize = Defaults.BUFFER_SIZE;
    return maybeBufferInputStream(is, configuredSize);
}
/**
 * Optionally adds buffering to an input stream.
 *
 * @param is         stream to wrap
 * @param bufferSize buffer size in bytes; zero or negative disables buffering
 * @return a BufferedInputStream around {@code is} when {@code bufferSize > 0}, otherwise {@code is} itself
 */
public static InputStream maybeBufferInputStream(final InputStream is, final int bufferSize) {
    return bufferSize > 0 ? new BufferedInputStream(is, bufferSize) : is;
}
/**
 * Optionally adds buffering to a reader.
 *
 * @param reader     reader to wrap
 * @param bufferSize buffer size in characters; zero or negative disables buffering
 * @return a BufferedReader around {@code reader} when {@code bufferSize > 0}, otherwise {@code reader} itself
 */
public static Reader maybeBufferReader(final Reader reader, final int bufferSize) {
    return bufferSize > 0 ? new BufferedReader(reader, bufferSize) : reader;
}
/**
 * Optionally buffers a reader, using {@link Defaults#BUFFER_SIZE} as the buffer size.
 *
 * @param reader reader to wrap
 * @return the buffered reader, or {@code reader} itself when the default buffer size is not positive
 */
public static Reader maybeBufferReader(final Reader reader) {
    final int configuredSize = Defaults.BUFFER_SIZE;
    return maybeBufferReader(reader, configuredSize);
}
/**
 * Optionally adds buffering to a writer.
 *
 * @param writer     writer to wrap
 * @param bufferSize buffer size in characters; zero or negative disables buffering
 * @return a BufferedWriter around {@code writer} when {@code bufferSize > 0}, otherwise {@code writer} itself
 */
public static Writer maybeBufferWriter(final Writer writer, final int bufferSize) {
    return bufferSize > 0 ? new BufferedWriter(writer, bufferSize) : writer;
}
/**
 * Optionally buffers a writer, using {@link Defaults#BUFFER_SIZE} as the buffer size.
 *
 * @param writer writer to wrap
 * @return the buffered writer, or {@code writer} itself when the default buffer size is not positive
 */
public static Writer maybeBufferWriter(final Writer writer) {
    final int configuredSize = Defaults.BUFFER_SIZE;
    return maybeBufferWriter(writer, configuredSize);
}
/**
 * Attempts to delete every given file. A file that cannot be deleted is reported on
 * standard error and processing continues with the next one.
 *
 * @param files Files to be deleted.
 */
public static void deleteFiles(final File... files) {
    for (int i = 0; i < files.length; ++i) {
        final File target = files[i];
        final boolean removed = target.delete();
        if (!removed) {
            System.err.println("Could not delete file " + target);
        }
    }
}
/**
 * Attempts to delete every given file. A file that cannot be deleted is reported on
 * standard error and processing continues with the next one.
 *
 * @param files Files to be deleted.
 */
public static void deleteFiles(final Iterable<File> files) {
    // The element type must be declared: iterating a raw Iterable yields Object, not File.
    for (final File f : files) {
        if (!f.delete()) {
            System.err.println("Could not delete file " + f);
        }
    }
}
/**
 * Attempts to delete every given path; see {@link #deletePaths(Iterable)}.
 *
 * @param paths Paths to be deleted.
 */
public static void deletePaths(final Path... paths) {
    final List<Path> asList = Arrays.asList(paths);
    deletePaths(asList);
}
/**
 * Attempts to delete every given path. A path that cannot be deleted is reported on
 * standard error and processing continues with the next one.
 *
 * @param paths Paths to be deleted.
 */
public static void deletePaths(final Iterable<Path> paths) {
    // The element type must be declared: iterating a raw Iterable yields Object, not Path.
    for (final Path p : paths) {
        try {
            Files.delete(p);
        } catch (IOException e) {
            // Deliberately best-effort: report the failure and keep going.
            System.err.println("Could not delete file " + p);
        }
    }
}
/**
 * Tells whether a file location is usable as a regular file.
 *
 * @return true if the file either does not exist or exists as a normal file; false when it
 *         exists as anything else (e.g. a directory, or a device such as /dev/null or /dev/stdin)
 */
public static boolean isRegularPath(final File file) {
    if (file.isFile()) {
        return true;
    }
    return !file.exists();
}
/**
 * Tells whether a path is usable as a regular file.
 *
 * @return true if the path either does not exist or exists as a regular file; false when it
 *         exists as anything else (e.g. a directory, or a device such as /dev/null or /dev/stdin)
 */
public static boolean isRegularPath(final Path path) {
    if (Files.isRegularFile(path)) {
        return true;
    }
    return !Files.exists(path);
}
/**
 * Creates a temp file in the first candidate directory reporting more than {@code minBytesFree}
 * usable bytes, and registers it for deletion on JVM exit. The last directory in the array is
 * used without a free-space check when no earlier one qualifies.
 *
 * @param prefix       prefix for the generated file name
 * @param suffix       suffix for the generated file name
 * @param tmpDirs      candidate directories, in order of preference
 * @param minBytesFree usable space a directory must exceed to be chosen
 * @return the new file, or null when {@code tmpDirs} is empty
 * @throws IOException if the file cannot be created
 */
public static File newTempFile(final String prefix, final String suffix,
                               final File[] tmpDirs, final long minBytesFree) throws IOException {
    for (int idx = 0; idx < tmpDirs.length; idx++) {
        final File candidate = tmpDirs[idx];
        final boolean isLastResort = idx == tmpDirs.length - 1;
        if (!isLastResort && candidate.getUsableSpace() <= minBytesFree) {
            continue;
        }
        final File created = File.createTempFile(prefix, suffix, candidate);
        created.deleteOnExit();
        return created;
    }
    return null;
}
/**
 * Creates a new tmp file using {@link #FIVE_GBS} as the free-space threshold; see
 * {@link #newTempFile(String, String, File[], long)} for the selection rules.
 */
public static File newTempFile(final String prefix, final String suffix,
                               final File[] tmpDirs) throws IOException {
    final long requiredFreeBytes = FIVE_GBS;
    return newTempFile(prefix, suffix, tmpDirs, requiredFreeBytes);
}
/**
 * Returns a default tmp directory: the {@code java.io.tmpdir} location itself when it already
 * ends with a separator followed by the current user name, otherwise a child of it named
 * after the user.
 */
public static File getDefaultTmpDir() {
    final String userName = System.getProperty("user.name");
    final String tmpRoot = System.getProperty("java.io.tmpdir");
    final boolean alreadyUserSpecific = tmpRoot.endsWith(File.separatorChar + userName);
    return alreadyUserSpecific ? new File(tmpRoot) : new File(tmpRoot, userName);
}
/**
 * Creates a temp file in the first candidate directory whose file store reports more than
 * {@code minBytesFree} usable bytes, and registers it for deletion on JVM exit. The last
 * directory in the array is used without a free-space check when no earlier one qualifies.
 *
 * @param prefix       prefix for the generated file name
 * @param suffix       suffix for the generated file name
 * @param tmpDirs      candidate directories, in order of preference
 * @param minBytesFree usable space a directory's file store must exceed to be chosen
 * @return the new path, or null when {@code tmpDirs} is empty
 * @throws IOException if the file store cannot be queried or the file cannot be created
 */
public static Path newTempPath(final String prefix, final String suffix,
                               final Path[] tmpDirs, final long minBytesFree) throws IOException {
    for (int idx = 0; idx < tmpDirs.length; idx++) {
        final Path candidate = tmpDirs[idx];
        final boolean isLastResort = idx == tmpDirs.length - 1;
        if (!isLastResort && Files.getFileStore(candidate).getUsableSpace() <= minBytesFree) {
            continue;
        }
        final Path created = Files.createTempFile(candidate, prefix, suffix);
        deleteOnExit(created);
        return created;
    }
    return null;
}
/**
 * Creates a new tmp path using {@link #FIVE_GBS} as the free-space threshold; see
 * {@link #newTempPath(String, String, Path[], long)} for the selection rules.
 */
public static Path newTempPath(final String prefix, final String suffix,
                               final Path[] tmpDirs) throws IOException {
    final long requiredFreeBytes = FIVE_GBS;
    return newTempPath(prefix, suffix, tmpDirs, requiredFreeBytes);
}
/**
 * Returns a default tmp directory as a Path: the {@code java.io.tmpdir} location itself when
 * its last name element already is the current user name, otherwise a child of it named after
 * the user.
 *
 * @throws RuntimeIOException if the tmp location cannot be converted to a Path
 */
public static Path getDefaultTmpDirPath() {
    try {
        final String user = System.getProperty("user.name");
        final String tmp = System.getProperty("java.io.tmpdir");
        final Path tmpParent = getPath(tmp);
        // Compare against the bare user name. Path.endsWith matches whole name elements, and a
        // separator-prefixed argument is parsed as a rooted path, which can never be the tail
        // of a longer path, so such a check would always append the user name a second time.
        if (tmpParent.endsWith(user)) {
            return tmpParent;
        }
        return tmpParent.resolve(user);
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
}
/**
 * Register a {@link Path} for deletion on JVM exit by handing it to {@link DeleteOnExitPathHook}.
 *
 * @param path the path to register
 * @see DeleteOnExitPathHook
 */
public static void deleteOnExit(final Path path) {
DeleteOnExitPathHook.add(path);
}
/**
 * Returns the name of the file minus the extension (i.e. the text before the last "." in the
 * file name). A name without a ".", or whose only "." is its first character, is returned unchanged.
 */
public static String basename(final File f) {
    final String name = f.getName();
    final int dot = name.lastIndexOf('.');
    final boolean hasExtension = dot > 0 && dot > name.lastIndexOf(File.separator);
    return hasExtension ? name.substring(0, dot) : name;
}
/**
 * Validates an input given as a string. A null input is rejected; an input that parses as a
 * URL is accepted without further checks; anything else is treated as a file name and must
 * pass {@link #assertFileIsReadable(File)}.
 *
 * @param input the input to check for validity
 * @throws IllegalArgumentException if {@code input} is null
 */
public static void assertInputIsValid(final String input) {
    if (input == null) {
        throw new IllegalArgumentException("Cannot check validity of null input.");
    }
    if (isUrl(input)) {
        return;
    }
    assertFileIsReadable(new File(input));
}
/**
 * Returns true iff the string can be parsed by the {@link URL} constructor.
 * Helps distinguish URL inputs from file path inputs.
 */
public static boolean isUrl(final String input) {
    boolean parsesAsUrl;
    try {
        new URL(input);
        parsesAsUrl = true;
    } catch (final MalformedURLException notAUrl) {
        parsesAsUrl = false;
    }
    return parsesAsUrl;
}
/**
 * Converts the file to a Path and applies {@link #assertFileIsReadable(Path)} to it.
 *
 * @param file the file to check for readability
 */
public static void assertFileIsReadable(final File file) {
    final Path asPath = toPath(file);
    assertFileIsReadable(asPath);
}
/**
 * Verifies that a path is non-null, exists, is not a directory and is readable.
 *
 * @param path the file to check for readability
 * @throws IllegalArgumentException if {@code path} is null
 * @throws SAMException if the path does not exist, is a directory, or is not readable
 */
public static void assertFileIsReadable(final Path path) {
    if (path == null) {
        throw new IllegalArgumentException("Cannot check readability of null file.");
    }
    if (!Files.exists(path)) {
        throw new SAMException("Cannot read non-existent file: " + path.toUri().toString());
    }
    if (Files.isDirectory(path)) {
        throw new SAMException("Cannot read file because it is a directory: " + path.toUri().toString());
    }
    if (!Files.isReadable(path)) {
        throw new SAMException("File exists but is not readable: " + path.toUri().toString());
    }
}
/**
 * Applies {@link #assertFileIsReadable(File)} to every file in the list, stopping at the
 * first one that fails.
 *
 * @param files the list of files to check for readability
 */
public static void assertFilesAreReadable(final List<File> files) {
    // The element type must be declared: iterating a raw List yields Object, not File.
    for (final File file : files) {
        assertFileIsReadable(file);
    }
}
/**
 * Applies {@link #assertFileIsReadable(Path)} to every path in the list, stopping at the
 * first one that fails.
 *
 * @param paths the list of paths to check for readability
 */
public static void assertPathsAreReadable(final List<Path> paths) {
    // The element type must be declared: iterating a raw List yields Object, not Path.
    for (final Path path : paths) {
        assertFileIsReadable(path);
    }
}
/**
 * Applies {@link #assertInputIsValid(String)} to every string in the list, stopping at the
 * first one that fails.
 *
 * @param inputs the list of inputs (URLs or file names) to check for validity
 */
public static void assertInputsAreValid(final List<String> inputs) {
    // The element type must be declared: iterating a raw List yields Object, not String.
    for (final String input : inputs) {
        assertInputIsValid(input);
    }
}
/**
 * Checks that a file is non-null, and is either extant and writable, or non-existent but
 * with a parent directory that exists and is writable.
 *
 * @param file the file to check for writability
 * @throws IllegalArgumentException if {@code file} is null
 * @throws SAMException if any of the conditions above does not hold
 */
public static void assertFileIsWritable(final File file) {
    if (file == null) {
        throw new IllegalArgumentException("Cannot check writability of null file.");
    } else if (!file.exists()) {
        // If the file doesn't exist, check that its parent directory does and is writable.
        final File parent = file.getAbsoluteFile().getParentFile();
        // getParentFile() returns null for a path without a parent; treat that like a missing parent.
        if (parent == null || !parent.exists()) {
            throw new SAMException("Cannot write file: " + file.getAbsolutePath() + ". " +
                    "Neither file nor parent directory exist.");
        } else if (!parent.isDirectory()) {
            throw new SAMException("Cannot write file: " + file.getAbsolutePath() + ". " +
                    "File does not exist and parent is not a directory.");
        } else if (!parent.canWrite()) {
            throw new SAMException("Cannot write file: " + file.getAbsolutePath() + ". " +
                    "File does not exist and parent directory is not writable..");
        }
    } else if (file.isDirectory()) {
        throw new SAMException("Cannot write file because it is a directory: " + file.getAbsolutePath());
    } else if (!file.canWrite()) {
        throw new SAMException("File exists but is not writable: " + file.getAbsolutePath());
    }
}
/**
 * Applies {@link #assertFileIsWritable(File)} to every file in the list, stopping at the
 * first one that fails.
 *
 * @param files the list of files to check for writability
 */
public static void assertFilesAreWritable(final List<File> files) {
    // The element type must be declared: iterating a raw List yields Object, not File.
    for (final File file : files) {
        assertFileIsWritable(file);
    }
}
/**
 * Converts the directory to a Path and applies {@link #assertDirectoryIsWritable(Path)} to it.
 *
 * @param dir the dir to check for writability
 */
public static void assertDirectoryIsWritable(final File dir) {
    assertDirectoryIsWritable(IOUtil.toPath(dir));
}
/**
 * Checks that a directory is non-null, extant, a directory and writable.
 *
 * @param dir the dir to check for writability
 * @throws IllegalArgumentException if {@code dir} is null
 * @throws SAMException if the path does not exist, is not a directory, or is not writable
 */
public static void assertDirectoryIsWritable(final Path dir) {
    if (dir == null) {
        // This is a writability check on a directory; the message now says so.
        throw new IllegalArgumentException("Cannot check writability of null directory.");
    } else if (!Files.exists(dir)) {
        throw new SAMException("Directory does not exist: " + dir.toUri().toString());
    } else if (!Files.isDirectory(dir)) {
        throw new SAMException("Cannot write to directory because it is not a directory: " + dir.toUri().toString());
    } else if (!Files.isWritable(dir)) {
        throw new SAMException("Directory exists but is not writable: " + dir.toUri().toString());
    }
}
/**
 * Verifies that a directory is non-null, extant, a directory and readable.
 *
 * @param dir the dir to check for readability
 * @throws IllegalArgumentException if {@code dir} is null
 * @throws SAMException if the path does not exist, is not a directory, or is not readable
 */
public static void assertDirectoryIsReadable(final File dir) {
    if (dir == null) {
        throw new IllegalArgumentException("Cannot check readability of null file.");
    }
    if (!dir.exists()) {
        throw new SAMException("Directory does not exist: " + dir.getAbsolutePath());
    }
    if (!dir.isDirectory()) {
        throw new SAMException("Cannot read from directory because it is not a directory: " + dir.getAbsolutePath());
    }
    if (!dir.canRead()) {
        throw new SAMException("Directory exists but is not readable: " + dir.getAbsolutePath());
    }
}
/**
 * Checks that the two files are the same length and have the same content.
 *
 * @param f1 first file
 * @param f2 second file
 * @throws SAMException if the lengths differ, the contents differ, or an I/O error occurs
 */
public static void assertFilesEqual(final File f1, final File f2) {
    if (f1.length() != f2.length()) {
        throw new SAMException("File " + f1 + " is " + f1.length() + " bytes but file " + f2 + " is " + f2.length() + " bytes.");
    }
    try (
        final InputStream s1 = new FileInputStream(f1);
        final InputStream s2 = new FileInputStream(f2)
    ) {
        final byte[] buf1 = new byte[1024 * 1024];
        final byte[] buf2 = new byte[1024 * 1024];
        int len1;
        // Each buffer is filled completely (or up to EOF) before comparing, so a stream that
        // returns fewer bytes than requested from a single read() does not cause a spurious mismatch.
        while ((len1 = fillBufferForComparison(s1, buf1)) > 0) {
            final int len2 = fillBufferForComparison(s2, buf2);
            if (len1 != len2) {
                throw new SAMException("Unexpected EOF comparing files that are supposed to be the same length.");
            }
            // Only the bytes actually read in this round are compared.
            for (int i = 0; i < len1; ++i) {
                if (buf1[i] != buf2[i]) {
                    throw new SAMException("Files " + f1 + " and " + f2 + " differ.");
                }
            }
        }
    } catch (final IOException e) {
        throw new SAMException("Exception comparing files " + f1 + " and " + f2, e);
    }
}

/** Reads from {@code in} until {@code buf} is full or the stream ends; returns the number of bytes stored. */
private static int fillBufferForComparison(final InputStream in, final byte[] buf) throws IOException {
    int total = 0;
    while (total < buf.length) {
        final int n = in.read(buf, total, buf.length - total);
        if (n < 0) {
            break;
        }
        total += n;
    }
    return total;
}
/**
 * Verifies that {@link File#length()} reports a non-zero size for the file.
 *
 * @param file the file to check
 * @throws SAMException if the reported length is 0
 */
public static void assertFileSizeNonZero(final File file) {
    final long size = file.length();
    if (size == 0) {
        throw new SAMException(file.getAbsolutePath() + " has length 0");
    }
}
/**
 * Converts the file to a Path and opens it with {@link #openFileForReading(Path)}.
 *
 * @param file The file to open
 * @return the input stream to read from
 */
public static InputStream openFileForReading(final File file) {
    final Path asPath = toPath(file);
    return openFileForReading(asPath);
}
/**
 * Opens a path for reading. When {@link #hasGzipFileExtension(Path)} accepts the file name the
 * stream is opened through {@link #openGzipFileForReading(Path)}; otherwise the raw stream is returned.
 *
 * @param path The file to open
 * @return the input stream to read from
 * @throws SAMException if the file cannot be opened
 */
public static InputStream openFileForReading(final Path path) {
    try {
        return hasGzipFileExtension(path)
                ? openGzipFileForReading(path)
                : Files.newInputStream(path);
    } catch (IOException ioe) {
        throw new SAMException("Error opening file: " + path, ioe);
    }
}
/**
 * Converts the file to a Path and opens it with {@link #openGzipFileForReading(Path)}.
 *
 * @param file The GZIP-encoded file to open
 * @return the decompressing input stream to read from
 */
public static InputStream openGzipFileForReading(final File file) {
    final Path asPath = toPath(file);
    return openGzipFileForReading(asPath);
}
/**
 * Opens a GZIP-encoded file for reading through a {@link GZIPInputStream}.
 *
 * @param path The file to open
 * @return the decompressing input stream to read from
 * @throws SAMException if the file cannot be opened or its GZIP header cannot be read
 */
public static InputStream openGzipFileForReading(final Path path) {
    InputStream raw = null;
    try {
        raw = Files.newInputStream(path);
        return new GZIPInputStream(raw);
    } catch (IOException ioe) {
        // The GZIPInputStream constructor reads the header and may throw after the raw
        // stream has been opened; close it here so the file handle does not leak.
        if (raw != null) {
            try {
                raw.close();
            } catch (IOException closeFailure) {
                ioe.addSuppressed(closeFailure);
            }
        }
        throw new SAMException("Error opening file: " + path, ioe);
    }
}
/**
 * Converts the file to a Path and opens it with {@link #openFileForWriting(Path, OpenOption...)},
 * passing no open options.
 *
 * @param file the file to write to
 * @return the output stream to write to
 */
public static OutputStream openFileForWriting(final File file) {
    final Path asPath = toPath(file);
    return openFileForWriting(asPath);
}
/**
 * Opens a file for writing, optionally appending; the output is gzipped when
 * {@link #hasGzipFileExtension(Path)} accepts the file name.
 *
 * @param file   the file to write to
 * @param append whether to request {@link StandardOpenOption#APPEND}; when false no open
 *               options are passed
 * @return the output stream to write to
 */
public static OutputStream openFileForWriting(final File file, final boolean append) {
    final OpenOption[] options = getAppendOpenOption(append);
    return openFileForWriting(toPath(file), options);
}
/**
 * Opens a path for writing. When {@link #hasGzipFileExtension(Path)} accepts the file name the
 * stream is opened through {@link #openGzipFileForWriting(Path, OpenOption...)}; otherwise a
 * plain output stream is returned.
 *
 * @param path        the file to write to
 * @param openOptions options to use when opening the file
 * @return the output stream to write to
 * @throws SAMException if the file cannot be opened
 */
public static OutputStream openFileForWriting(final Path path, OpenOption... openOptions) {
    try {
        return hasGzipFileExtension(path)
                ? openGzipFileForWriting(path, openOptions)
                : Files.newOutputStream(path, openOptions);
    } catch (final IOException ioe) {
        throw new SAMException("Error opening file for writing: " + path.toUri().toString(), ioe);
    }
}
/**
 * Checks whether the file name ends with .gz, .gzip, or .bfq.
 *
 * @param path the path whose last name element is inspected
 * @return true if the file name carries one of those endings; false otherwise, including when
 *         the path has no file name element (e.g. a file system root)
 */
public static boolean hasGzipFileExtension(Path path) {
    final Path fileNamePath = path.getFileName();
    if (fileNamePath == null) {
        // Roots and empty paths have no file name, hence no extension.
        return false;
    }
    final String fileName = fileNamePath.toString();
    return fileName.endsWith(".gz") || fileName.endsWith(".gzip") || fileName.endsWith(".bfq");
}
/**
 * Opens a file for buffered text writing, optionally appending. No charset is given to the
 * {@link OutputStreamWriter}.
 * Preferred over PrintStream and PrintWriter because an exception is thrown on I/O error.
 */
public static BufferedWriter openFileForBufferedWriting(final File file, final boolean append) {
    final OutputStream rawOut = openFileForWriting(file, append);
    final Writer encoder = new OutputStreamWriter(rawOut);
    return new BufferedWriter(encoder, Defaults.NON_ZERO_BUFFER_SIZE);
}
/**
 * Opens a path for buffered text writing with the given open options. No charset is given to
 * the {@link OutputStreamWriter}.
 * Preferred over PrintStream and PrintWriter because an exception is thrown on I/O error.
 */
public static BufferedWriter openFileForBufferedWriting(final Path path, final OpenOption ... openOptions) {
    final OutputStream rawOut = openFileForWriting(path, openOptions);
    final Writer encoder = new OutputStreamWriter(rawOut);
    return new BufferedWriter(encoder, Defaults.NON_ZERO_BUFFER_SIZE);
}
/**
 * Converts the file to a Path and opens it with
 * {@link #openFileForBufferedWriting(Path, OpenOption...)}, passing no open options.
 * Preferred over PrintStream and PrintWriter because an exception is thrown on I/O error.
 */
public static BufferedWriter openFileForBufferedWriting(final File file) {
    final Path asPath = IOUtil.toPath(file);
    return openFileForBufferedWriting(asPath);
}
/**
 * Converts the file to a Path and opens it with {@link #openFileForBufferedUtf8Writing(Path)}.
 * Preferred over PrintStream and PrintWriter because an exception is thrown on I/O error.
 */
public static BufferedWriter openFileForBufferedUtf8Writing(final File file) {
    final Path asPath = IOUtil.toPath(file);
    return openFileForBufferedUtf8Writing(asPath);
}
/**
 * Opens a path for buffered text writing, encoding characters as UTF-8.
 * Preferred over PrintStream and PrintWriter because an exception is thrown on I/O error.
 */
public static BufferedWriter openFileForBufferedUtf8Writing(final Path path) {
    final Charset utf8 = Charset.forName("UTF-8");
    final Writer encoder = new OutputStreamWriter(openFileForWriting(path), utf8);
    return new BufferedWriter(encoder, Defaults.NON_ZERO_BUFFER_SIZE);
}
/**
 * Opens a file through {@link #openFileForReading(File)} and returns a buffered reader that
 * decodes it as UTF-8.
 *
 * @param file The file to open
 * @return the reader to read from
 */
public static BufferedReader openFileForBufferedUtf8Reading(final File file) {
    final Charset utf8 = Charset.forName("UTF-8");
    final Reader decoder = new InputStreamReader(openFileForReading(file), utf8);
    return new BufferedReader(decoder);
}
/**
 * Opens a GZIP encoded file for writing, optionally appending.
 *
 * @param file   the file to write to
 * @param append whether to request {@link StandardOpenOption#APPEND}; when false no open
 *               options are passed
 * @return the output stream to write to
 */
public static OutputStream openGzipFileForWriting(final File file, final boolean append) {
    final OpenOption[] options = getAppendOpenOption(append);
    return openGzipFileForWriting(IOUtil.toPath(file), options);
}
/**
 * Translates the append flag into open options: a one-element array holding
 * {@link StandardOpenOption#APPEND} when true, the shared empty array when false.
 */
private static OpenOption[] getAppendOpenOption(boolean append) {
    if (!append) {
        return EMPTY_OPEN_OPTIONS;
    }
    return new OpenOption[]{StandardOpenOption.APPEND};
}
/**
 * Opens a GZIP encoded file for writing, using the current compression level. The
 * {@link Defaults#BUFFER_SIZE} value is passed to the gzip stream when it is positive.
 *
 * @param path        the file to write to
 * @param openOptions options to control how the file is opened
 * @return the output stream to write to
 * @throws SAMException if the file cannot be opened
 */
public static OutputStream openGzipFileForWriting(final Path path, final OpenOption ... openOptions) {
    try {
        final OutputStream raw = Files.newOutputStream(path, openOptions);
        final int bufferSize = Defaults.BUFFER_SIZE;
        return bufferSize > 0
                ? new CustomGzipOutputStream(raw, bufferSize, compressionLevel)
                : new CustomGzipOutputStream(raw, compressionLevel);
    } catch (final IOException ioe) {
        throw new SAMException("Error opening file for writing: " + path.toUri().toString(), ioe);
    }
}
/**
 * Converts the file to a Path and opens it with {@link #openFileForMd5CalculatingWriting(Path)}.
 *
 * @param file the file to write to
 * @return the output stream to write to
 */
public static OutputStream openFileForMd5CalculatingWriting(final File file) {
    final Path asPath = toPath(file);
    return openFileForMd5CalculatingWriting(asPath);
}
/**
 * Opens a path for writing, wrapped in an {@link Md5CalculatingOutputStream} that is given the
 * sibling path {@code <file name>.md5} (presumably where the digest is written; see
 * Md5CalculatingOutputStream).
 *
 * @param file the file to write to
 * @return the output stream to write to
 */
public static OutputStream openFileForMd5CalculatingWriting(final Path file) {
    // resolve(".md5") would name a child "<file>/.md5" of what is about to be a regular file;
    // the digest belongs next to the data file, with ".md5" appended to its name.
    final Path digestPath = file.resolveSibling(file.getFileName() + ".md5");
    return new Md5CalculatingOutputStream(IOUtil.openFileForWriting(file), digestPath);
}
/**
 * Copies everything readable from input to output through an intermediate buffer. The caller
 * is responsible for opening and closing both streams.
 *
 * @param input  contents to be copied
 * @param output destination
 * @throws SAMException if reading or writing fails
 */
public static void copyStream(final InputStream input, final OutputStream output) {
    final byte[] chunk = new byte[Defaults.NON_ZERO_BUFFER_SIZE];
    try {
        for (int count = input.read(chunk); count > 0; count = input.read(chunk)) {
            output.write(chunk, 0, count);
        }
    } catch (IOException e) {
        throw new SAMException("Exception copying stream", e);
    }
}
/**
 * Copy input to output, overwriting output if it already exists.
 *
 * @param input  file to read
 * @param output file to write
 * @throws SAMException if either file cannot be opened or closed, or if the copy itself fails
 */
public static void copyFile(final File input, final File output) {
    // try-with-resources closes both streams even when the copy or a close fails, so a failed
    // copy no longer leaks file handles.
    try (final InputStream is = new FileInputStream(input);
         final OutputStream os = new FileOutputStream(output)) {
        copyStream(is, os);
    } catch (IOException e) {
        throw new SAMException("Error copying " + input + " to " + output, e);
    }
}
/**
 * Lists the entries of a directory whose names match a regular expression in full.
 *
 * @param directory directory to list
 * @param regexp    regular expression, compiled with {@link Pattern#compile(String)}
 * @return list of files matching regexp.
 */
public static File[] getFilesMatchingRegexp(final File directory, final String regexp) {
    return getFilesMatchingRegexp(directory, Pattern.compile(regexp));
}
/**
 * Lists the entries of a directory whose names match a pattern in full.
 *
 * @param directory directory to list
 * @param regexp    pattern each entry name must match entirely
 * @return the matching entries, as returned by {@link File#listFiles(FilenameFilter)}
 */
public static File[] getFilesMatchingRegexp(final File directory, final Pattern regexp) {
    final FilenameFilter nameMatches = (dir, name) -> regexp.matcher(name).matches();
    return directory.listFiles(nameMatches);
}
/**
 * Delete the given file or directory. If a directory, all enclosed files and subdirs are
 * deleted first. Deletion of further children stops after the first failure.
 *
 * @param fileOrDirectory the file or directory to remove
 * @return true if everything, including {@code fileOrDirectory} itself, was deleted
 */
public static boolean deleteDirectoryTree(final File fileOrDirectory) {
    boolean success = true;
    if (fileOrDirectory.isDirectory()) {
        // listFiles() returns null when the directory cannot be listed; treat that as having
        // no children to visit and let the final delete() report the outcome.
        final File[] children = fileOrDirectory.listFiles();
        if (children != null) {
            for (final File child : children) {
                success = success && deleteDirectoryTree(child);
            }
        }
    }
    return success && fileOrDirectory.delete();
}
/**
 * Returns the sum of {@link File#length()} over the file or directory and all its descendants.
 *
 * @param fileOrDirectory root of the tree to measure
 * @return the accumulated length in bytes
 */
public static long sizeOfTree(final File fileOrDirectory) {
    long total = fileOrDirectory.length();
    if (fileOrDirectory.isDirectory()) {
        // listFiles() returns null when the directory cannot be listed; count only the
        // directory entry itself in that case.
        final File[] children = fileOrDirectory.listFiles();
        if (children != null) {
            for (final File f : children) {
                total += sizeOfTree(f);
            }
        }
    }
    return total;
}
/**
 * Copies a directory tree (all subdirectories and files) recursively to a destination.
 * Nothing happens when {@code fileOrDirectory} is not a directory.
 *
 * @param fileOrDirectory directory to copy from
 * @param destination     directory to copy into; created if it does not exist (its parent must exist)
 * @throws SAMException if the destination directory cannot be created, the source directory
 *                      cannot be listed, or a file copy fails
 */
public static void copyDirectoryTree(final File fileOrDirectory, final File destination) {
    if (!fileOrDirectory.isDirectory()) {
        return;
    }
    // mkdir() returns false both on failure and when the directory already exists; only the
    // former is an error.
    if (!destination.mkdir() && !destination.isDirectory()) {
        throw new SAMException("Could not create destination directory " + destination);
    }
    // listFiles() returns null when the directory cannot be listed.
    final File[] children = fileOrDirectory.listFiles();
    if (children == null) {
        throw new SAMException("Could not list contents of directory " + fileOrDirectory);
    }
    for (final File f : children) {
        final File destinationFileOrDirectory = new File(destination.getPath(), f.getName());
        if (f.isDirectory()) {
            copyDirectoryTree(f, destinationFileOrDirectory);
        } else {
            copyFile(f, destinationFileOrDirectory);
        }
    }
}
/**
 * Create a temporary subdirectory in the default temporary-file directory, using the given prefix and suffix to generate the name.
 * Note that this method is not completely safe: it creates a temporary file, deletes it, and then creates
 * a directory with the same name, so another process could claim the name in between.
 *
 * @param prefix The prefix string to be used in generating the file's name; must be at least three characters long
 * @param suffix The suffix string to be used in generating the file's name; may be null, in which case the suffix ".tmp" will be used
 * @return File object for new directory
 * @throws SAMException if the placeholder file cannot be created or deleted, or the directory cannot be created
 */
public static File createTempDir(final String prefix, final String suffix) {
    final File placeholder;
    try {
        placeholder = File.createTempFile(prefix, suffix);
    } catch (IOException e) {
        throw new SAMException("Exception creating temporary directory.", e);
    }
    if (!placeholder.delete()) {
        throw new SAMException("Could not delete temporary file " + placeholder);
    }
    if (!placeholder.mkdir()) {
        throw new SAMException("Could not create temporary directory " + placeholder);
    }
    return placeholder;
}
/** Converts the file to a Path and opens it with {@link #openFileForBufferedReading(Path)}. */
public static BufferedReader openFileForBufferedReading(final File file) {
    final Path asPath = toPath(file);
    return openFileForBufferedReading(asPath);
}
/**
 * Opens a path through {@link #openFileForReading(Path)} and returns a buffered reader for it.
 * No charset is given to the {@link InputStreamReader}.
 */
public static BufferedReader openFileForBufferedReading(final Path path) {
    final Reader decoder = new InputStreamReader(openFileForReading(path));
    return new BufferedReader(decoder, Defaults.NON_ZERO_BUFFER_SIZE);
}
/**
 * Trims the string, then replaces every whitespace character and every character from the
 * unsafe-punctuation set in the pattern below with an underscore.
 */
public static String makeFileNameSafe(final String str) {
    final String trimmed = str.trim();
    return Pattern.compile("[\\s!\"#$%&'()*/:;<=>?@\\[\\]\\\\^`{|}~]").matcher(trimmed).replaceAll("_");
}
/**
 * Returns the extension of the file's name, i.e. the text from the last "." onwards (including the ".").
 *
 * @param f the file whose name is inspected
 * @return the extension including the leading ".", or null when the name has no "." or the only "."
 *         is its first character
 */
public static String fileSuffix(final File f) {
    final String name = f.getName();
    final int lastDot = name.lastIndexOf('.');
    final boolean hasSuffix = lastDot > 0 && lastDot > name.lastIndexOf(File.separator);
    return hasSuffix ? name.substring(lastDot) : null;
}
/**
 * Builds a "/"-separated path for the file by walking from its canonical form up through each parent,
 * canonicalising every parent on the way, until a file with an empty name (or no parent) is reached.
 *
 * @param file the file to resolve
 * @return the assembled path, each component prefixed with "/"
 * @throws RuntimeIOException if canonicalisation fails
 */
public static String getFullCanonicalPath(final File file) {
    try {
        final StringBuilder assembled = new StringBuilder();
        File current = file.getCanonicalFile();
        while (current != null && !current.getName().isEmpty()) {
            // Prepend this component; components are discovered leaf-first.
            assembled.insert(0, current.getName()).insert(0, "/");
            final File parent = current.getParentFile();
            current = (parent == null) ? null : parent.getCanonicalFile();
        }
        return assembled.toString();
    } catch (final IOException ioe) {
        throw new RuntimeIOException("Error getting full canonical path for " +
                file + ": " + ioe.getMessage(), ioe);
    }
}
/**
 * Reads everything from an input stream as characters and returns a single String.
 * Bytes are decoded with the platform default charset. The input is read line by line and the lines are
 * re-joined with '\n', so line terminators are normalised and a trailing terminator is not preserved.
 * The stream is not closed by this method.
 *
 * @param in the stream to read
 * @return the stream's contents as a single String
 * @throws RuntimeIOException if reading fails
 */
public static String readFully(final InputStream in) {
    try {
        final BufferedReader r = new BufferedReader(new InputStreamReader(in), Defaults.NON_ZERO_BUFFER_SIZE);
        final StringBuilder builder = new StringBuilder(512);
        // Track "first line" explicitly: testing builder.length() would drop the separator after
        // leading empty lines and silently discard them.
        boolean first = true;
        String line;
        while ((line = r.readLine()) != null) {
            if (!first) {
                builder.append('\n');
            }
            builder.append(line);
            first = false;
        }
        return builder.toString();
    }
    catch (final IOException ioe) {
        throw new RuntimeIOException("Error reading stream", ioe);
    }
}
/**
 * Returns an iterator over the lines in a text file. The underlying resources are automatically
 * closed when the iterator hits the end of the input (including when the file has no lines at all),
 * or manually by calling close().
 *
 * @param f a file that is to be read in as text
 * @return an iterator over the lines in the text file
 * @throws RuntimeIOException if reading from the file fails
 */
public static IterableOnceIterator readLines(final File f) {
    final BufferedReader in = IOUtil.openFileForBufferedReading(f);
    final String firstLine;
    try {
        firstLine = in.readLine();
        // An empty file never reaches the end-of-input branch in next(), so release the reader here.
        if (firstLine == null) in.close();
    }
    catch (final IOException e) {
        // Do not leak the reader when the very first read fails.
        CloserUtil.close(in);
        throw new RuntimeIOException(e);
    }
    return new IterableOnceIterator() {
        private String next = firstLine;
        /** Returns true if there is another line to read or false otherwise. */
        @Override public boolean hasNext() { return next != null; }
        /** Returns the next line in the file or null if there are no more lines. */
        @Override public String next() {
            // Once exhausted the reader is already closed; reading again would throw.
            if (next == null) return null;
            try {
                final String tmp = next;
                next = in.readLine();
                if (next == null) in.close();
                return tmp;
            }
            catch (final IOException ioe) { throw new RuntimeIOException(ioe); }
        }
        /** Closes the underlying input stream. Not required if end of stream has already been hit. */
        @Override public void close() throws IOException { CloserUtil.close(in); }
    };
}
/**
 * Returns all of the untrimmed lines in the provided file.
 *
 * @param file the file to read
 * @return the lines of the file, in order
 * @throws FileNotFoundException if the file cannot be opened for reading
 */
public static List<String> slurpLines(final File file) throws FileNotFoundException {
    return slurpLines(new FileInputStream(file));
}
/**
 * Returns all of the untrimmed lines in the provided stream, decoded with the default charset
 * {@link java.nio.charset.Charset#defaultCharset()}.
 *
 * @param is the stream to read
 * @return the lines of the stream, in order
 * @throws FileNotFoundException declared for compatibility with existing callers
 */
public static List<String> slurpLines(final InputStream is) throws FileNotFoundException {
    // See java.util.Scanner source for origin of the line-separator delimiter used here.
    return tokenSlurp(is, Charset.defaultCharset(), "\r\n|[\n\r\u2028\u2029\u0085]");
}
/**
 * Opens the file and reads all of it into a String via {@link #slurp(java.io.InputStream)}, which uses
 * the default charset {@link java.nio.charset.Charset#defaultCharset()}.
 *
 * @param file the file to read
 * @return the file's contents
 * @throws FileNotFoundException if the file cannot be opened for reading
 */
public static String slurp(final File file) throws FileNotFoundException {
    final FileInputStream fileStream = new FileInputStream(file);
    return slurp(fileStream);
}
/**
 * Reads all of the stream into a String using the default charset
 * {@link java.nio.charset.Charset#defaultCharset()}; delegates to
 * {@link #slurp(java.io.InputStream, java.nio.charset.Charset)}.
 *
 * @param is the stream to read
 * @return the stream's contents
 */
public static String slurp(final InputStream is) {
    final Charset platformDefault = Charset.defaultCharset();
    return slurp(is, platformDefault);
}
/**
 * Reads all of the stream into a String, decoding with the provided {@link java.nio.charset.Charset},
 * then closes the stream quietly.
 *
 * @param is the stream to read
 * @param charSet the charset used to decode the stream
 * @return the stream's contents, or the empty string if the stream yields no token
 */
public static String slurp(final InputStream is, final Charset charSet) {
    // "\\A" anchors at start of input, so the scanner yields the whole stream as at most one token.
    final List<String> tokenOrEmpty = tokenSlurp(is, charSet, "\\A");
    return tokenOrEmpty.isEmpty() ? StringUtil.EMPTY_STRING : CollectionUtil.getSoleElement(tokenOrEmpty);
}
/**
 * Tokenizes the provided input stream into memory using the given delimiter, then closes the stream.
 *
 * @param is the stream to read; always closed before this method returns
 * @param charSet the charset used to decode the stream
 * @param delimiterPattern regular expression separating tokens
 * @return the tokens in the order they were read
 */
private static List<String> tokenSlurp(final InputStream is, final Charset charSet, final String delimiterPattern) {
    try {
        // Scanner takes a charset *name*; name() is the documented accessor for it.
        final Scanner s = new Scanner(is, charSet.name()).useDelimiter(delimiterPattern);
        final LinkedList<String> tokens = new LinkedList<>();
        while (s.hasNext()) {
            tokens.add(s.next());
        }
        return tokens;
    } finally {
        CloserUtil.close(is);
    }
}
/**
 * Go through the files provided and if they have one of the provided file extensions pass the file into the output
 * otherwise assume that file is a list of filenames and unfold it into the output.
 * Delegates to {@link #unrollPaths(Collection, String...)} after converting the files to paths.
 *
 * @param inputs the files to unroll
 * @param extensions the extensions identifying files that are passed through as-is
 * @return the unrolled files
 */
public static List<File> unrollFiles(final Collection<File> inputs, final String... extensions) {
    final Collection<Path> paths = unrollPaths(filesToPaths(inputs), extensions);
    return paths.stream().map(Path::toFile).collect(Collectors.toList());
}
/**
 * Go through the files provided and if they have one of the provided file extensions pass the file to the output
 * otherwise assume that file is a list of filenames and unfold it into the output (recursively).
 * List files are read line by line; each line is trimmed and blank lines are skipped.
 *
 * @param inputs the paths to unroll
 * @param extensions the extensions identifying paths that are passed through as-is; at least one is required
 * @return the unrolled paths, in input order
 * @throws IllegalArgumentException if no extension is given, a list file cannot be read, or a line in a
 *                                  list file cannot be converted to a Path
 */
public static List<Path> unrollPaths(final Collection<Path> inputs, final String... extensions) {
    if (extensions.length < 1) throw new IllegalArgumentException("Must provide at least one extension.");
    final Stack<Path> stack = new Stack<>();
    final List<Path> output = new ArrayList<>();
    stack.addAll(inputs);
    while (!stack.empty()) {
        final Path p = stack.pop();
        final String name = p.toString();
        boolean matched = false;
        for (final String ext : extensions) {
            if (name.endsWith(ext)) {
                matched = true;
                break;
            }
        }
        if (matched) {
            output.add(p);
            continue;
        }
        // If the file didn't match a given extension, treat it as a list of files.
        // Files.lines holds the file open until the stream is closed, so close it explicitly.
        try (final java.util.stream.Stream<String> lines = Files.lines(p)) {
            lines.map(String::trim)
                    .filter(s -> !s.isEmpty())
                    .forEach(s -> {
                        try {
                            stack.push(getPath(s));
                        } catch (IOException e) {
                            throw new IllegalArgumentException("cannot convert " + s + " to a Path.", e);
                        }
                    });
        } catch (IOException e) {
            throw new IllegalArgumentException("had trouble reading from " + p.toUri().toString(), e);
        }
    }
    // Preserve input order (since we're using a stack above) for things that care
    Collections.reverse(output);
    return output;
}
/**
 * Check if the given URI has a scheme.
 *
 * @param uriString the URI to check
 * @return {@code true} if the given URI has a scheme, {@code false} if not, or if the URI is malformed.
 */
public static boolean hasScheme(String uriString) {
    final URI parsed;
    try {
        parsed = new URI(uriString);
    } catch (URISyntaxException e) {
        // A string that does not parse as a URI is reported as having no scheme.
        return false;
    }
    return parsed.getScheme() != null;
}
/**
 * Converts the given URI to a {@link Path} object. A URI without a scheme is treated as a local file.
 * If the filesystem cannot be found in the usual way, then attempt to load the filesystem provider using
 * the thread context classloader. This is needed when the filesystem provider is loaded using a URL
 * classloader (e.g. in spark-submit).
 *
 * @param uriString the URI to convert
 * @return the resulting {@code Path}
 * @throws IOException an I/O error occurs creating the file system
 */
public static Path getPath(String uriString) throws IOException {
    final URI uri = URI.create(uriString);
    final boolean isLocalFile = uri.getScheme() == null;
    try {
        if (isLocalFile) {
            return Paths.get(uriString);
        }
        // Otherwise the scheme determines which filesystem to use.
        return Paths.get(uri);
    } catch (FileSystemNotFoundException e) {
        final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
        if (contextLoader == null) {
            throw e;
        }
        return FileSystems.newFileSystem(uri, new HashMap<>(), contextLoader).provider().getPath(uri);
    }
}
/**
 * Converts each of the given URI strings to a {@link Path} using {@link #getPath(String)}.
 *
 * @param uriStrings the URIs to convert
 * @return a new list holding the resulting paths, in the same order as the input
 * @throws RuntimeIOException wrapping any IOException raised while converting a URI
 */
public static List<Path> getPaths(List<String> uriStrings) throws RuntimeIOException {
    return uriStrings.stream().map(s -> {
        try {
            return IOUtil.getPath(s);
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }).collect(Collectors.toList());
}
/**
 * Null-preserving conversion from a File to a Path.
 *
 * @param fileOrNull a File, or null
 * @return the corresponding Path, or null when the input is null
 */
public static Path toPath(File fileOrNull) {
    if (fileOrNull == null) {
        return null;
    }
    return fileOrNull.toPath();
}
/**
 * Takes a collection of Files and converts them to a list of Paths.
 * Runs .toPath() on the contents of the input.
 *
 * @param files a {@link Collection} of {@link File}s to convert to {@link Path}s
 * @return a new List containing the results of running toPath on the elements of the input
 */
public static List<Path> filesToPaths(Collection<File> files) {
    return files.stream().map(File::toPath).collect(Collectors.toList());
}
/**
 * Test whether an input stream looks like a GZIP input.
 * This identifies both gzip and bgzip streams as being GZIP.
 * The stream is marked before the probe and reset afterwards, so its position is unchanged on return.
 *
 * @param stream the input stream.
 * @return true if `stream` starts with a gzip signature.
 * @throws IllegalArgumentException if `stream` does not support mark/reset
 * @throws IllegalStateException if `stream` cannot be reset after the probe
 */
public static boolean isGZIPInputStream(final InputStream stream) {
    if (!stream.markSupported()) {
        throw new IllegalArgumentException("isGZIPInputStream() : Cannot test a stream that doesn't support marking.");
    }
    stream.mark(GZIP_HEADER_READ_LENGTH);
    try {
        // Constructing the GZIPInputStream and reading one byte fails with an IOException when the
        // data is not gzip. The wrapper is deliberately not closed: that would close `stream` too.
        final GZIPInputStream gunzip = new GZIPInputStream(stream);
        gunzip.read();
        return true;
    } catch (final IOException ioe) {
        return false;
    } finally {
        try {
            stream.reset();
        } catch (final IOException ioe) {
            // Keep the underlying failure as the cause so it is not lost.
            throw new IllegalStateException("isGZIPInputStream(): Could not reset stream.", ioe);
        }
    }
}
/**
 * Appends an extension to the final name element of a path, keeping the same parent.
 *
 * @param path the path to start from, eg. "/folder/file.jpg"
 * @param extension the extension to add, eg. ".bak"
 * @return "/folder/file.jpg.bak"
 */
public static Path addExtension(Path path, String extension) {
    final String extendedName = path.getFileName() + extension;
    return path.resolveSibling(extendedName);
}
/**
 * Checks if the provided path is block-compressed.
 *
 * Note that using {@code checkExtension=true} would avoid the cost of opening the file, but
 * if {@link #hasBlockCompressedExtension(String)} returns {@code false} this would not detect
 * block-compressed files such as BAM.
 *
 * @param path file to check if it is block-compressed.
 * @param checkExtension if {@code true}, checks the extension before opening the file.
 * @return {@code true} if the file is block-compressed; {@code false} otherwise.
 * @throws IOException if there is an I/O error.
 */
public static boolean isBlockCompressed(final Path path, final boolean checkExtension) throws IOException {
    final boolean ruledOutByExtension = checkExtension && !hasBlockCompressedExtension(path);
    if (ruledOutByExtension) {
        return false;
    }
    // Buffer must be able to hold at least one full compressed block.
    final int bufferSize = Math.max(Defaults.BUFFER_SIZE, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
    try (final InputStream stream = new BufferedInputStream(Files.newInputStream(path), bufferSize)) {
        return BlockCompressedInputStream.isValidFile(stream);
    }
}
/**
 * Checks if the provided path is block-compressed by inspecting its contents.
 *
 * Note that block-compressed file extensions {@link FileExtensions#BLOCK_COMPRESSED} are not
 * checked by this method; it delegates to {@link #isBlockCompressed(Path, boolean)} with
 * {@code checkExtension=false}.
 *
 * @param path file to check if it is block-compressed.
 * @return {@code true} if the file is block-compressed; {@code false} otherwise.
 * @throws IOException if there is an I/O error.
 */
public static boolean isBlockCompressed(final Path path) throws IOException {
    final boolean checkExtension = false;
    return isBlockCompressed(path, checkExtension);
}
/**
 * Checks if a file ends in one of the {@link FileExtensions#BLOCK_COMPRESSED}.
 * The comparison lower-cases the file name in a locale-independent way; for HTTP/S urls any query
 * string is removed first.
 *
 * @param fileName string name for the file. May be an HTTP/S url.
 *
 * @return {@code true} if the file has a block-compressed extension; {@code false} otherwise.
 */
public static boolean hasBlockCompressedExtension (final String fileName) {
    final String cleanedPath = stripQueryStringIfPathIsAnHttpUrl(fileName);
    // Lower-case once, with Locale.ROOT: the default-locale variant mis-handles e.g. 'I' under a
    // Turkish locale, so ".GZIP" would fail to match.
    final String lowerCased = cleanedPath.toLowerCase(java.util.Locale.ROOT);
    for (final String extension : FileExtensions.BLOCK_COMPRESSED) {
        if (lowerCased.endsWith(extension)) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if the final name element of a path ends in one of the {@link FileExtensions#BLOCK_COMPRESSED}.
 *
 * @param path object to extract the name from.
 *
 * @return {@code true} if the path has a block-compressed extension; {@code false} otherwise.
 */
public static boolean hasBlockCompressedExtension(final Path path) {
    final String fileName = path.getFileName().toString();
    return hasBlockCompressedExtension(fileName);
}
/**
 * Checks if a file's name ends in one of the {@link FileExtensions#BLOCK_COMPRESSED}.
 *
 * @param file object to extract the name from.
 *
 * @return {@code true} if the file has a block-compressed extension; {@code false} otherwise.
 */
public static boolean hasBlockCompressedExtension (final File file) {
    final String fileName = file.getName();
    return hasBlockCompressedExtension(fileName);
}
/**
 * Checks if the path component of a URI ends in one of the {@link FileExtensions#BLOCK_COMPRESSED}.
 *
 * @param uri file as an URI.
 *
 * @return {@code true} if the file has a block-compressed extension; {@code false} otherwise.
 */
public static boolean hasBlockCompressedExtension (final URI uri) {
    return hasBlockCompressedExtension(uri.getPath());
}
/**
 * Removes the query string from an http/https url so that its extension can be checked.
 * Anything else is returned untouched, because for a local file a '?' is a legal part of the filename.
 *
 * @param path a string representing some sort of path, potentially an http url
 * @return path with no trailing queryString (ex: http://something.com/path.vcf?stuff=something => http://something.com/path.vcf)
 */
private static String stripQueryStringIfPathIsAnHttpUrl(String path) {
    final boolean isHttpUrl = path.startsWith("http://") || path.startsWith("https://");
    if (!isHttpUrl) {
        return path;
    }
    final int queryStart = path.indexOf('?');
    return queryStart > 0 ? path.substring(0, queryStart) : path;
}
/**
 * Delete a directory and all files in it.
 * Files are deleted as they are visited and each directory is deleted after its contents.
 *
 * @param directory The directory to be deleted (along with its subdirectories)
 * @throws RuntimeIOException wrapping any IOException raised while walking or deleting
 */
public static void recursiveDelete(final Path directory) {
    final SimpleFileVisitor<Path> simpleFileVisitor = new SimpleFileVisitor<Path>() {
        @Override
        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
            super.visitFile(file, attrs);
            Files.deleteIfExists(file);
            return FileVisitResult.CONTINUE;
        }
        @Override
        public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
            // The superclass call rethrows any error hit while iterating the directory.
            super.postVisitDirectory(dir, exc);
            Files.deleteIfExists(dir);
            return FileVisitResult.CONTINUE;
        }
    };
    try {
        Files.walkFileTree(directory, simpleFileVisitor);
    } catch (final IOException e){
        throw new RuntimeIOException(e);
    }
}
}