package org.broadinstitute.hellbender.utils.io;
import com.google.cloud.storage.contrib.nio.CloudStorageFileSystem;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.FileExtensions;
import htsjdk.samtools.util.IOUtil;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.GetSampleName;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.utils.runtime.ProcessController;
import org.broadinstitute.hellbender.utils.runtime.ProcessOutput;
import org.broadinstitute.hellbender.utils.runtime.ProcessSettings;
import java.io.*;
import java.net.URI;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import java.util.zip.*;
public final class IOUtils {
private static final Logger logger = LogManager.getLogger(IOUtils.class);
private static final File DEV_DIR = new File("/dev");
// see https://support.hdfgroup.org/HDF5/doc/H5.format.html
private static final byte[] hdf5HeaderSignature = {(byte) 0x89, 'H', 'D', 'F', '\r', '\n', (byte) 0x1A, '\n'};
/**
* Schemes starting with gendb could be GenomicsDB paths
*/
public static final String GENOMIC_DB_URI_SCHEME = "gendb";
/**
* Pattern identifying GenomicsDB paths
*/
private static final Pattern GENOMICSDB_URI_PATTERN = Pattern.compile("^" + GENOMIC_DB_URI_SCHEME + "(\\.?)(.*)(://)(.*)");
/**
* Given a Path, determine if it is an HDF5 file without requiring that we're on a platform that supports
* HDF5 (let the caller decide if a return value of false is fatal).
*
* @param hdf5Candidate a Path representing the input to be inspected
* @return true if the candidate Path is an HDF5 file, otherwise false
*/
public static boolean isHDF5File(final Path hdf5Candidate) {
try (final DataInputStream candidateStream = new DataInputStream(Files.newInputStream(hdf5Candidate))) {
final byte[] candidateHeader = new byte[hdf5HeaderSignature.length];
candidateStream.read(candidateHeader, 0, candidateHeader.length);
return Arrays.equals(candidateHeader, hdf5HeaderSignature);
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(String.format("I/O error reading from input stream %s", hdf5Candidate), e);
}
}
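// Illustrative usage (a sketch, not part of the original source; the path below is hypothetical):
//   final Path candidate = Paths.get("model.hdf5");
//   final boolean looksLikeHdf5 = IOUtils.isHDF5File(candidate); // returns false for non-HDF5 inputs rather than throwing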
/**
* Creates a temp directory with the given prefix.
*
* The directory and any contents will be automatically deleted at shutdown.
*
* This will not work if the temp dir is not representable as a File.
*
* @param prefix Prefix for the directory name.
* @return The created temporary directory.
*/
public static File createTempDir(String prefix) {
try {
final Path tmpDir = Files.createTempDirectory(prefix).normalize();
deleteOnExit(tmpDir);
return tmpDir.toFile();
} catch (final IOException | SecurityException e) {
throw new UserException.BadTempDir(e.getMessage(), e);
}
}
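// Illustrative usage (sketch; the prefix below is hypothetical). The returned directory is created under the
// default temp dir and is deleted recursively at JVM shutdown via deleteOnExit(Path):
//   final File scratchDir = IOUtils.createTempDir("gatk-scratch");
//   final File intermediate = new File(scratchDir, "intermediate.txt");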
/**
* Writes content to a temp file and returns the path to the temporary file.
*
* @param content to write.
* @param prefix Prefix for the temp file; {@link File#createTempFile(String, String, File)} requires that this be >= 3 characters
* @param suffix Suffix for the temp file.
* @return the path to the temp file.
*/
public static File writeTempFile(String content, String prefix, String suffix) {
return writeTempFile(content, prefix, suffix, null);
}
/**
* Writes content to a temp file and returns the path to the temporary file.
*
* @param content to write.
* @param prefix Prefix for the temp file; {@link File#createTempFile(String, String, File)} requires that this be >= 3 characters
* @param suffix Suffix for the temp file.
* @param directory Directory for the temp file.
* @return the path to the temp file.
*/
public static File writeTempFile(String content, String prefix, String suffix, File directory) {
try {
File tempFile = File.createTempFile(prefix, suffix, directory).toPath().normalize().toFile();
FileUtils.writeStringToFile(tempFile, content, StandardCharsets.UTF_8);
return tempFile;
} catch (IOException e) {
throw new UserException.BadTempDir(e.getMessage(), e);
}
}
/**
* Writes multiple lines of content to a temp file and returns the temporary file.
* @param prefix prefix to use for the temp file name
* @param suffix extension to use for the temp file
* @param content List of Strings that will be written to the file as separate lines
* @return temporary File that will be deleted on exit
*/
public static File writeTempFile(final List<String> content, final String prefix, final String suffix) {
try {
final File tempFile = createTempFile(prefix, suffix);
FileUtils.writeLines(tempFile, content);
return tempFile;
} catch (IOException e) {
throw new UserException.BadTempDir(e.getMessage(), e);
}
}
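// Illustrative usage (sketch; the file contents and names below are hypothetical):
//   final File intervalsFile = IOUtils.writeTempFile(Arrays.asList("chr1:1-100", "chr2:200-300"), "intervals", ".list");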
/**
* Returns true if the file is a special file.
* @param file File path to check.
* @return true if the file is a special file.
*/
public static boolean isSpecialFile(File file) {
return file != null && (file.getAbsolutePath().startsWith("/dev/") || file.equals(DEV_DIR));
}
/**
* Tries to delete a file. Emits a warning if the file
* is not a special file and could not be deleted.
*
* @param file File to delete.
* @return true if the file was deleted.
*/
public static boolean tryDelete(File file) {
if (isSpecialFile(file)) {
logger.debug("Not trying to delete " + file);
return false;
}
boolean deleted = FileUtils.deleteQuietly(file);
if (deleted)
logger.debug("Deleted " + file);
else if (file.exists())
logger.warn("Unable to delete " + file);
return deleted;
}
/**
* Writes an embedded resource to a temporary file. The temporary file is automatically scheduled for deletion
* on exit.
* @param resource Embedded resource.
* @return the temporary file containing the contents of the resource, which is automatically scheduled for
* deletion on exit.
*/
public static File writeTempResource(final Resource resource) {
final File tempFile = createTempFile(
FilenameUtils.getBaseName(resource.getPath()) + ".",
"." + FilenameUtils.getExtension(resource.getPath()));
writeResource(resource, tempFile);
return tempFile;
}
/**
* Create a resource from a path and a relative class, and write it to a temporary file.
* If the relative class is null then the system classloader will be used and the path must be absolute.
* The temporary file is automatically scheduled for deletion on exit.
* @param resourcePath Relative or absolute path to the resource.
* @param relativeClass Relative class to use as a class loader and for a relative package.
* @return a temporary file containing the contents of the resource, which is automatically scheduled
* for deletion on exit.
*/
public static File writeTempResourceFromPath(final String resourcePath, final Class<?> relativeClass) {
Utils.nonNull(resourcePath, "A resource path must be provided");
final Resource resource = new Resource(resourcePath, relativeClass);
return writeTempResource(resource);
}
/**
* Writes an embedded resource to a file.
* File is not scheduled for deletion and must be cleaned up by the caller.
* @param resource Embedded resource.
* @param file File path to write.
*/
@SuppressWarnings("deprecation")
public static void writeResource(Resource resource, File file) {
String path = resource.getPath();
InputStream inputStream = resource.getResourceContentsAsStream();
OutputStream outputStream = null;
try {
outputStream = FileUtils.openOutputStream(file);
org.apache.commons.io.IOUtils.copy(inputStream, outputStream);
} catch (IOException e) {
throw new GATKException(String.format("Unable to copy resource '%s' to '%s'", path, file), e);
} finally {
org.apache.commons.io.IOUtils.closeQuietly(inputStream);
org.apache.commons.io.IOUtils.closeQuietly(outputStream);
}
}
/**
* Reads the entirety of the given file into a byte array. Uses a read buffer size of 4096 bytes.
*
* @param source File to read
* @return The contents of the file as a byte array
*/
public static byte[] readFileIntoByteArray ( File source ) {
return readFileIntoByteArray(source, 4096);
}
/**
* Reads the entirety of the given file into a byte array using the requested read buffer size.
*
* @param source File to read
* @param readBufferSize Number of bytes to read in at one time
* @return The contents of the file as a byte array
*/
public static byte[] readFileIntoByteArray ( File source, int readBufferSize ) {
if ( source == null ) {
throw new GATKException("Source file was null");
}
byte[] fileContents;
try {
fileContents = readStreamIntoByteArray(new FileInputStream(source), readBufferSize);
}
catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(source.getAbsolutePath(), e);
}
if ( fileContents.length != source.length() ) {
throw new UserException.CouldNotReadInputFile(String.format("Unable to completely read file %s: read only %d/%d bytes",
source.getAbsolutePath(), fileContents.length, source.length()));
}
return fileContents;
}
/**
* Reads all data from the given stream into a byte array using the requested read buffer size.
*
* @param in Stream to read data from
* @param readBufferSize Number of bytes to read in at one time
* @return The contents of the stream as a byte array
*/
public static byte[] readStreamIntoByteArray ( InputStream in, int readBufferSize ) {
if ( in == null ) {
throw new IllegalArgumentException("Input stream was null");
}
else if ( readBufferSize <= 0 ) {
throw new IllegalArgumentException("Read buffer size must be > 0");
}
// Use a fixed-size buffer for each read, but a dynamically-growing buffer
// to hold the accumulated contents of the file/stream:
byte[] readBuffer = new byte[readBufferSize];
ByteArrayOutputStream fileBuffer = new ByteArrayOutputStream(readBufferSize * 4);
try {
try {
int currentBytesRead;
while ( (currentBytesRead = in.read(readBuffer, 0, readBuffer.length)) >= 0 ) {
fileBuffer.write(readBuffer, 0, currentBytesRead);
}
}
finally {
in.close();
}
}
catch ( IOException e ) {
throw new UserException.CouldNotReadInputFile("I/O error reading from input stream", e);
}
return fileBuffer.toByteArray();
}
/**
* Writes the given array of bytes to a file
*
* @param bytes Data to write
* @param destination File to write the data to
*/
public static void writeByteArrayToFile ( byte[] bytes, File destination ) {
if ( destination == null ) {
throw new GATKException("Destination file was null");
}
try {
writeByteArrayToStream(bytes, new FileOutputStream(destination));
}
catch ( FileNotFoundException e ) {
throw new UserException.CouldNotCreateOutputFile(destination, e);
}
}
/**
* Writes the given array of bytes to a stream
*
* @param bytes Data to write
* @param out Stream to write the data to
*/
public static void writeByteArrayToStream ( byte[] bytes, OutputStream out ) {
if ( bytes == null || out == null ) {
throw new GATKException("Data to write or output stream was null");
}
try {
try {
out.write(bytes);
}
finally {
out.close();
}
}
catch ( IOException e ) {
throw new UserException.CouldNotCreateOutputFile("I/O error writing to output stream", e);
}
}
/**
* Un-gzips the input file to the output file.
*/
public static void gunzip(File input, File output) {
try {
try (GZIPInputStream in = new GZIPInputStream(new FileInputStream(input));
OutputStream out = new FileOutputStream(output)) {
byte[] buf = new byte[4096];
int len;
while ((len = in.read(buf)) > 0) {
out.write(buf, 0, len);
}
}
} catch (IOException e){
throw new GATKException("Exception while unzipping a file:" + input + " to:" + output, e);
}
}
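// Illustrative usage (sketch; file names are hypothetical):
//   IOUtils.gunzip(new File("annotations.tsv.gz"), new File("annotations.tsv"));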
/**
* Un-gzips the input file to an output file, but only if the file's name ends with '.gz'.
* In this case the new temp file is marked for deletion on exit and returned from this method.
* Otherwise (i.e., if the argument is not a gzipped file), this method just returns the argument.
*/
public static File gunzipToTempIfNeeded(File maybeGzipedFile) {
if (! maybeGzipedFile.getPath().endsWith(".gz")) {
return maybeGzipedFile;
}
final File result = IOUtils.createTempFile("unzippedFile", "tmp");
gunzip(maybeGzipedFile, result);
return result;
}
/**
* Makes a reader for a file, unzipping if the file's name ends with '.gz'.
*/
public static Reader makeReaderMaybeGzipped(Path path) throws IOException {
final InputStream in = new BufferedInputStream(Files.newInputStream(path));
// toString because path.endsWith only checks whole path components, not substrings.
return makeReaderMaybeGzipped(in, path.toString().endsWith(".gz"));
}
/**
* Makes a reader for an InputStream, wrapping it in an appropriate unzipper if necessary.
* @param zipped whether the stream is gzipped
*/
public static Reader makeReaderMaybeGzipped(InputStream in, boolean zipped) throws IOException {
if (zipped) {
return new InputStreamReader(makeZippedInputStream(in));
} else {
return new InputStreamReader(in);
}
}
/**
* Creates an input stream from a zipped stream.
* @return a block-gzipped input stream if the input is block compressed; otherwise falls back to a plain gzipped input stream
* @throws ZipException if !in.markSupported()
*/
public static InputStream makeZippedInputStream(InputStream in) throws IOException {
Utils.nonNull(in);
if (BlockCompressedInputStream.isValidFile(in)) {
return new BlockCompressedInputStream(in);
} else {
return new GZIPInputStream(in);
}
}
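// Illustrative usage (sketch; somePath is a hypothetical Path variable). BlockCompressedInputStream.isValidFile()
// needs mark support, so wrap raw streams in a BufferedInputStream first, as makeReaderMaybeGzipped(Path) does above:
//   final InputStream in = new BufferedInputStream(Files.newInputStream(somePath));
//   final Reader reader = IOUtils.makeReaderMaybeGzipped(in, somePath.toString().endsWith(".gz"));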
/**
* Extracts the tar.gz file given by {@code tarGzFilePath}.
* Input {@link Path} MUST be to a gzipped tar file.
* Will extract contents in the containing folder of {@code tarGzFilePath}.
* Will throw an exception if files exist already.
* @param tarGzFilePath {@link Path} to a gzipped tar file for extraction.
*/
public static void extractTarGz(final Path tarGzFilePath) {
extractTarGz(tarGzFilePath, tarGzFilePath.getParent(), false);
}
/**
* Extracts the tar.gz file given by {@code tarGzFilePath}.
* Input {@link Path} MUST be to a gzipped tar file.
* Will throw an exception if files exist already.
* @param tarGzFilePath {@link Path} to a gzipped tar file for extraction.
* @param destDir {@link Path} to the directory where the contents of {@code tarGzFilePath} will be extracted.
*/
public static void extractTarGz(final Path tarGzFilePath, final Path destDir) {
extractTarGz(tarGzFilePath, destDir, false);
}
/**
* Extracts the tar.gz file given by {@code tarGzFilePath}.
* Input {@link Path} MUST be to a gzipped tar file.
* @param tarGzFilePath {@link Path} to a gzipped tar file for extraction.
* @param destDir {@link Path} to the directory where the contents of {@code tarGzFilePath} will be extracted.
* @param overwriteExistingFiles If {@code true}, will enable overwriting of existing files. If {@code false}, will cause an exception to be thrown if files exist already.
*/
public static void extractTarGz(final Path tarGzFilePath, final Path destDir, final boolean overwriteExistingFiles) {
logger.info("Extracting data from archive: " + tarGzFilePath.toUri());
// Create a stream for the data sources input.
// (We know it will be a tar.gz):
try ( final InputStream fi = Files.newInputStream(tarGzFilePath);
final InputStream bi = new BufferedInputStream(fi);
final InputStream gzi = new GzipCompressorInputStream(bi);
final TarArchiveInputStream archiveStream = new TarArchiveInputStream(gzi)) {
extractFilesFromArchiveStream(archiveStream, tarGzFilePath, destDir, overwriteExistingFiles);
}
catch (final IOException ex) {
throw new UserException("Could not extract data from: " + tarGzFilePath.toUri(), ex);
}
}
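// Illustrative usage (sketch; archive and destination names are hypothetical). The third argument controls
// whether existing files at the destination may be overwritten:
//   IOUtils.extractTarGz(IOUtils.getPath("bundle.tar.gz"), IOUtils.getPath("extracted"), true);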
private static void extractFilesFromArchiveStream(final TarArchiveInputStream archiveStream,
final Path localTarGzPath,
final Path destDir,
final boolean overwriteExistingFiles) throws IOException {
// Adapted from: http://commons.apache.org/proper/commons-compress/examples.html
// Go through the archive and get the entries:
TarArchiveEntry entry;
while ((entry = archiveStream.getNextEntry()) != null) {
logger.info("Extracting file: " + entry.getName());
// Make sure we can read the data for the entry:
if (!archiveStream.canReadEntryData(entry)) {
throw new UserException("Could not read data from archive file(" + localTarGzPath.toUri() + "): " + entry.getName());
}
// Get the path for the entry on disk and make sure it's OK:
final Path extractedEntryPath = destDir.resolve(entry.getName()).normalize();
ensurePathIsOkForOutput(extractedEntryPath, overwriteExistingFiles);
// Now we can create the entry in our output location:
if (entry.isDirectory()) {
Files.createDirectories(extractedEntryPath);
}
else {
// Make sure the parent directory exists:
Files.createDirectories(extractedEntryPath.getParent());
if ( entry.isFIFO() ) {
// Handle a fifo file:
createFifoFile(extractedEntryPath, overwriteExistingFiles);
}
else if ( entry.isSymbolicLink() ) {
// Handle a symbolic link:
final String linkName = entry.getLinkName();
// If the link already exists, we must clear it:
if ( Files.exists(extractedEntryPath) && overwriteExistingFiles ) {
removeFileWithWarning(extractedEntryPath);
}
Files.createSymbolicLink(extractedEntryPath, Paths.get(linkName));
}
else if ( entry.isLink() ) {
// Handle a hard link:
final String linkName = entry.getLinkName();
// If the link already exists, we must clear it:
if ( Files.exists(extractedEntryPath) && overwriteExistingFiles ) {
removeFileWithWarning(extractedEntryPath);
}
Files.createLink(extractedEntryPath, Paths.get(linkName));
}
else if ( entry.isFile() ) {
// Handle a (default) file entry:
// Create the output file from the stream:
try (final OutputStream o = Files.newOutputStream(extractedEntryPath)) {
org.apache.commons.io.IOUtils.copy(archiveStream, o);
}
}
else {
// Right now we don't know how to handle any other file types:
throw new UserException("Cannot extract file from tar.gz (unknown type): " + entry.toString());
}
}
}
}
public static void writeTarGz(String name, File... files) throws IOException {
try (TarArchiveOutputStream taos = new TarArchiveOutputStream(new GzipCompressorOutputStream(new FileOutputStream(name)))){
// TAR has an 8 gig file limit by default, this gets around that
taos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_STAR);
// TAR originally didn't support long file names, so enable the support for it
taos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
taos.setAddPaxHeadersForNonAsciiNames(true);
for (File file : files){
addToTar(taos, file, ".");
}
}
}
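// Illustrative usage (sketch; file names are hypothetical). Directories are added recursively by addToTar() below,
// with entries rooted at ".":
//   IOUtils.writeTarGz("outputs.tar.gz", new File("plots"), new File("summary.txt"));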
private static void addToTar(TarArchiveOutputStream out, File file, String dir) throws IOException {
String entry = dir + File.separator + file.getName();
if (file.isFile()){
out.putArchiveEntry(new TarArchiveEntry(file, entry));
try (FileInputStream in = new FileInputStream(file)){
org.apache.commons.io.IOUtils.copy(in, out);
}
out.closeArchiveEntry();
} else if (file.isDirectory()) {
File[] children = file.listFiles();
if (children != null){
for (File child : children){
addToTar(out, child, entry);
}
}
} else {
System.out.println(file.getName() + " is not supported");
}
}
private static void ensurePathIsOkForOutput(final Path p, final boolean overwriteExistingFiles) {
if ( Files.exists(p) ) {
if ( overwriteExistingFiles ) {
logger.warn("Overwriting existing output destination: " + p.toUri());
}
else {
throw new UserException("Output destination already exists: " + p.toUri());
}
}
}
/**
* Create a Unix FIFO file with the given path string.
* If requested file already exists, will throw an exception.
* Will throw an Exception on failure.
* @param fifoFilePath {@link Path} to the FIFO file to be created.
* @return The {@link File} object pointing to the created FIFO file.
*/
public static File createFifoFile(final Path fifoFilePath) {
return createFifoFile(fifoFilePath, false);
}
private static void removeFileWithWarning(final Path filePath) {
logger.warn("File already exists in path. Replacing existing file: " + filePath.toUri());
try {
Files.delete(filePath);
}
catch (final IOException ex) {
throw new UserException("Could not replace existing file: " + filePath.toUri());
}
}
/**
* Create a Unix FIFO file with the given path string.
* Will throw an Exception on failure.
* @param fifoFilePath {@link Path} to the FIFO file to be created.
* @param overwriteExisting If {@code true} will overwrite an existing file in the requested location for the FIFO file. If {@code false} will throw an exception if the file exists.
* @return The {@link File} object pointing to the created FIFO file.
*/
public static File createFifoFile(final Path fifoFilePath, final boolean overwriteExisting) {
// Make sure we're allowed to create the file:
if ( Files.exists(fifoFilePath) ) {
if ( (!overwriteExisting) ) {
throw new UserException("Cannot create fifo file. File already exists: " + fifoFilePath.toUri());
}
else {
removeFileWithWarning(fifoFilePath);
}
}
// Create the FIFO by executing mkfifo via another ProcessController
final ProcessSettings mkFIFOSettings = new ProcessSettings(new String[]{"mkfifo", fifoFilePath.toFile().getAbsolutePath()});
mkFIFOSettings.getStdoutSettings().setBufferSize(-1);
mkFIFOSettings.setRedirectErrorStream(true);
// Now perform the system call:
final ProcessController mkFIFOController = new ProcessController();
final ProcessOutput result = mkFIFOController.exec(mkFIFOSettings);
final int exitValue = result.getExitValue();
final File fifoFile = fifoFilePath.toFile();
// Make sure we're OK:
if (exitValue != 0) {
throw new GATKException(String.format(
"Failure creating FIFO named (%s). Got exit code (%d) stderr (%s) and stdout (%s)",
fifoFilePath.toFile().getAbsolutePath(),
exitValue,
result.getStderr() == null ? "" : result.getStderr().getBufferString(),
result.getStdout() == null ? "" : result.getStdout().getBufferString()));
} else if (!fifoFile.exists()) {
throw new GATKException(String.format("FIFO (%s) created but doesn't exist", fifoFilePath.toFile().getAbsolutePath()));
} else if (!fifoFile.canWrite()) {
throw new GATKException(String.format("FIFO (%s) created isn't writable", fifoFilePath.toFile().getAbsolutePath()));
}
return fifoFile;
}
/**
* Makes a print stream for a file, gzipping on the fly if the file's name ends with '.gz'.
*/
public static PrintStream makePrintStreamMaybeGzipped(GATKPath filePath) throws IOException {
if (filePath.hasExtension(".gz")) {
return new PrintStream(new GZIPOutputStream(filePath.getOutputStream()));
} else {
return new PrintStream(filePath.getOutputStream());
}
}
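// Illustrative usage (sketch; the output name is hypothetical). The ".gz" extension alone decides whether the
// stream is gzipped on the fly:
//   try (final PrintStream out = IOUtils.makePrintStreamMaybeGzipped(new GATKPath("metrics.txt.gz"))) {
//       out.println("METRIC\tVALUE");
//   }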
/**
* Creates a temp file that will be deleted on exit
*
* This will also mark the corresponding Tribble/Tabix/BAM indices matching the temp file for deletion.
* @param name Prefix of the file; {@link File#createTempFile(String, String, File)} requires that this be >= 3 characters
* @param extension Extension to concat to the end of the file.
* @return A file in the temporary directory starting with name, ending with extension, which will be deleted after the program exits.
*/
public static File createTempFile(String name, String extension) {
return createTempFileInDirectory(name, extension, null);
}
/**
* Creates a temp file in a target directory that will be deleted on exit
*
* This will also mark the corresponding Tribble/Tabix/BAM indices matching the temp file for deletion.
* @param name Prefix of the file; {@link File#createTempFile(String, String, File)} requires that this be >= 3 characters
* @param extension Extension to concat to the end of the file name.
* @param targetDir Directory in which to create the temp file. If null, the default temp directory is used.
* @return A file in the temporary directory starting with name, ending with extension, which will be deleted after the program exits.
*/
public static File createTempFileInDirectory(final String name, String extension, final File targetDir) {
try {
if ( !extension.startsWith(".") ) {
extension = "." + extension;
}
final File file = File.createTempFile(name, extension, targetDir);
file.deleteOnExit();
// Mark corresponding indices for deletion on exit as well just in case an index is created for the temp file:
new File(file.getAbsolutePath() + FileExtensions.TRIBBLE_INDEX).deleteOnExit();
new File(file.getAbsolutePath() + FileExtensions.TABIX_INDEX).deleteOnExit();
new File(file.getAbsolutePath() + ".bai").deleteOnExit();
new File(file.getAbsolutePath() + ".md5").deleteOnExit();
new File(file.getAbsolutePath().replaceAll(extension + "$", ".bai")).deleteOnExit();
return file;
} catch (IOException ex) {
throw new GATKException("Cannot create temp file: " + ex.getMessage(), ex);
}
}
/**
* Creates a temp path that will be deleted on exit.
*
* This will also mark the corresponding Tribble/Tabix/BAM indices matching the temp file for deletion.
*
* @param name Prefix of the file.
* @param extension Extension to concat to the end of the file.
*
* @return A file in the temporary directory starting with name, ending with extension, which will be deleted after the program exits.
*/
public static Path createTempPath(String name, String extension) {
try {
if ( !extension.startsWith(".") ) {
extension = "." + extension;
}
final Path path = Files.createTempFile(getPath(System.getProperty("java.io.tmpdir")), name, extension);
IOUtils.deleteOnExit(path);
// Mark corresponding indices for deletion on exit as well just in case an index is created for the temp file:
final String filename = path.getFileName().toString();
IOUtils.deleteOnExit(path.resolveSibling(filename + FileExtensions.TRIBBLE_INDEX));
IOUtils.deleteOnExit(path.resolveSibling(filename + FileExtensions.TABIX_INDEX));
IOUtils.deleteOnExit(path.resolveSibling(filename + FileExtensions.BAI_INDEX));
IOUtils.deleteOnExit(path.resolveSibling(filename.replaceAll(extension + "$", ".bai")));
IOUtils.deleteOnExit(path.resolveSibling(filename + ".md5"));
return path;
} catch (final IOException ex) {
throw new GATKException("Cannot create temp file: " + ex.getMessage(), ex);
}
}
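// Illustrative usage (sketch; names are hypothetical). Unlike createTempFile(), this resolves java.io.tmpdir
// through getPath(), so the temp location may live on any filesystem that getPath() supports:
//   final Path tmpVcf = IOUtils.createTempPath("variants", ".vcf");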
/**
* @param extension a file extension, may include 0 or more leading dots which will be replaced with a single dot
* @return replace the final extension on a path with the given extension
*/
public static String replaceExtension(String path, String extension){
Utils.nonNull(path);
Utils.nonNull(extension);
final String extensionNoLeadingDot = StringUtils.stripStart(extension, ".");
return FilenameUtils.removeExtension(path) + '.' + extensionNoLeadingDot;
}
public static File replaceExtension(File file, String extension){
return new File(replaceExtension(file.getPath(), extension));
}
/**
* Converts the given URI to a {@link Path} object. If the filesystem cannot be found in the usual way, then attempt
* to load the filesystem provider using the thread context classloader. This is needed when the filesystem
* provider is loaded using a URL classloader (e.g. in spark-submit).
*
* Also makes an attempt to interpret the argument as a file name if it's not a URI.
*
* @param uriString the URI to convert.
* @return the resulting {@code Path}
* @throws UserException if an I/O error occurs when creating the file system
*/
public static Path getPath(String uriString) {
Utils.nonNull(uriString);
URI uri;
try {
uri = URI.create(uriString);
} catch (IllegalArgumentException x) {
// not a valid URI. Caller probably just gave us a file name.
return Paths.get(uriString);
}
try {
// special case GCS, in case the filesystem provider wasn't installed properly but is available.
if (CloudStorageFileSystem.URI_SCHEME.equals(uri.getScheme())) {
return BucketUtils.getPathOnGcs(uriString);
}
// Paths.get(String) assumes the default file system
// Paths.get(URI) uses the scheme
return uri.getScheme() == null ? Paths.get(uriString) : Paths.get(uri);
} catch (FileSystemNotFoundException e) {
try {
ClassLoader cl = Thread.currentThread().getContextClassLoader();
if ( cl == null ) {
throw e;
}
return FileSystems.newFileSystem(uri, new HashMap<>(), cl).provider().getPath(uri);
}
catch (ProviderNotFoundException x) {
// TODO: this creates bogus Path on the current file system for schemes such as gendb, nonexistent, gcs
// TODO: we depend on this code path to allow IntervalUtils to all getPath on a string that may be either
// a literal interval or a feature file containing intervals
// not a valid URI. Caller probably just gave us a file name or "chr1:1-2".
return Paths.get(uriString);
}
catch ( IOException io ) {
throw new UserException(uriString + " is not a supported path", io);
}
}
}
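// Illustrative usage (sketch; the URIs below are hypothetical):
//   final Path local = IOUtils.getPath("/data/reads.bam");          // default filesystem
//   final Path cloud = IOUtils.getPath("gs://my-bucket/reads.bam"); // routed through BucketUtils.getPathOnGcs()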
/**
* Appends path to the given parent dir. Parent dir could be a URI or a File.
* @param dir the folder to append the path to
* @param path the path relative to dir.
* @return the appended path as a String if path is relative, else path is returned.
*/
public static String appendPathToDir(String dir, String path) {
if (path.startsWith("/")) { // Already an absolute path
return path;
}
if (BucketUtils.isRemoteStorageUrl(dir) || BucketUtils.isFileUrl(dir)) {
Path dirPath = getPath(dir);
return dirPath.resolve(path).toUri().toString();
} else {
return new File(dir, path).getPath();
}
}
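// Illustrative behavior (sketch; directory and file names are hypothetical):
//   appendPathToDir("gs://my-bucket/outputs", "calls.vcf") -> "gs://my-bucket/outputs/calls.vcf"
//   appendPathToDir("/tmp/outputs", "calls.vcf")           -> "/tmp/outputs/calls.vcf"
//   appendPathToDir("ignored-dir", "/already/absolute")    -> "/already/absolute"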
/**
* Gets the absolute Path name with the URI marker, handling the special case of the default file system by removing
* the file:// prefix.
*
* @param path path to get the absolute name.
* @return a String with the absolute name, and the file:// protocol removed, if it was present.
*/
public static String getAbsolutePathWithoutFileProtocol(final Path path) {
return path.toAbsolutePath().toUri().toString().replaceFirst("^file://", "");
}
/**
* @param path Path to test
* @throws org.broadinstitute.hellbender.exceptions.UserException.CouldNotReadInputFile if the file isn't readable
* and a regular file
*/
public static void assertFileIsReadable(final Path path) {
Utils.nonNull(path);
try {
if ( ! Files.exists(path) ) {
throw new UserException.CouldNotReadInputFile(path, "It doesn't exist.");
}
if ( ! Files.isRegularFile(path) ) {
throw new UserException.CouldNotReadInputFile(path, "It isn't a regular file");
}
if ( ! Files.isReadable(path) ) {
throw new UserException.CouldNotReadInputFile(path, "It is not readable, check the file permissions");
}
} catch (com.google.cloud.storage.StorageException cloudBoom) {
// probably a permissions problem, or perhaps a disabled account.
// Looks like this for a disabled bucket error:
// A USER ERROR has occurred: Couldn't read file gs://foo/bar. Error was:
// 403: The account for bucket "foo" has been disabled.
// For no access, it looks like this:
// (use `gcloud auth application-default revoke` to forget the default credentials)
// A USER ERROR has occurred: Couldn't read file gs://(...). Error was:
// 401: Anonymous users does not have storage.objects.get access to object (...).
// The user can see the underlying exception by passing
// -DGATK_STACKTRACE_ON_USER_EXCEPTION=true
throw new UserException.CouldNotReadInputFile(path, cloudBoom.getCode() + ": " + cloudBoom.getMessage(), cloudBoom);
}
}
/**
*
* @param paths paths to test, as Strings
* @throws org.broadinstitute.hellbender.exceptions.UserException.CouldNotReadInputFile if any of the paths aren't
* readable and a regular file
*/
public static void assertPathsAreReadable(final String ... paths) {
for (String path : paths) {
IOUtils.assertFileIsReadable(IOUtils.getPath(path));
}
}
/**
* Checks that one or more user provided files are in fact regular (i.e. not a directory or a special device) readable files.
*
* @param files the input files to test.
* @throws IllegalArgumentException if any input file {@code file} is {@code null} or {@code files} is {@code null}.
* @throws UserException if any {@code file} is not a regular file or it cannot be read.
*/
public static void canReadFile( final File... files) {
Utils.nonNull(files, "Unexpected null input.");
for (final File file : files) {
Utils.nonNull(file, "Unexpected null file reference.");
if (!file.exists()) {
throw new UserException.CouldNotReadInputFile(file.getAbsolutePath(), "The input file does not exist.");
} else if (!file.isFile()) {
throw new UserException.CouldNotReadInputFile(file.getAbsolutePath(), "The input file is not a regular file");
} else if (!file.canRead()) {
throw new UserException.CouldNotReadInputFile(file.getAbsolutePath(), "The input file cannot be read. Check the permissions.");
}
}
}
/**
* Creates a directory in the local FS, HDFS, or a Google bucket, in which individual files can be written.
*/
public static void createDirectory(final String pathString) throws IOException {
Utils.nonNull(pathString);
Files.createDirectory(getPath(pathString));
}
public static String urlEncode(final String string) {
try {
return URLEncoder.encode(string, GetSampleName.STANDARD_ENCODING);
} catch (final UnsupportedEncodingException ex) {
throw new UserException("Could not encode sample name", ex);
}
}
public static String urlDecode(final String string) {
try {
return URLDecoder.decode(string, GetSampleName.STANDARD_ENCODING);
} catch (final UnsupportedEncodingException ex) {
throw new UserException("Could not decode sample name", ex);
}
}
/**
* Check if a given GATKPath represents a GenomicsDB URI.
*
* @param pathSpec {@code GATKPath} containing the path to test
* @return true if path represents a GenomicsDB URI, otherwise false
*/
public static boolean isGenomicsDBPath(final GATKPath pathSpec) {
return getGenomicsDBPath(pathSpec) != null;
}
/**
* Check if a given path represents GenomicsDB URI.
*
* @param path String containing the path to test
* @return true if path represents a GenomicsDB URI, otherwise false
*/
public static boolean isGenomicsDBPath(final String path) {
return getGenomicsDBPath(path) != null;
}
/**
* Get the GenomicsDB equivalent absolute URL for a given path
*
* @param genomicsDBPath {@link GATKPath} representing a legal gendb URI
* @return absolute gendb URI to the path
*/
public static String getAbsolutePathWithGenomicsDBURIScheme(final GATKPath genomicsDBPath) {
String path = getGenomicsDBAbsolutePath(genomicsDBPath);
if (path == null) {
return null;
} else if (path.contains("://")) {
return GENOMIC_DB_URI_SCHEME + "." + path;
} else {
return GENOMIC_DB_URI_SCHEME + "://" + path;
}
}
/**
* Gets the absolute Path for a GenomicsDB path
*
* @param gendbPath gendb URI
* @return absolute name to the given GenomicsDB path
* @see #getGenomicsDBPath(String)
*/
public static String getGenomicsDBAbsolutePath(final GATKPath gendbPath) {
String path = getGenomicsDBPath(gendbPath);
if (path == null) {
return null;
} else if (path.contains("://")) {
return path;
} else {
return new File(path).getAbsolutePath();
}
}
/**
* If path is prefaced with gendb:// or gendb.CloudURIScheme://, this method returns an absolute path acceptable
* to GenomicsDB by stripping off gendb:// for files or the gendb. prefix for Cloud URIs, respectively.
* Otherwise, returns null.
*
* @param path GenomicsDB paths that start with gendb:// or gendb.CloudURIScheme://
* The following are valid gendb URI examples:
* - gendb://my_folder
* - gendb:///my_abs_folder
* - gendb.hdfs://name_node/my_folder
* - gendb.gs://my_bucket/my_folder
* - gendb.s3://my_bucket/my_folder
*
* @return Valid GenomicsDB path or null
*/
public static String getGenomicsDBPath(final GATKPath path) {
return getGenomicsDBPath(path.getRawInputString());
}
/**
* If path is prefaced with gendb:// or gendb.CloudURIScheme://, this method returns an absolute path acceptable
* to GenomicsDB by stripping off gendb:// for files or the gendb. prefix for Cloud URIs, respectively.
* Otherwise, returns null.
*
* @param path GenomicsDB paths that start with gendb:// or gendb.CloudURIScheme://
* The following are valid gendb URI examples:
* - gendb://my_folder
* - gendb:///my_abs_folder
* - gendb.hdfs://name_node/my_folder
* - gendb.gs://my_bucket/my_folder
* - gendb.s3://my_bucket/my_folder
*
* @return Valid GenomicsDB path or null
*/
public static String getGenomicsDBPath(final String path) {
// GENOMICSDB_URI_PATTERN = Pattern.compile("^" + GENOMIC_DB_URI_SCHEME + "(\\.?)(.*)(://)(.*)");
// gendb.supportedCloudURI://
// ^^group2^^ ^^group4^^
String genomicsdbPath = null;
if (path != null && path.startsWith(GENOMIC_DB_URI_SCHEME)) { // Check if path starts with "gendb"
Matcher matcher = GENOMICSDB_URI_PATTERN.matcher(path);
if (matcher.find() && !matcher.group(3).isEmpty()) { // path contains "://"
if (!matcher.group(1).isEmpty()) { // path has a period after gendb, so it is a URI
if (!matcher.group(2).isEmpty()) { //path has a scheme, so it is valid URI for GenomicsDB
genomicsdbPath = matcher.group(2) + matcher.group(3) + matcher.group(4);
}
} else if (matcher.group(2).isEmpty()) {
genomicsdbPath = matcher.group(4);
}
}
}
return genomicsdbPath;
}
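// Illustrative behavior (sketch; the inputs are hypothetical examples matching the javadoc above):
//   getGenomicsDBPath("gendb://my_folder")            -> "my_folder"
//   getGenomicsDBPath("gendb:///my_abs_folder")       -> "/my_abs_folder"
//   getGenomicsDBPath("gendb.gs://my_bucket/my_ws")   -> "gs://my_bucket/my_ws"
//   getGenomicsDBPath("/not/a/gendb/path")            -> null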
/**
* Schedule a file or directory to be deleted on JVM shutdown.
*
* This calls {@link IOUtils#deleteRecursively(Path)} on {@code fileToDelete} as a shutdown hook.
* @param fileToDelete file or directory to be deleted recursively at JVM shutdown.
*/
public static void deleteOnExit(final Path fileToDelete){
DeleteRecursivelyOnExitPathHook.add(fileToDelete);
}
/**
* Delete rootPath recursively
* @param rootPath is the file/directory to be deleted
*/
public static void deleteRecursively(final Path rootPath) {
IOUtil.recursiveDelete(rootPath);
}
/**
* Convert File to Path, returning null if input was null.
*
* @param toConvert File to convert to a Path
* @return a Path, or null if the input was null.
*/
public static Path fileToPath(File toConvert) {
return (null == toConvert ? null : toConvert.toPath());
}
/**
* Strips extension from the given path, if it has one. Note it will use the first matching extension in the list.
*
* @param path Path to modify. May not be null.
* @param extensions Possible extensions to remove, in order of priority
* @return Resulting path
*/
public static Path removeExtension(final Path path, final List<String> extensions) {
Utils.nonNull(path);
Utils.nonNull(extensions);
final String pathString = path.toString();
for (final String testExtension : extensions) {
if (pathString.endsWith(testExtension)) {
return Paths.get(pathString.substring(0, pathString.length() - testExtension.length()));
}
}
return path;
}
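// Illustrative usage (sketch; names are hypothetical). The first matching extension wins:
//   IOUtils.removeExtension(Paths.get("sample.vcf.gz"), Arrays.asList(".vcf.gz", ".gz")) // -> Path "sample"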
/**
* A simple helper method that reads a zipped archive and unzips it into a target folder, preserving the directory structure.
*
* @param zippedArchive A Path to a zipped archive
* @param toUnzipFolder A Path to a folder into which to place the archive contents
* @throws IOException if the zipped archive cannot be opened
*/
public static void unzipToFolder(final Path zippedArchive, final Path toUnzipFolder) throws IOException {
ZipFile resultZip = new ZipFile(zippedArchive.toFile());
Enumeration<? extends ZipEntry> entries = resultZip.entries();
while(entries.hasMoreElements()) {
ZipEntry current = entries.nextElement();
String suffix = current.toString();
if (!current.isDirectory()) {
// if the entry is a file, extracts it
try {
Path entryTarget = toUnzipFolder.resolve(suffix);
entryTarget.getParent().toFile().mkdirs();
extractZipStreamToFile(resultZip.getInputStream(current), entryTarget.toString());
} catch (IOException e) {
e.printStackTrace();
}
} else {
// if the entry is a directory, make the directory
new File(String.valueOf(toUnzipFolder.resolve(suffix))).mkdirs();
}
}
}
/**
* Extracts a zip entry (file entry)
*/
private static void extractZipStreamToFile(InputStream zipIn, String filePath) throws IOException {
BufferedInputStream bis = new BufferedInputStream(zipIn);
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(filePath));
byte[] bytesIn = new byte[1024 * 1024];
int read = 0;
while ((read = bis.read(bytesIn)) != -1) {
bos.write(bytesIn, 0, read);
}
bos.close();
}
}