All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.nio.NioFileCopierWithProgressMeter Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils.nio;

import org.apache.commons.collections4.queue.CircularFifoQueue;
import org.apache.commons.io.FileUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;

import javax.xml.bind.DatatypeConverter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.Duration;
import java.time.Instant;
import java.time.ZoneId;
import java.util.Queue;

/**
 * Class to copy a file using {@link java.nio}.
 * Operates using paths.
 *
 * INSTANCES OF THIS CLASS ARE NOT THREAD-SAFE!
 *
 * Created by jonn on 8/27/18.
 */
public class NioFileCopierWithProgressMeter {

    //==================================================================================================================
    // Standard logger:
    private static final Logger logger = LogManager.getLogger(NioFileCopierWithProgressMeter.class);

    //==================================================================================================================
    // Public Static Members:

    //==================================================================================================================
    // Protected Static Members:

    /** Size of the in-memory copy buffer: one mebibyte. */
    protected static final int    BUFFER_SIZE_BYTES                          = 1 << 20;
    /** Progress-log step (in percent) used until the source file size is known. */
    protected static final double DEFAULT_PROGRESS_DISPLAY_PERCENT_INCREMENT = 0.25;

    // Lengths of common time units, expressed in milliseconds:
    protected static final long SECOND_IN_MS = 1000L;
    protected static final long MINUTE_IN_MS = 60L * SECOND_IN_MS;
    protected static final long HOUR_IN_MS   = 60L * MINUTE_IN_MS;
    protected static final long DAY_IN_MS    = 24L * HOUR_IN_MS;

    // Unit conversion factors:
    protected static final int KB_TO_BYTES      = 1 << 10;
    protected static final int MS_TO_SEC        = 1_000;
    protected static final int NANOS_TO_MILLIS  = 1_000_000;
    protected static final int NANOS_TO_SECONDS = 1_000_000_000;

    /** Number of recent transfer-speed samples kept for the rolling average. */
    protected static final int COPY_SPEED_HISTORY_SIZE = 10;

    // Defaults applied by the factory methods:
    protected static final boolean   OVERWRITE_EXISTING_DEFAULT = false;
    protected static final Verbosity VERBOSITY_DEFAULT          = Verbosity.MODERATE;

    //==================================================================================================================
    // Protected Members:

    // Data variables:
    protected final Path source;
    protected final Path dest;

    protected long srcFileSize;
    protected int srcFileSizeNumDigits;

    protected String        checksum         = "";
    protected MessageDigest messageDigest    = null;
    protected String        expectedChecksum = "";

    // Flag defaults:
    protected boolean   overwriteExisting              = OVERWRITE_EXISTING_DEFAULT;
    protected Verbosity verbosity                      = Verbosity.MODERATE;
    protected boolean   formatTimeRemainingAsTimestamp = true;

    // Copy buffer:
    protected final byte          copyBuffer[]                = new byte[ BUFFER_SIZE_BYTES ];

    // Progress variables:
    protected double progressPercentDisplayIncrement             = DEFAULT_PROGRESS_DISPLAY_PERCENT_INCREMENT;
    protected final Queue downloadBytesPerMilliSecond = new CircularFifoQueue<>(COPY_SPEED_HISTORY_SIZE);
    protected       boolean       copyComplete                = false;
    protected       long          totalBytesRead              = 0;
    protected       long          progressBytesRead           = 0;
    protected       double        lastProgressValue           = 0;
    protected long lastProgressTime_ns;

    //==================================================================================================================
    // Constructors:

    /**
     * {@link NioFileCopierWithProgressMeter} uses a factory pattern.
     * This internal constructor is to be used by the class itself.
     * @param source The {@link Path} to the source file for the copy.
     * @param dest The {@link Path} to the destination file for the copy.
     * @param overwriteExisting If {@code true} will overwrite an existing file in the location specified by {@code dest}.
     * @param verbosity {@link Verbosity} of the progress progress log over the duration of the copy.
     */
    protected NioFileCopierWithProgressMeter(final Path source, final Path dest, final boolean overwriteExisting, final Verbosity verbosity) {
        this.source = source.toAbsolutePath();
        this.dest = dest.toAbsolutePath();
        this.overwriteExisting = overwriteExisting;
        this.verbosity = verbosity;
    }

    //==================================================================================================================
    // Static Methods:

    /**
     * Create an {@link NioFileCopierWithProgressMeter} with the default overwrite behavior and default verbosity.
     * With the defaults, the copy is refused when something already exists at {@code dest}.
     *
     * @param source The {@link Path} of the file to copy.
     * @param dest The {@link Path} the file is copied to.
     * @return An {@link NioFileCopierWithProgressMeter} set up to copy {@code source} to {@code dest}.
     */
    public static NioFileCopierWithProgressMeter create(final Path source, final Path dest) {
        return create(source, dest, OVERWRITE_EXISTING_DEFAULT, VERBOSITY_DEFAULT);
    }

    /**
     * Create an {@link NioFileCopierWithProgressMeter} that logs progress at the default verbosity.
     *
     * @param source The {@link Path} of the file to copy.
     * @param dest The {@link Path} the file is copied to.
     * @param overwriteExisting If {@code true}, a file already present at {@code dest} is overwritten.
     * @return An {@link NioFileCopierWithProgressMeter} set up to copy {@code source} to {@code dest}.
     */
    public static NioFileCopierWithProgressMeter create(final Path source, final Path dest, final boolean overwriteExisting) {
        return create(source, dest, overwriteExisting, VERBOSITY_DEFAULT);
    }

    /**
     * Create an {@link NioFileCopierWithProgressMeter} with every option given explicitly.
     *
     * @param source The {@link Path} of the file to copy.
     * @param dest The {@link Path} the file is copied to.
     * @param overwriteExisting If {@code true}, a file already present at {@code dest} is overwritten.
     * @param verbosity {@link Verbosity} of the progress log written during the copy.
     * @return An {@link NioFileCopierWithProgressMeter} set up to copy {@code source} to {@code dest}.
     */
    public static NioFileCopierWithProgressMeter create(final Path source, final Path dest, final boolean overwriteExisting, final Verbosity verbosity) {
        final NioFileCopierWithProgressMeter copier =
                new NioFileCopierWithProgressMeter(source, dest, overwriteExisting, verbosity);
        return copier;
    }

    //==================================================================================================================
    // Getters / Setters:

    /**
     * @return The absolute {@link Path} used as the source of this {@link NioFileCopierWithProgressMeter}
     *         (the stored instance itself, not a copy).
     */
    public Path getSource() {
        return this.source;
    }

    /**
     * @return The absolute {@link Path} used as the destination of this {@link NioFileCopierWithProgressMeter}
     *         (the stored instance itself, not a copy).
     */
    public Path getDest() {
        return this.dest;
    }

    /**
     * @return {@code true} once {@link #source} has been copied to {@link #dest} by this
     *         {@link NioFileCopierWithProgressMeter}; {@code false} before that.
     */
    public boolean isCopyComplete() {
        return this.copyComplete;
    }

    /**
     * @return {@code true} if this {@link NioFileCopierWithProgressMeter} is allowed to overwrite an existing
     *         {@link #dest}; {@code false} otherwise.
     */
    public boolean isOverwriteExisting() {
        return this.overwriteExisting;
    }

    /**
     * Chooses whether this {@link NioFileCopierWithProgressMeter} may overwrite {@link #dest} when copying.
     *
     * @param overwriteExisting {@code true} to allow overwriting an existing destination file.
     * @return This instance, to allow call chaining.
     */
    public NioFileCopierWithProgressMeter setOverwriteExisting(final boolean overwriteExisting) {
        this.overwriteExisting = overwriteExisting;
        return this;
    }

    /**
     * Configures the checksum algorithm and the expected checksum for the copied file.
     * Once an algorithm is set, a checksum of the copied bytes is computed during the copy and reported,
     * together with the expected value, in the copy results.
     *
     * @param algorithm Name of the checksum algorithm, as understood by {@link MessageDigest#getInstance(String)}.
     * @param expectedChecksum Checksum value the copied file is expected to have under {@code algorithm}.
     * @return This instance, to allow call chaining.
     * @throws IllegalArgumentException if {@code algorithm} is not a known digest algorithm.
     */
    public NioFileCopierWithProgressMeter setChecksumAlgorithmAndExpectedChecksum(final String algorithm,
                                                                                  final String expectedChecksum) {
        final MessageDigest digest;
        try {
            digest = MessageDigest.getInstance(algorithm);
        }
        catch ( final NoSuchAlgorithmException ex ) {
            throw new IllegalArgumentException("Provided checksum algorithm does not exist: " + algorithm, ex);
        }

        // Only touch our state once the algorithm lookup has succeeded:
        this.messageDigest    = digest;
        this.expectedChecksum = expectedChecksum;

        return this;
    }

    /**
     * Makes the progress log print the remaining time as a timestamp, e.g. 'DD:HH:MM:ss.SSS'.
     *
     * @return This instance, to allow call chaining.
     */
    public NioFileCopierWithProgressMeter setFormatTimeRemainingAsTimestamp() {
        this.formatTimeRemainingAsTimestamp = true;
        return this;
    }

    /**
     * Makes the progress log print the remaining time in words, e.g. 'D days, H hours, M minutes, s seconds'.
     *
     * @return This instance, to allow call chaining.
     */
    public NioFileCopierWithProgressMeter setFormatTimeRemainingAsWords() {
        this.formatTimeRemainingAsTimestamp = false;
        return this;
    }

    /**
     * @return The {@link Verbosity} currently used by this {@link NioFileCopierWithProgressMeter} for its progress log.
     */
    public Verbosity getVerbosity() {
        return this.verbosity;
    }

    /**
     * Changes the progress meter {@link #verbosity} of this {@link NioFileCopierWithProgressMeter}.
     *
     * @param verbosity The {@link Verbosity} to log at from now on.
     * @return This instance, to allow call chaining.
     */
    public NioFileCopierWithProgressMeter setVerbosity(final Verbosity verbosity) {
        this.verbosity = verbosity;
        return this;
    }

    //==================================================================================================================
    // Instance Methods:

    /**
     * Feeds a slice of the copy buffer into the message digest; does nothing when no digest is configured.
     *
     * @param copyBuffer Buffer holding the bytes that were just copied.
     * @param startIndex Index of the first byte to digest (inclusive).
     * @param endIndex Index just past the last byte to digest (exclusive).
     */
    protected void updateMessageDigest(final byte[] copyBuffer, final int startIndex, final int endIndex) {
        if ( messageDigest == null ) {
            return;
        }

        final int length = endIndex - startIndex;
        messageDigest.update(copyBuffer, startIndex, length);
    }

    /**
     * Finalizes the message digest and stores its hex representation in {@link #checksum}.
     * Does nothing when no digest is configured.
     */
    protected void calculateChecksumFromMessageDigest() {
        if ( messageDigest == null ) {
            return;
        }

        final byte[] digestBytes = messageDigest.digest();
        checksum = DatatypeConverter.printHexBinary(digestBytes);
    }

    /**
     * @return {@code true} when the configured verbosity is {@link Verbosity#SILENT}.
     */
    protected boolean isSilent() {
        return Verbosity.SILENT == verbosity;
    }

    /**
     * Renders a duration either as a timestamp or in words, depending on {@link #formatTimeRemainingAsTimestamp}.
     *
     * @param time_ms Duration to render, in milliseconds.
     * @return The formatted duration.
     */
    protected String formatMillisecondsTime(final long time_ms) {
        final SimpleTimeFormatter formatter = formatTimeRemainingAsTimestamp
                ? new AsTimeTimeFormatter(time_ms)
                : new AsWordsTimeFormatter(time_ms);
        return formatter.format();
    }

    /**
     * Dispatches a progress message to the verbose or the simple logger according to {@link #verbosity}.
     * Nothing is logged unless the verbosity is above {@link Verbosity#MINIMAL}.
     *
     * @param progressValue Progress of the copy, in percent.
     * @param totalBytesRead Number of bytes copied so far.
     * @param bytesPerMillisecond Current transfer speed estimate, in bytes per millisecond.
     */
    protected void logProgress(final double progressValue, final long totalBytesRead, final double bytesPerMillisecond) {
        if ( verbosity == Verbosity.VERBOSE ) {
            logProgressVerbose(progressValue, totalBytesRead, bytesPerMillisecond);
            return;
        }

        if ( verbosity.isAbove(Verbosity.MINIMAL) ) {
            logProgressSimple(progressValue, totalBytesRead, bytesPerMillisecond);
        }
    }


    /**
     * Estimates how long the rest of the copy will take at the given speed.
     *
     * @param totalBytesRead Number of bytes copied so far.
     * @param bytesPerMillisecond Transfer speed estimate, in bytes per millisecond.
     * @return The estimated remaining time, truncated to whole milliseconds.
     */
    protected Duration getRemainingDuration(final long totalBytesRead, final double bytesPerMillisecond) {
        final long bytesLeft  = srcFileSize - totalBytesRead;
        final long millisLeft = (long) (bytesLeft / bytesPerMillisecond);
        return Duration.ofMillis(millisLeft);
    }

    /**
     * Logs a one-line progress message: percent complete, estimated time remaining and transfer speed.
     *
     * @param progressValue Progress of the copy, in percent.
     * @param totalBytesRead Number of bytes copied so far.
     * @param bytesPerMillisecond Transfer speed estimate, in bytes per millisecond.
     */
    protected void logProgressSimple(final double progressValue, final long totalBytesRead, final double bytesPerMillisecond) {

        final long timeRemaining_ms = getRemainingDuration(totalBytesRead, bytesPerMillisecond).toMillis();

        // NOTE(review): this value is in units of 1024 bytes per second even though the message labels it "kbps".
        final double speed = bytesPerMillisecond / KB_TO_BYTES * MS_TO_SEC;

        final String message = String.format(
                "    Transfer: % 2.2f%% complete.  Est. time remaining: %s (@%3.02f kbps)",
                progressValue,
                formatMillisecondsTime(timeRemaining_ms),
                speed
        );
        logger.info(message);
    }

    /**
     * Logs a detailed progress message: percent complete, bytes copied (raw and human readable),
     * estimated time remaining, estimated local completion time and transfer speed.
     *
     * @param progressValue Progress of the copy, in percent.
     * @param totalBytesRead Number of bytes copied so far.
     * @param bytesPerMillisecond Transfer speed estimate, in bytes per millisecond.
     */
    protected void logProgressVerbose(final double progressValue, final long totalBytesRead, final double bytesPerMillisecond) {

        // Get the remaining time estimate:
        final Duration estTimeRemainingDuration = getRemainingDuration(totalBytesRead, bytesPerMillisecond);

        final Instant endTime = Instant.now().plus(estTimeRemainingDuration);

        // The width is spliced into the format string, so it must be at least 1:
        // a width of 0 would produce "%0d", which String.format rejects (the '0' is parsed as a flag without a width).
        final int byteCountWidth = Math.max(1, srcFileSizeNumDigits);

        logger.info(
                String.format("    Transfer: % 2.2f%% complete (%" + byteCountWidth + "d bytes; %6s).  Est. time remaining: %s (Complete time: %s) (@%3.02f kbps)",
                        progressValue,
                        totalBytesRead,
                        FileUtils.byteCountToDisplaySize(totalBytesRead),
                        formatMillisecondsTime(estTimeRemainingDuration.toMillis()),
                        endTime.atZone(ZoneId.systemDefault()).toLocalDateTime().toString(),
                        bytesPerMillisecond / KB_TO_BYTES * MS_TO_SEC
                )
        );
    }

    /**
     * Records the reference time from which the first transfer-speed sample is measured.
     *
     * @param startTime_ns Start time of the copy, as a {@link System#nanoTime()} reading.
     */
    private void initializeCopyProgressTime(final long startTime_ns) {
        this.lastProgressTime_ns = startTime_ns;
    }

    /**
     * Accounts for a freshly copied chunk and logs progress whenever the copy reaches a new display increment.
     * Nothing is tracked when the verbosity is {@link Verbosity#SILENT}.
     *
     * @param bytesRead Number of bytes copied by the most recent read/write cycle.
     */
    protected void updateCopyProgress(final int bytesRead) {

        // Only bother with all this if we're logging in the first place:
        if ( isSilent() ) {
            return;
        }

        // Update our progress counters:
        totalBytesRead += bytesRead;
        progressBytesRead += bytesRead;

        // Get our progress percentage:
        final double rawProgressValuePercent = ((double) totalBytesRead / (double) srcFileSize) * 100.0;

        // Round our progress down to the nearest multiple of the display increment:
        final double progressValue = progressPercentDisplayIncrement * (Math.floor(Math.abs(rawProgressValuePercent / progressPercentDisplayIncrement)));

        // Output our progress if we're ready for it:
        if ( progressValue != lastProgressValue ) {

            // Measure the elapsed time with nanosecond resolution.  Truncating to whole milliseconds would
            // yield 0 for fast copies and turn the speed into Infinity/NaN, which would then poison the
            // rolling average.  The clamp to 1ns keeps the divisor strictly positive.
            final long   currentProgressTime_ns = System.nanoTime();
            final long   dt_ns                  = Math.max(1L, currentProgressTime_ns - lastProgressTime_ns);
            final double dt_ms                  = ((double) dt_ns) / NANOS_TO_MILLIS;
            final double bytesPerMs             = ((double) progressBytesRead) / dt_ms;
            lastProgressTime_ns = currentProgressTime_ns;

            // Add the bytes/ms to our queue:
            downloadBytesPerMilliSecond.add(bytesPerMs);

            // Average the recent samples.  The explicit Number conversion does not depend on how the
            // queue's element type is declared.  The fallback is the current sample (a speed, not a time).
            final double averageBytesPerMs = downloadBytesPerMilliSecond.stream()
                    .mapToDouble(sample -> ((Number) sample).doubleValue())
                    .average()
                    .orElse(bytesPerMs);

            // Log our progress so far:
            logProgress(progressValue, totalBytesRead, averageBytesPerMs);

            // Get ready for the next call:
            lastProgressValue = progressValue;
            progressBytesRead = 0;
        }
    }

    /**
     * Picks how often (in percent of the file) progress is logged, based on the size of the file:
     * the larger the file, the finer the increment.
     *
     * @param fileSize Size of the file being copied, in bytes.
     */
    protected void determineProgessDisplayIncrement(final long fileSize) {
        // TODO: Refactor class to have download and logger in separate threads and display on every percentage complete or delta-time.

        final long mebibyte = 1024L * 1024L;
        final long gibibyte = 1024L * mebibyte;

        final double increment;
        if ( fileSize >= 100 * gibibyte ) {
            // 100Gb or larger
            increment = 0.1;
        }
        else if ( fileSize >= 10 * gibibyte ) {
            // 10Gb or larger
            increment = 0.25;
        }
        else if ( fileSize >= 5 * gibibyte ) {
            // 5Gb or larger
            increment = 0.5;
        }
        else if ( fileSize >= gibibyte ) {
            // 1Gb or larger
            increment = 1;
        }
        else if ( fileSize >= 100 * mebibyte ) {
            // 100Mb or larger
            increment = 5;
        }
        else if ( fileSize >= mebibyte ) {
            // 1Mb or larger
            increment = 10;
        }
        else {
            // Less than 1Mb
            increment = 25;
        }

        progressPercentDisplayIncrement = increment;
    }

    /**
     * Copies {@link #source} to {@link #dest} through {@link #copyBuffer}, updating the message digest
     * and the progress meter after every chunk and computing the checksum at the end.
     *
     * @throws UserException if an {@link IOException} occurs while reading or writing.
     */
    protected void doCopy() {

        try ( final InputStream inputStream = Files.newInputStream(getSource());
              final OutputStream outputStream = Files.newOutputStream(getDest()) ) {

            // Get the file size of our source file:
            srcFileSize = Files.size(getSource());

            // Number of decimal digits in the size, used as a field width by the verbose progress log.
            // Counting characters is exact for every size, whereas ceil(log10(size)) yields 0 for a
            // 1-byte file, a huge negative number for an empty file and is one short for powers of ten.
            srcFileSizeNumDigits = Long.toString(srcFileSize).length();

            determineProgessDisplayIncrement(srcFileSize);

            if ( verbosity.isAbove(Verbosity.SILENT) ) {
                logger.info("Initiating copy from " + getSource().toUri().toString() + " to " + getDest().toUri().toString());
                logger.info("File size: " + srcFileSize + " bytes (" + FileUtils.byteCountToDisplaySize(srcFileSize) + ").");
                logger.info("Please wait.  This could take a while...");
            }

            // Perform the copy, one buffer at a time, until the input is exhausted:
            int bytesRead;
            while ( (bytesRead = inputStream.read(copyBuffer)) != -1 ) {

                // Write to our output:
                outputStream.write(copyBuffer, 0, bytesRead);

                // Update the message digest so we can calculate the file checksum on the fly:
                updateMessageDigest(copyBuffer, 0, bytesRead);

                // Update our internal progress meter:
                updateCopyProgress(bytesRead);
            }

            // Calculate the checksum from the message digest:
            calculateChecksumFromMessageDigest();
        }
        catch (final IOException ex) {
            throw new UserException("Could not copy file: " + source.toUri().toString() + " -> " + dest.toUri().toString(), ex);
        }
    }

    /**
     * Run the copy from {@link #source} to {@link #dest}.
     * A given {@link NioFileCopierWithProgressMeter} can perform its copy only once.
     *
     * @return The results of the copy: paths, file size, and checksum information.
     * @throws GATKException if this instance has already completed a copy.
     * @throws UserException.CouldNotCreateOutputFile if {@link #dest} exists and overwriting is not enabled.
     */
    public NioFileCopierWithProgressMeterResults initiateCopy() {

        // A copier is single-use:
        if ( copyComplete ) {
            throw new GATKException("Attempted multiple file copies.  NioFileCopierWithProgressMeter can copy a file only once!");
        }

        // Refuse to clobber an existing destination unless explicitly allowed to:
        final boolean destExists = Files.exists(getDest());
        if ( destExists && !isOverwriteExisting() ) {
            throw new UserException.CouldNotCreateOutputFile(getDest().toUri().toString(), "Download aborted!  Output data sources file already exists!");
        }
        if ( destExists && verbosity.isAbove(Verbosity.SILENT) ) {
            logger.warn("Destination already exists.  Overwriting file at location: " + getDest().toUri().toString());
        }

        // The start time seeds the progress meter and is used for the elapsed-time report:
        final long startTime_ns = System.nanoTime();
        initializeCopyProgressTime(startTime_ns);

        // Now copy from our source to our destination:
        doCopy();

        // Report the total elapsed time:
        if ( verbosity.isAbove(Verbosity.SILENT) ) {
            logger.info(String.format("Download Complete! - Total elapsed time: %ds", ((System.nanoTime() - startTime_ns) / NANOS_TO_SECONDS)));
        }

        // Make sure we don't copy this file more than once:
        copyComplete = true;

        final boolean checksumCalculated = messageDigest != null;
        final String  checksumAlgorithm  = checksumCalculated ? messageDigest.getAlgorithm() : "";

        return new NioFileCopierWithProgressMeterResults(
                source,
                dest,
                srcFileSize,
                checksumCalculated,
                checksum,
                checksumAlgorithm,
                expectedChecksum
        );
    }

    //==================================================================================================================
    // Helper Data Types:

    /**
     * Levels of progress logging supported by an {@link NioFileCopierWithProgressMeter},
     * ordered from least to most output.
     */
    public enum Verbosity {
        /** Log nothing at all. */
        SILENT(0),
        /** Log at the start and end of the copy only, with no progress in between. */
        MINIMAL(1),
        /** Log basic progress information while copying. */
        MODERATE(2),
        /** Log detailed progress information while copying. */
        VERBOSE(3);

        /** Numeric rank of this level; a higher rank means more output. */
        private final int level;

        Verbosity(final int level) {
            this.level = level;
        }

        /**
         * @param other The level to compare against.
         * @return {@code true} if this level is strictly more verbose than {@code other}.
         */
        public boolean isAbove(final Verbosity other) {
            return Integer.compare(this.level, other.level) > 0;
        }
    }

    /**
     * Strategy for computing a checksum over the contents of an {@link InputStream}.
     * Intended for verifying that file contents have not been corrupted in transit.
     */
    @FunctionalInterface
    public interface ChecksumCalculator {
        /**
         * Computes the checksum of the given stream.
         *
         * @param data Stream whose contents are to be checksummed.
         * @return The checksum as a {@link String}.
         * @throws IOException if reading from {@code data} fails.
         */
        String calculateChecksumOnInputStream(InputStream data) throws IOException;
    }

    /**
     * Splits a duration in milliseconds into days, hours, minutes, seconds and leftover milliseconds,
     * and leaves the textual rendering to subclasses.
     */
    private abstract class SimpleTimeFormatter {

        final long rawTime_ms;
        final long days;
        final long hours;
        final long minutes;
        final long seconds;
        final long millis;

        /**
         * @param time_ms Duration to break down, in milliseconds.
         */
        SimpleTimeFormatter(final long time_ms) {
            rawTime_ms = time_ms;

            // Peel off the largest unit first and carry the rest down:
            long remainder = time_ms;

            days = formatTimeHelper(remainder, DAY_IN_MS);
            remainder -= days * DAY_IN_MS;

            hours = formatTimeHelper(remainder, HOUR_IN_MS);
            remainder -= hours * HOUR_IN_MS;

            minutes = formatTimeHelper(remainder, MINUTE_IN_MS);
            remainder -= minutes * MINUTE_IN_MS;

            seconds = formatTimeHelper(remainder, SECOND_IN_MS);
            remainder -= seconds * SECOND_IN_MS;

            millis = remainder;
        }

        /**
         * @param duration Duration in milliseconds.
         * @param conversionFactor Length of the target unit in milliseconds.
         * @return The number of whole units contained in {@code duration}; 0 when it is shorter than one unit.
         */
        private long formatTimeHelper(final long duration, final long conversionFactor ) {
            // The comparison must be inclusive: a duration of exactly one unit (e.g. 1000ms) is one
            // whole unit, not zero units plus a 1000ms remainder.
            if ( duration >= conversionFactor ) {
                return Math.floorDiv(duration, conversionFactor);
            }
            return 0;
        }

        /**
         * @param value Quantity being described.
         * @return The plural suffix for {@code value}: empty for exactly 1, {@code "s"} otherwise.
         */
        protected String sHelper(final long value) {
            return (value == 1 ? "" : "s");
        }

        /**
         * @return The duration rendered as text.
         */
        public abstract String format();
    }

    private class AsWordsTimeFormatter extends SimpleTimeFormatter {

        AsWordsTimeFormatter(final long time_ms){
            super(time_ms);
        }

        public String format() {
            if ( days > 0 ) {
                return String.format("%d day" + sHelper(days) + ", %02d hour" + sHelper(hours) + ", %02d minute" + sHelper(minutes) + ", %2d.%03d seconds", days, hours, minutes, seconds, millis);
            }
            if ( hours > 0 ) {
                return String.format("%02d hour" + sHelper(hours) + ", %02d minute" + sHelper(minutes) + ", %02d.%03d seconds", hours, minutes, seconds, millis);
            }
            if ( minutes > 0 ) {
                return String.format("%02d minute" + sHelper(minutes) + ", %02d.%03d seconds", minutes, seconds, millis);
            }
            if ( seconds > 0 ) {
                return String.format("%02d.%03d seconds", seconds, millis);
            }
            return String.format("0.%03d seconds", millis);
        }
    }
    private class AsTimeTimeFormatter extends SimpleTimeFormatter {

        AsTimeTimeFormatter(final long time_ms){
            super(time_ms);
        }
        public String format() {

            if ( days > 0 ) {
                return String.format("%d:%02d:%02d:%02d.%03d", days, hours, minutes, seconds, millis);
            }
            if ( hours > 0 ) {
                return String.format("%02d:%02d:%02d.%03d", hours, minutes, seconds, millis);
            }
            if ( minutes > 0 ) {
                return String.format("%02d:%02d.%03d", minutes, seconds, millis);
            }
            if ( seconds > 0 ) {
                return String.format("%02d.%03d", seconds, millis);
            }
            return String.format("00.%03d", millis);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy