All downloads are free. Search and download functionality uses the official Maven repository.

org.wikibrain.download.FileDownloader Maven / Gradle / Ivy

There is a newer version: 0.9.1
Show newest version
package org.wikibrain.download;

import com.github.axet.wget.WGet;
import com.github.axet.wget.info.DownloadInfo;
import com.github.axet.wget.info.URLInfo;
import com.github.axet.wget.info.ex.DownloadIOCodeError;

import java.io.File;
import java.net.URL;
import java.util.concurrent.atomic.AtomicBoolean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A utility class to download files from urls.
 * Prints useful logging messages and retries upon failure with exponential backoffs.
 *
 * @author Shilad Sen
 */
public class FileDownloader {
    public static final Logger LOG = LoggerFactory.getLogger(FileDownloader.class);

    private static final int SLEEP_TIME = 500;     // getOneFile takes a break from downloading
    private static final int MAX_ATTEMPT = 10;      // number of attempts before getOneFile gives up downloading the dump
    private static final int DISPLAY_INFO = 10000;  // amount of time between displaying download progress
    private static final int BACKOFF_TIME = 20000;


    private int sleepTime = SLEEP_TIME;
    private int maxAttempts = MAX_ATTEMPT;
    private int displayInfo = DISPLAY_INFO;
    private int backoffTime = BACKOFF_TIME;


    public FileDownloader() {
    }

    public File download(URL url, File file) throws InterruptedException {
        LOG.info("beginning download of " + url + " to " + file);
        for (int i=1; i <= maxAttempts; i++) {
            try {
                AtomicBoolean stop = new AtomicBoolean(false);
                DownloadInfo info = new DownloadInfo(url);
                DownloadMonitor monitor = new DownloadMonitor(info);
                info.extract(stop, monitor);
                file.getParentFile().mkdirs();
                WGet wget = new WGet(info, file);
                wget.download(stop, monitor);
                LOG.info("Download complete: " + file.getAbsolutePath());
                while (!monitor.isFinished()) {
                    Thread.sleep(sleepTime);
                }
                return file;
            } catch (DownloadIOCodeError e) {
                if (i < maxAttempts) {
                    LOG.info("Failed to download " + url +
                            ". Reconnecting in " + (i * backoffTime / 1000) +
                            " seconds (HTTP " + e.getCode() + "-Error " + url + ")");
                    Thread.sleep(backoffTime * i);
                } else {
                    LOG.warn("Failed to download " + file +
                            " (HTTP " + e.getCode() + "-Error " + url + ")");
                }
            }
        }
        return null;
    }

    class DownloadMonitor implements Runnable {
        private final DownloadInfo info;
        private long last;

        DownloadMonitor(DownloadInfo info) {
            this.info = info;
        }

        public boolean isFinished() {
            return info.getState() == URLInfo.States.STOP || info.getState() == URLInfo.States.ERROR || info.getState() == URLInfo.States.DONE;
        }

        @Override
        public void run() {
            switch (info.getState()) {
                case EXTRACTING:
                case EXTRACTING_DONE:
                case DONE:
                    LOG.info("" + info.getState());
                    break;
                case RETRYING:
                    LOG.info(info.getState() + " " + info.getDelay());
                    break;
                case DOWNLOADING:
                    long now = System.currentTimeMillis();
                    if (now > last + displayInfo) {
                        last = now;
                        LOG.info(String.format("%s %.1f of %.1f MB (%.1f%%)",
                                        info.getSource(),
                                        info.getCount() / (1024*1024.0),
                                        info.getLength() / (1024*1024.0),
                                        info.getCount() * 100.0 / info.getLength())
                        );
                    }
                    break;
                default:
                    break;
            }
        }
    }


    public void setSleepTime(int sleepTime) {
        this.sleepTime = sleepTime;
    }

    public void setMaxAttempts(int maxAttempts) {
        this.maxAttempts = maxAttempts;
    }

    public void setDisplayInfo(int displayInfo) {
        this.displayInfo = displayInfo;
    }

    public void setBackoffTime(int backoffTime) {
        this.backoffTime = backoffTime;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy