
org.deeplearning4j.datasets.fetchers.CacheableExtractableDataSetFetcher

package org.deeplearning4j.datasets.fetchers;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.datavec.api.util.ArchiveUtils;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.zip.Adler32;
import java.util.zip.Checksum;

/**
 * Abstract class for enabling dataset downloading and local caching.
 *
 * @author Justin Long (crockpotveggies)
 */
@Slf4j
public abstract class CacheableExtractableDataSetFetcher implements CacheableDataSet {

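    // Datasets are cached under <user.home>/.deeplearning4j/<localCacheName()>/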
    public final static String DL4J_DIR = "/.deeplearning4j/";
    public final static File ROOT_CACHE_DIR = new File(System.getProperty("user.home"), DL4J_DIR);
    public File LOCAL_CACHE = new File(ROOT_CACHE_DIR, localCacheName());

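    // Default implementations: dataSetName() is blank unless a subclass overrides it,
    // and the no-argument variants delegate to the TRAIN split.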
    @Override public String dataSetName(DataSetType set) { return ""; }
    @Override public String remoteDataUrl() { return remoteDataUrl(DataSetType.TRAIN); }
    @Override public long expectedChecksum() { return expectedChecksum(DataSetType.TRAIN); }
    public void downloadAndExtract() throws IOException { downloadAndExtract(DataSetType.TRAIN); }

    /**
     * Downloads the remote dataset archive and extracts it into the local cache.
     *
     * @param set the dataset split to download
     * @throws IOException if the download or extraction fails
     */
    public void downloadAndExtract(DataSetType set) throws IOException {
        String localFilename = new File(remoteDataUrl(set)).getName();
        File tmpFile = new File(System.getProperty("java.io.tmpdir"), localFilename);

        // treat an empty (or unusable) cache directory as a cache miss
        if(LOCAL_CACHE.exists()) {
            File[] cacheContents = LOCAL_CACHE.listFiles();
            if(cacheContents == null || cacheContents.length < 1) LOCAL_CACHE.delete();
        }

        if(!new File(LOCAL_CACHE, dataSetName(set)).exists()) {
            LOCAL_CACHE.mkdirs();
            tmpFile.delete();
            log.info("Downloading dataset to " + tmpFile.getAbsolutePath());
            FileUtils.copyURLToFile(new URL(remoteDataUrl(set)), tmpFile);

            // verify the fresh download against the expected Adler-32 checksum, if one is provided
            if(expectedChecksum(set) != 0L) {
                log.info("Verifying download...");
                Checksum adler = new Adler32();
                FileUtils.checksum(tmpFile, adler);
                long localChecksum = adler.getValue();
                log.info("Checksum local is " + localChecksum + ", expecting " + expectedChecksum(set));

                if(expectedChecksum(set) != localChecksum) {
                    log.error("Checksums do not match. Cleaning up files and failing...");
                    tmpFile.delete();
                    throw new IllegalStateException(
                            "Dataset file failed checksum. If this error persists, please open an issue at https://github.com/deeplearning4j/deeplearning4j.");
                }
            }

            // extract the verified archive into the local cache directory
            ArchiveUtils.unzipFileTo(tmpFile.getAbsolutePath(), LOCAL_CACHE.getAbsolutePath());
        } else {
            log.info("Using cached dataset at " + LOCAL_CACHE.getAbsolutePath());
        }
    }

    /**
     * Returns a boolean indicating if the dataset is already cached locally.
     *
     * @return true if the local cache directory for this dataset exists
     */
    @Override
    public boolean isCached() {
        return LOCAL_CACHE.exists();
    }

}
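
For context, here is a minimal sketch (not part of the deeplearning4j sources) of how a concrete fetcher might extend this class. It assumes the CacheableDataSet interface declares only the methods referenced in the listing above (the real interface may declare more, such as a record-reader factory); the class name, URL, cache name, and checksum below are placeholders.

// Hypothetical example fetcher for an imaginary dataset, same package as the base class.
public class TinyExampleDataSetFetcher extends CacheableExtractableDataSetFetcher {

    @Override
    public String remoteDataUrl(DataSetType set) {
        // placeholder URL for the hosted archive of this hypothetical dataset
        return "https://example.org/datasets/tiny-example.zip";
    }

    @Override
    public String localCacheName() {
        // extracted files land under <user.home>/.deeplearning4j/tiny-example/
        return "tiny-example";
    }

    @Override
    public long expectedChecksum(DataSetType set) {
        // Adler-32 checksum of the archive; returning 0L skips verification
        return 0L;
    }

    @Override
    public String dataSetName(DataSetType set) {
        // name of the extracted entry used to detect a cache hit
        return "tiny-example";
    }

    public static void main(String[] args) throws java.io.IOException {
        // typical usage: download and unpack once, then reuse the cached copy
        TinyExampleDataSetFetcher fetcher = new TinyExampleDataSetFetcher();
        if (!fetcher.isCached()) {
            fetcher.downloadAndExtract(DataSetType.TRAIN);
        }
    }
}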
