All downloads are free. Search and download functionality uses the official Maven repository.

org.deeplearning4j.base.EmnistFetcher Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
/*-
 *
 *  * Copyright 2015 Skymind,Inc.
 *  *
 *  *    Licensed under the Apache License, Version 2.0 (the "License");
 *  *    you may not use this file except in compliance with the License.
 *  *    You may obtain a copy of the License at
 *  *
 *  *        http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  *    Unless required by applicable law or agreed to in writing, software
 *  *    distributed under the License is distributed on an "AS IS" BASIS,
 *  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  *    See the License for the specific language governing permissions and
 *  *    limitations under the License.
 *
 */

package org.deeplearning4j.base;

import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.datasets.iterator.impl.EmnistDataSetIterator;

import java.io.File;

/**
 * Describes the remote files that make up one subset of the EMNIST dataset:
 * for the training and test splits it reports the download URL, the gzipped
 * and unzipped file names, and the expected MD5 string of the image and label
 * files.
 *
 * <p>The subset is fixed at construction time. It is not validated there, so a
 * {@code null} subset surfaces as a {@link NullPointerException} from the first
 * accessor that switches on it.
 *
 * @author Alex Black
 */
@Slf4j
public class EmnistFetcher extends MnistFetcher {
    protected static final String LOCAL_DIR_NAME = "EMNIST";

    private static final String BASE_URL = "http://deeplearning4j-resources.westus2.cloudapp.azure.com/emnist/";

    // Positions of the individual files inside the array built by emnistChecksum(...).
    private static final int TRAIN_IMAGES = 0;
    private static final int TRAIN_LABELS = 1;
    private static final int TEST_IMAGES = 2;
    private static final int TEST_LABELS = 3;

    private final EmnistDataSetIterator.Set ds;

    /**
     * Creates a fetcher for the given EMNIST subset and points {@code FILE_DIR}
     * at the {@value #LOCAL_DIR_NAME} directory underneath {@code BASE_DIR}.
     * Both fields are declared outside this class (presumably in
     * {@code MnistFetcher} - confirm there).
     *
     * @param ds the EMNIST subset whose files this fetcher describes
     */
    public EmnistFetcher(EmnistDataSetIterator.Set ds) {
        FILE_DIR = new File(BASE_DIR, LOCAL_DIR_NAME);
        this.ds = ds;
    }

    /**
     * Assembles {@code emnist-<subset>-<train|test><suffix>}.
     *
     * @param ds     subset providing the {@code <subset>} part, see {@link #name}
     * @param train  {@code true} for the training split, {@code false} for test
     * @param suffix trailing part of the file name, including its leading dash
     */
    private static String buildEmnistFileName(EmnistDataSetIterator.Set ds, boolean train, String suffix) {
        final String subset = name(ds);
        final String split = train ? "train" : "test";
        return "emnist-" + subset + "-" + split + suffix;
    }

    /** Name of the gzipped image file: the unzipped name plus {@code .gz}. */
    private static String getImagesFileName(EmnistDataSetIterator.Set ds, boolean train) {
        return getImagesFileNameUnzipped(ds, train) + ".gz";
    }

    /** Name of the image file once unzipped. */
    private static String getImagesFileNameUnzipped(EmnistDataSetIterator.Set ds, boolean train) {
        return buildEmnistFileName(ds, train, "-images-idx3-ubyte");
    }

    /** Name of the gzipped label file: the unzipped name plus {@code .gz}. */
    private static String getLabelsFileName(EmnistDataSetIterator.Set ds, boolean train) {
        return getLabelsFileNameUnzipped(ds, train) + ".gz";
    }

    /** Name of the label file once unzipped. */
    private static String getLabelsFileNameUnzipped(EmnistDataSetIterator.Set ds, boolean train) {
        return buildEmnistFileName(ds, train, "-labels-idx1-ubyte");
    }

    /**
     * Name of the mapping text file for a subset. The {@code train} flag does
     * not influence the result.
     */
    private static String getMappingFileName(EmnistDataSetIterator.Set ds, boolean train) {
        final String subset = name(ds);
        return "emnist-" + subset + "-mapping.txt";
    }

    /**
     * Translates a subset constant into the token used inside the file names.
     *
     * @throws UnsupportedOperationException if the constant is not one of the six handled here
     */
    private static String name(EmnistDataSetIterator.Set ds) {
        final String token;
        switch (ds) {
            case COMPLETE:
                token = "byclass";
                break;
            case MERGE:
                token = "bymerge";
                break;
            case BALANCED:
                token = "balanced";
                break;
            case LETTERS:
                token = "letters";
                break;
            case DIGITS:
                token = "digits";
                break;
            case MNIST:
                token = "mnist";
                break;
            default:
                throw new UnsupportedOperationException("Unknown DataSet: " + ds);
        }
        return token;
    }

    /**
     * Looks up the expected MD5 string of one file of a subset.
     *
     * @param ds    subset to look up
     * @param which one of {@link #TRAIN_IMAGES}, {@link #TRAIN_LABELS},
     *              {@link #TEST_IMAGES} or {@link #TEST_LABELS}
     * @throws UnsupportedOperationException if the constant is not one of the six handled here
     */
    private static String emnistChecksum(EmnistDataSetIterator.Set ds, int which) {
        // Each array is ordered: train images, train labels, test images, test labels.
        final String[] sums;
        switch (ds) {
            case COMPLETE:
                sums = new String[] {
                        "712dda0bd6f00690f32236ae4325c377", // byclass-train-images
                        "ee299a3ee5faf5c31e9406763eae7e43", // byclass-train-labels
                        "1435209e34070a9002867a9ab50160d7", // byclass-test-images
                        "7a0f934bd176c798ecba96b36fda6657"  // byclass-test-labels
                };
                break;
            case MERGE:
                sums = new String[] {
                        "4a792d4df261d7e1ba27979573bf53f3", // bymerge-train-images
                        "491be69ef99e1ab1f5b7f9ccc908bb26", // bymerge-train-labels
                        "8eb5d34c91f1759a55831c37ec2a283f", // bymerge-test-images
                        "c13f4cd5211cdba1b8fa992dae2be992"  // bymerge-test-labels
                };
                break;
            case BALANCED:
                sums = new String[] {
                        "4041b0d6f15785d3fa35263901b5496b", // balanced-train-images
                        "7a35cc7b2b7ee7671eddf028570fbd20", // balanced-train-labels
                        "6818d20fe2ce1880476f747bbc80b22b", // balanced-test-images
                        "acd3694070dcbf620e36670519d4b32f"  // balanced-test-labels
                };
                break;
            case LETTERS:
                sums = new String[] {
                        "8795078f199c478165fe18db82625747", // letters-train-images
                        "c16de4f1848ddcdddd39ab65d2a7be52", // letters-train-labels
                        "382093a19703f68edac6d01b8dfdfcad", // letters-test-images
                        "d4108920cd86601ec7689a97f2de7f59"  // letters-test-labels
                };
                break;
            case DIGITS:
                sums = new String[] {
                        "d2662ecdc47895a6bbfce25de9e9a677", // digits-train-images
                        "2223fcfee618ac9c89ef20b6e48bcf9e", // digits-train-labels
                        "a159b8b3bd6ab4ed4793c1cb71a2f5cc", // digits-test-images
                        "8afde66ea51d865689083ba6bb779fac"  // digits-test-labels
                };
                break;
            case MNIST:
                sums = new String[] {
                        "3663598a39195d030895b6304abb5065", // mnist-train-images
                        "6c092f03c9bb63e678f80f8bc605fe37", // mnist-train-labels
                        "fb51b6430fc4dd67deaada1bf25d4524", // mnist-test-images
                        "ae7f6be798a9a5d5f2bd32e078a402dd"  // mnist-test-labels
                };
                break;
            default:
                throw new UnsupportedOperationException("Unknown DataSet: " + ds);
        }
        return sums[which];
    }

    /** @return the fixed display name {@code "EMNIST"} */
    @Override
    public String getName() {
        return "EMNIST";
    }

    // --- Train files ---

    /** @return base URL followed by the gzipped training image file name */
    @Override
    public String getTrainingFilesURL() {
        final String fileName = getImagesFileName(ds, true);
        return BASE_URL + fileName;
    }

    /** @return expected MD5 string of the training image file for this subset */
    @Override
    public String getTrainingFilesMD5() {
        return emnistChecksum(ds, TRAIN_IMAGES);
    }

    /** @return gzipped training image file name */
    @Override
    public String getTrainingFilesFilename() {
        return getImagesFileName(ds, true);
    }

    /** @return training image file name without the {@code .gz} extension */
    @Override
    public String getTrainingFilesFilename_unzipped() {
        return getImagesFileNameUnzipped(ds, true);
    }

    /** @return base URL followed by the gzipped training label file name */
    @Override
    public String getTrainingFileLabelsURL() {
        final String fileName = getLabelsFileName(ds, true);
        return BASE_URL + fileName;
    }

    /** @return expected MD5 string of the training label file for this subset */
    @Override
    public String getTrainingFileLabelsMD5() {
        return emnistChecksum(ds, TRAIN_LABELS);
    }

    /** @return gzipped training label file name */
    @Override
    public String getTrainingFileLabelsFilename() {
        return getLabelsFileName(ds, true);
    }

    /** @return training label file name without the {@code .gz} extension */
    @Override
    public String getTrainingFileLabelsFilename_unzipped() {
        return getLabelsFileNameUnzipped(ds, true);
    }


    // --- Test files ---

    /** @return base URL followed by the gzipped test image file name */
    @Override
    public String getTestFilesURL() {
        final String fileName = getImagesFileName(ds, false);
        return BASE_URL + fileName;
    }

    /** @return expected MD5 string of the test image file for this subset */
    @Override
    public String getTestFilesMD5() {
        return emnistChecksum(ds, TEST_IMAGES);
    }

    /** @return gzipped test image file name */
    @Override
    public String getTestFilesFilename() {
        return getImagesFileName(ds, false);
    }

    /** @return test image file name without the {@code .gz} extension */
    @Override
    public String getTestFilesFilename_unzipped() {
        return getImagesFileNameUnzipped(ds, false);
    }

    /** @return base URL followed by the gzipped test label file name */
    @Override
    public String getTestFileLabelsURL() {
        final String fileName = getLabelsFileName(ds, false);
        return BASE_URL + fileName;
    }

    /** @return expected MD5 string of the test label file for this subset */
    @Override
    public String getTestFileLabelsMD5() {
        return emnistChecksum(ds, TEST_LABELS);
    }

    /** @return gzipped test label file name */
    @Override
    public String getTestFileLabelsFilename() {
        return getLabelsFileName(ds, false);
    }

    /** @return test label file name without the {@code .gz} extension */
    @Override
    public String getTestFileLabelsFilename_unzipped() {
        return getLabelsFileNameUnzipped(ds, false);
    }

    /**
     * Reports how many distinct labels a subset has.
     *
     * @param dataSet the subset to query
     * @return 62 for COMPLETE, 47 for MERGE and BALANCED, 26 for LETTERS,
     *         10 for DIGITS and MNIST
     * @throws UnsupportedOperationException if the constant is not one of the six handled here
     */
    public static int numLabels(EmnistDataSetIterator.Set dataSet) {
        switch (dataSet) {
            case COMPLETE:
                return 62;
            case MERGE:
            case BALANCED:
                // Both 47-class subsets share one branch.
                return 47;
            case LETTERS:
                return 26;
            case DIGITS:
            case MNIST:
                // Both digit-only subsets share one branch.
                return 10;
            default:
                throw new UnsupportedOperationException("Unknown Set: " + dataSet);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy