// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// org.openimaj.image.annotation.evaluation.datasets.CIFAR10Dataset Maven / Gradle / Ivy

/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.image.annotation.evaluation.datasets;

import java.io.DataInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileSystemManager;
import org.apache.commons.vfs2.VFS;
import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.data.DataUtils;
import org.openimaj.data.dataset.GroupedDataset;
import org.openimaj.data.dataset.ListBackedDataset;
import org.openimaj.data.dataset.ListDataset;
import org.openimaj.data.dataset.MapBackedDataset;
import org.openimaj.experiment.annotations.DatasetDescription;
import org.openimaj.image.MBFImage;
import org.openimaj.image.annotation.evaluation.datasets.cifar.BinaryReader;

/**
 * CIFAR-10 Dataset. Contains 60000 tiny images in 10 classes (6000 per class).
 * Each image is 32x32 pixels.
 *
 * @author Jonathon Hare ([email protected])
 *
 */
@Reference(
		type = ReferenceType.Article,
		author = { "Krizhevsky, A.", "Hinton, G." },
		title = "Learning multiple layers of features from tiny images",
		year = "2009",
		journal = "Master's thesis, Department of Computer Science, University of Toronto",
		publisher = "Citeseer")
@DatasetDescription(
		name = "CIFAR-10",
		description = "The CIFAR-10 dataset consists of 60000 32x32 colour "
				+ "images in 10 classes, with 6000 images per class. There are "
				+ "50000 training images and 10000 test images. The dataset is "
				+ "divided into five training batches and one test batch, each "
				+ "with 10000 images. The test batch contains exactly 1000 "
				+ "randomly-selected images from each class. The training batches "
				+ "contain the remaining images in random order, but some training "
				+ "batches may contain more images from one class than another. "
				+ "Between them, the training batches contain exactly 5000 images "
				+ "from each class.",
		creator = "Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton",
		url = "http://www.cs.toronto.edu/~kriz/cifar.html",
		downloadUrls = {
				"http://datasets.openimaj.org/cifar/cifar-10-binary.tar.gz",
		})
public class CIFAR10Dataset extends CIFARDataset {
	private static final String DATA_TGZ = "cifar/cifar-10-binary.tar.gz";
	private static final String DOWNLOAD_URL = "http://datasets.openimaj.org/cifar/cifar-10-binary.tar.gz";

	private static final String[] TRAINING_FILES = {
			"data_batch_1.bin",
			"data_batch_2.bin",
			"data_batch_3.bin",
			"data_batch_4.bin",
			"data_batch_5.bin" };
	private static final String TEST_FILE = "test_batch.bin";
	private static final String CLASSES_FILE = "batches.meta.txt";

	private CIFAR10Dataset() {
	}

	private static String downloadAndGetPath() throws IOException {
		final File dataset = DataUtils.getDataLocation(DATA_TGZ);

		if (!(dataset.exists())) {
			dataset.getParentFile().mkdirs();
			FileUtils.copyURLToFile(new URL(DOWNLOAD_URL), dataset);
		}

		return "tgz:file:" + dataset.toString() + "!cifar-10-batches-bin/";
	}

	/**
	 * Load the training images using the given reader. To load the images as
	 * {@link MBFImage}s, you would do the following: 
	 * CIFAR10Dataset.getTrainingImages(CIFAR10Dataset.MBFIMAGE_READER);
	 * 
	 *
	 * @param reader
	 *            the reader
	 * @return the training image dataset
	 * @throws IOException
	 */
	public static  GroupedDataset, IMAGE> getTrainingImages(BinaryReader reader)
			throws IOException
	{
		final MapBackedDataset, IMAGE> dataset = new MapBackedDataset, IMAGE>();

		final FileSystemManager fsManager = VFS.getManager();
		final FileObject base = fsManager.resolveFile(downloadAndGetPath());

		final List classList = loadClasses(dataset, base);

		for (final String t : TRAINING_FILES) {
			DataInputStream is = null;
			try {
				is = new DataInputStream(base.resolveFile(t).getContent().getInputStream());

				loadData(is, dataset, classList, reader);
			} finally {
				IOUtils.closeQuietly(is);
			}
		}

		return dataset;
	}

	private static  List loadClasses(final MapBackedDataset, IMAGE> dataset,
			final FileObject base) throws FileSystemException, IOException
			{
		InputStream classStream = null;
		List classList = null;
		try {
			classStream = base.resolveFile(CLASSES_FILE).getContent().getInputStream();
			classList = IOUtils.readLines(classStream);
		} finally {
			IOUtils.closeQuietly(classStream);
		}

		for (final String clz : classList)
			dataset.put(clz, new ListBackedDataset());
		return classList;
			}

	private static  void loadData(DataInputStream is,
			MapBackedDataset, IMAGE> dataset, List classList,
			BinaryReader reader) throws IOException
	{

		for (int i = 0; i < 10000; i++) {
			final int clz = is.read();
			final String clzStr = classList.get(clz);
			final byte[] record = new byte[WIDTH * HEIGHT * 3];
			is.readFully(record);

			dataset.get(clzStr).add(reader.read(record));
		}
	}

	/**
	 * Load the test images using the given reader. To load the images as
	 * {@link MBFImage}s, you would do the following: 
	 * CIFAR10Dataset.getTestImages(CIFAR10Dataset.MBFIMAGE_READER);
	 * 
	 *
	 * @param reader
	 *            the reader
	 * @return the test image dataset
	 * @throws IOException
	 */
	public static  GroupedDataset, IMAGE> getTestImages(BinaryReader reader)
			throws IOException
	{
		final MapBackedDataset, IMAGE> dataset = new MapBackedDataset, IMAGE>();

		final FileSystemManager fsManager = VFS.getManager();
		final FileObject base = fsManager.resolveFile(downloadAndGetPath());

		final List classList = loadClasses(dataset, base);

		DataInputStream is = null;
		try {
			is = new DataInputStream(base.resolveFile(TEST_FILE).getContent().getInputStream());
			loadData(is, dataset, classList, reader);
		} finally {
			IOUtils.closeQuietly(is);
		}

		return dataset;
	}
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy