All Downloads are FREE. Search and download functionalities are using the official Maven repository.

aima.core.learning.neural.NNDataSet Maven / Gradle / Ivy

Go to download

AIMA-Java Core: algorithms from the book "Artificial Intelligence: A Modern Approach", 3rd Ed.

The newest version!
package aima.core.learning.neural;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import aima.core.learning.data.DataResource;
import aima.core.learning.framework.DataSet;
import aima.core.learning.framework.Example;
import aima.core.util.Util;
import aima.core.util.datastructure.Pair;

/**
 * @author Ravi Mohan
 * 
 */
public abstract class NNDataSet {
	/*
	 * This class represents a source of examples to the rest of the nn
	 * framework. Assumes only one function approximator works on an instance at
	 * a given point in time
	 */
	/*
	 * the parsed and preprocessed form of the dataset.
	 */
	private List dataset;
	/*
	 * a copy from which examples are drawn.
	 */
	private List presentlyProcessed = new ArrayList();;

	/*
	 * list of mean Values for all components of raw data set
	 */
	private List means;

	/*
	 * list of stdev Values for all components of raw data set
	 */
	private List stdevs;
	/*
	 * the normalized data set
	 */
	protected List> nds;

	/*
	 * the column numbers of the "target"
	 */

	protected List targetColumnNumbers;

	/*
	 * population delegated to subclass because only subclass knows which
	 * column(s) is target
	 */
	public abstract void setTargetColumns();

	/*
	 * create a normalized data "table" from the data in the file. At this
	 * stage, the data isnot split into input pattern and tragets
	 */
	public void createNormalizedDataFromFile(String filename) throws Exception {

		List> rds = new ArrayList>();

		// create raw data set
		BufferedReader reader = new BufferedReader(new InputStreamReader(
				DataResource.class.getResourceAsStream(filename + ".csv")));
		String line;
		while ((line = reader.readLine()) != null) {
			rds.add(exampleFromString(line, ","));
		}

		// normalize raw dataset
		nds = normalize(rds);
	}

	/*
	 * create a normalized data "table" from the DataSet using numerizer. At
	 * this stage, the data isnot split into input pattern and targets TODO
	 * remove redundancy of recreating the target columns. the numerizer has
	 * already isolated the targets
	 */
	public void createNormalizedDataFromDataSet(DataSet ds, Numerizer numerizer)
			throws Exception {

		List> rds = rawExamplesFromDataSet(ds, numerizer);
		// normalize raw dataset
		nds = normalize(rds);
	}

	/*
	 * Gets (and removes) a random example from the 'presentlyProcessed'
	 */
	public NNExample getExampleAtRandom() {

		int i = Util.randomNumberBetween(0, (presentlyProcessed.size() - 1));
		return presentlyProcessed.remove(i);
	}

	/*
	 * Gets (and removes) a random example from the 'presentlyProcessed'
	 */
	public NNExample getExample(int index) {

		return presentlyProcessed.remove(index);
	}

	/*
	 * check if any more examples remain to be processed
	 */
	public boolean hasMoreExamples() {
		return presentlyProcessed.size() > 0;
	}

	/*
	 * check how many examples remain to be processed
	 */
	public int howManyExamplesLeft() {
		return presentlyProcessed.size();
	}

	/*
	 * refreshes the presentlyProcessed dataset so it can be used for a new
	 * epoch of training.
	 */
	public void refreshDataset() {
		presentlyProcessed = new ArrayList();
		for (NNExample e : dataset) {
			presentlyProcessed.add(e.copyExample());
		}
	}

	/*
	 * method called by clients to set up data set and make it ready for
	 * processing
	 */
	public void createExamplesFromFile(String filename) throws Exception {
		createNormalizedDataFromFile(filename);
		setTargetColumns();
		createExamples();

	}

	/*
	 * method called by clients to set up data set and make it ready for
	 * processing
	 */
	public void createExamplesFromDataSet(DataSet ds, Numerizer numerizer)
			throws Exception {
		createNormalizedDataFromDataSet(ds, numerizer);
		setTargetColumns();
		createExamples();

	}

	public List> getNormalizedData() {
		return nds;
	}

	public List getMeans() {
		return means;
	}

	public List getStdevs() {
		return stdevs;
	}

	//
	// PRIVATE METHODS
	//

	/*
	 * create Example instances from a normalized data "table".
	 */
	private void createExamples() {
		dataset = new ArrayList();
		for (List dataLine : nds) {
			List input = new ArrayList();
			List target = new ArrayList();
			for (int i = 0; i < dataLine.size(); i++) {
				if (targetColumnNumbers.contains(i)) {
					target.add(dataLine.get(i));
				} else {
					input.add(dataLine.get(i));
				}
			}
			dataset.add(new NNExample(input, target));
		}
		refreshDataset();// to populate the preentlyProcessed dataset
	}

	private List> normalize(List> rds) {
		int rawDataLength = rds.get(0).size();
		List> nds = new ArrayList>();

		means = new ArrayList();
		stdevs = new ArrayList();

		List> normalizedColumns = new ArrayList>();
		// clculate means for each coponent of example data
		for (int i = 0; i < rawDataLength; i++) {
			List columnValues = new ArrayList();
			for (List rawDatum : rds) {
				columnValues.add(rawDatum.get(i));
			}
			double mean = Util.calculateMean(columnValues);
			means.add(mean);

			double stdev = Util.calculateStDev(columnValues, mean);
			stdevs.add(stdev);

			normalizedColumns.add(Util.normalizeFromMeanAndStdev(columnValues,
					mean, stdev));

		}
		// re arrange data from columns
		// TODO Assert normalized columns have same size etc

		int columnLength = normalizedColumns.get(0).size();
		int numberOfColumns = normalizedColumns.size();
		for (int i = 0; i < columnLength; i++) {
			List lst = new ArrayList();
			for (int j = 0; j < numberOfColumns; j++) {
				lst.add(normalizedColumns.get(j).get(i));
			}
			nds.add(lst);
		}
		return nds;
	}

	private List exampleFromString(String line, String separator) {
		// assumes all values for inout and target are doubles
		List rexample = new ArrayList();
		List attributeValues = Arrays.asList(line.split(separator));
		for (String valString : attributeValues) {
			rexample.add(Double.parseDouble(valString));
		}
		return rexample;
	}

	private List> rawExamplesFromDataSet(DataSet ds,
			Numerizer numerizer) {
		// assumes all values for inout and target are doubles
		List> rds = new ArrayList>();
		for (int i = 0; i < ds.size(); i++) {
			List rexample = new ArrayList();
			Example e = ds.getExample(i);
			Pair, List> p = numerizer.numerize(e);
			List attributes = p.getFirst();
			for (Double d : attributes) {
				rexample.add(d);
			}
			List targets = p.getSecond();
			for (Double d : targets) {
				rexample.add(d);
			}
			rds.add(rexample);
		}
		return rds;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy