
/*
 * aima.core.learning.neural.NNDataSet (part of the aima-core artifact).
 * AIMA-Java Core Algorithms from the book "Artificial Intelligence: A Modern Approach", 3rd Ed.
 */
package aima.core.learning.neural;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import aima.core.learning.data.DataResource;
import aima.core.learning.framework.DataSet;
import aima.core.learning.framework.Example;
import aima.core.util.Util;
import aima.core.util.datastructure.Pair;
/**
* @author Ravi Mohan
*
*/
public abstract class NNDataSet {
/*
* This class represents a source of examples to the rest of the nn
* framework. Assumes only one function approximator works on an instance at
* a given point in time
*/
/*
* the parsed and preprocessed form of the dataset.
*/
private List dataset;
/*
* a copy from which examples are drawn.
*/
private List presentlyProcessed = new ArrayList();;
/*
* list of mean Values for all components of raw data set
*/
private List means;
/*
* list of stdev Values for all components of raw data set
*/
private List stdevs;
/*
* the normalized data set
*/
protected List> nds;
/*
* the column numbers of the "target"
*/
protected List targetColumnNumbers;
/*
* population delegated to subclass because only subclass knows which
* column(s) is target
*/
public abstract void setTargetColumns();
/*
* create a normalized data "table" from the data in the file. At this
* stage, the data isnot split into input pattern and tragets
*/
public void createNormalizedDataFromFile(String filename) throws Exception {
List> rds = new ArrayList>();
// create raw data set
BufferedReader reader = new BufferedReader(new InputStreamReader(
DataResource.class.getResourceAsStream(filename + ".csv")));
String line;
while ((line = reader.readLine()) != null) {
rds.add(exampleFromString(line, ","));
}
// normalize raw dataset
nds = normalize(rds);
}
/*
* create a normalized data "table" from the DataSet using numerizer. At
* this stage, the data isnot split into input pattern and targets TODO
* remove redundancy of recreating the target columns. the numerizer has
* already isolated the targets
*/
public void createNormalizedDataFromDataSet(DataSet ds, Numerizer numerizer)
throws Exception {
List> rds = rawExamplesFromDataSet(ds, numerizer);
// normalize raw dataset
nds = normalize(rds);
}
/*
* Gets (and removes) a random example from the 'presentlyProcessed'
*/
public NNExample getExampleAtRandom() {
int i = Util.randomNumberBetween(0, (presentlyProcessed.size() - 1));
return presentlyProcessed.remove(i);
}
/*
* Gets (and removes) a random example from the 'presentlyProcessed'
*/
public NNExample getExample(int index) {
return presentlyProcessed.remove(index);
}
/*
* check if any more examples remain to be processed
*/
public boolean hasMoreExamples() {
return presentlyProcessed.size() > 0;
}
/*
* check how many examples remain to be processed
*/
public int howManyExamplesLeft() {
return presentlyProcessed.size();
}
/*
* refreshes the presentlyProcessed dataset so it can be used for a new
* epoch of training.
*/
public void refreshDataset() {
presentlyProcessed = new ArrayList();
for (NNExample e : dataset) {
presentlyProcessed.add(e.copyExample());
}
}
/*
* method called by clients to set up data set and make it ready for
* processing
*/
public void createExamplesFromFile(String filename) throws Exception {
createNormalizedDataFromFile(filename);
setTargetColumns();
createExamples();
}
/*
* method called by clients to set up data set and make it ready for
* processing
*/
public void createExamplesFromDataSet(DataSet ds, Numerizer numerizer)
throws Exception {
createNormalizedDataFromDataSet(ds, numerizer);
setTargetColumns();
createExamples();
}
public List> getNormalizedData() {
return nds;
}
public List getMeans() {
return means;
}
public List getStdevs() {
return stdevs;
}
//
// PRIVATE METHODS
//
/*
* create Example instances from a normalized data "table".
*/
private void createExamples() {
dataset = new ArrayList();
for (List dataLine : nds) {
List input = new ArrayList();
List target = new ArrayList();
for (int i = 0; i < dataLine.size(); i++) {
if (targetColumnNumbers.contains(i)) {
target.add(dataLine.get(i));
} else {
input.add(dataLine.get(i));
}
}
dataset.add(new NNExample(input, target));
}
refreshDataset();// to populate the preentlyProcessed dataset
}
private List> normalize(List> rds) {
int rawDataLength = rds.get(0).size();
List> nds = new ArrayList>();
means = new ArrayList();
stdevs = new ArrayList();
List> normalizedColumns = new ArrayList>();
// clculate means for each coponent of example data
for (int i = 0; i < rawDataLength; i++) {
List columnValues = new ArrayList();
for (List rawDatum : rds) {
columnValues.add(rawDatum.get(i));
}
double mean = Util.calculateMean(columnValues);
means.add(mean);
double stdev = Util.calculateStDev(columnValues, mean);
stdevs.add(stdev);
normalizedColumns.add(Util.normalizeFromMeanAndStdev(columnValues,
mean, stdev));
}
// re arrange data from columns
// TODO Assert normalized columns have same size etc
int columnLength = normalizedColumns.get(0).size();
int numberOfColumns = normalizedColumns.size();
for (int i = 0; i < columnLength; i++) {
List lst = new ArrayList();
for (int j = 0; j < numberOfColumns; j++) {
lst.add(normalizedColumns.get(j).get(i));
}
nds.add(lst);
}
return nds;
}
private List exampleFromString(String line, String separator) {
// assumes all values for inout and target are doubles
List rexample = new ArrayList();
List attributeValues = Arrays.asList(line.split(separator));
for (String valString : attributeValues) {
rexample.add(Double.parseDouble(valString));
}
return rexample;
}
private List> rawExamplesFromDataSet(DataSet ds,
Numerizer numerizer) {
// assumes all values for inout and target are doubles
List> rds = new ArrayList>();
for (int i = 0; i < ds.size(); i++) {
List rexample = new ArrayList();
Example e = ds.getExample(i);
Pair, List> p = numerizer.numerize(e);
List attributes = p.getFirst();
for (Double d : attributes) {
rexample.add(d);
}
List targets = p.getSecond();
for (Double d : targets) {
rexample.add(d);
}
rds.add(rexample);
}
return rds;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy