deepnetts.data.DataSets Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of deepnetts-core Show documentation
Show all versions of deepnetts-core Show documentation
Deep Learning Engine in pure Java. Base for reference implementation of JSR381
The newest version!
/**
* DeepNetts is a pure Java Deep Learning Library with support for Backpropagation
* based learning and image recognition.
*
* Copyright (C) 2017 Zoran Sevarac
*
* This file is part of DeepNetts.
*
* DeepNetts is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* DeepNetts is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
* Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package deepnetts.data;
import deepnetts.data.preprocessing.scale.MaxScaler;
import deepnetts.data.preprocessing.scale.Standardizer;
import deepnetts.util.ColumnType;
import deepnetts.util.CsvFormat;
import java.io.File;
import java.io.IOException;
import deepnetts.util.DeepNettsException;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.regex.Pattern;
import javax.visrec.ml.data.DataSet;
import javax.visrec.ml.data.preprocessing.Scaler;
/**
* Data set related utility methods: reading a tabular data set from a CSV
* file, and shuffling/splitting a data set into two parts.
*
* NOTE(review): this copy of the class appears to be damaged - see the note
* inside readCsv. Restore the missing code from the original source before
* compiling.
*
* @author zoran
*/
public class DataSets {
// Commonly used value delimiters that can be passed as the delimiter argument.
public final static String DELIMITER_SPACE = " ";
public final static String DELIMITER_COMMA = ",";
public final static String DELIMITER_SEMICOLON = ";";
public final static String DELIMITER_TAB = "\t";
/**
* Creates and returns a data set from the specified CSV file. Empty lines are
* skipped (as stated by the original documentation; the row-reading loop is
* not visible here - TODO confirm).
*
* If hasColumnNames is true, the first line of the file is trimmed, split
* on the delimiter and used as the column names of the data set.
*
* @param csvFile CSV file
* @param numInputs number of input values in a row
* @param numOutputs number of output values in a row
* @param hasColumnNames true if the first row contains column names
* @param delimiter delimiter used to separate values; it is passed to
* String.split, which interprets it as a regular expression
* @return instance of data set with values loaded from file
*
* @throws FileNotFoundException if file was not found
* @throws IOException if there was an error reading file
*
* TODO: Detect if there are labels in the first line; if there are no
* labels, set class1, class2, class3 in classifier evaluation, and detect
* the type of attributes. Move this method to some factory class, or make it
* a default method in data set?
*
* TODO: Should IO exceptions be wrapped with DeepNettsException?
* Autodetect delimiter, header and column type.
*
*/
public static TabularDataSet readCsv(File csvFile, int numInputs, int numOutputs, boolean hasColumnNames, String delimiter) throws FileNotFoundException, IOException {
TabularDataSet dataSet = new TabularDataSet(numInputs, numOutputs);
// NOTE(review): no close() or try-with-resources is visible for this reader
// in the surviving code - confirm the reader is closed in the original.
BufferedReader br = new BufferedReader(new FileReader(csvFile));
String line=null;
// TODO: auto detect column names - if the line contains letters then it has names.
// What if the attributes are nominal? At this stage it is assumed that they are not...
// Also, if the rows below contain strings in the same column as well - detect header.
if (hasColumnNames) { // get col names from the first line
// NOTE(review): readLine() returns null at end of stream, so an empty file
// would cause a NullPointerException on trim().
line = br.readLine().trim();
String[] colNames = line.split(delimiter);
// TODO: check the number of column names
dataSet.setColumnNames(colNames);
} else {
// No header row: allocate one name slot per input and output column.
String[] colNames = new String[numInputs+numOutputs];
// NOTE(review): the next line looks corrupted. The remainder of readCsv (the
// loop that starts here and the row parsing) and the beginning of the
// trainTestSplit declaration seem to have been lost, presumably because text
// between angle brackets was stripped. Restore from the original source.
for(int i=0; i[] trainTestSplit(DataSet> dataSet, double split) {
// Shuffle the data set, then split it into two parts with the proportions
// split and 1-split.
dataSet.shuffle();
return dataSet.split(split, 1-split);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy