
org.riversun.ml.fakedatamaker.RegressionDataSetGeneratorCSV Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of fake-data-maker Show documentation
Show all versions of fake-data-maker Show documentation
Generate fake data for regression analysis
The newest version!
/* fake-data-maker : Generate fake data for machine learning
*
* Copyright (c) 2019 Tom Misawa, [email protected]
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*/
package org.riversun.ml.fakedatamaker;
import java.util.LinkedHashMap;
import java.util.Map;
import org.riversun.ml.fakedatamaker.AttributeNumeric.AttributeNumericValue;
/**
 * Generate CSV data for Regression.
 *
 * <p>
 * Each generated row holds one randomly generated value per attribute in
 * {@code attrs}, followed by a target value that is derived from a base value
 * multiplied by the coefficient of every generated attribute value.
 *
 * @author Tom Misawa ([email protected])
 *
 */
public class RegressionDataSetGeneratorCSV extends DataSetGenerator {

    /**
     * Generates a single CSV row without any volatility applied to the base
     * value.
     *
     * @param baseValue
     *            The initial value that is multiplied by each attribute's
     *            coefficient to obtain the target value
     * @return one CSV row (no trailing line break), same as
     *         {@code generateCsvRandomValues(baseValue, 0.0)}
     */
    public String generateCsvRandomValues(double baseValue) {
        return generateCsvRandomValues(baseValue, 0.0);
    }

    /**
     * Builds the CSV header: every attribute label followed by the label of the
     * target column, separated by commas.
     *
     * @param resultLabel
     *            label of the last (target) column
     * @return the header row without a trailing line break
     */
    private String toCsvLabels(String resultLabel) {
        final StringBuilder header = new StringBuilder();
        for (Attribute attr : attrs) {
            header.append(attr.label).append(",");
        }
        header.append(resultLabel);
        return header.toString();
    }

    /**
     * Generates a complete CSV document as a dummy data set.
     *
     * @param numOfLines
     *            Number of data lines to generate (the header row is not
     *            counted). No data line is produced when this is zero or
     *            negative.
     * @param targetLabel
     *            Target to predict; used as the header label of the last
     *            column
     * @param targetInitialValue
     *            The initial value that will be used for multiplication when
     *            calculating the target.
     * @param valueVolatility
     *            Scale of the random value added to the initial value. The
     *            added value is computed by
     *            "valueVolatility * MyMath.random() * targetInitialValue"
     * @param withHeader
     *            true:generate csv with header row.
     *
     * @param withId
     *            true:generate autogenerated sequential id column. Ids start
     *            at 0 and the header label of the column is "id".
     * @return the CSV text; every row, including the last one, is terminated
     *         by "\n"
     */
    public String generateCSV(int numOfLines, String targetLabel, double targetInitialValue, double valueVolatility,
            boolean withHeader, boolean withId) {
        final StringBuilder csv = new StringBuilder();

        if (withHeader) {
            if (withId) {
                csv.append("id,");
            }
            csv.append(toCsvLabels(targetLabel)).append("\n");
        }

        for (int rowId = 0; rowId < numOfLines; rowId++) {
            if (withId) {
                csv.append(rowId).append(",");
            }
            csv.append(generateCsvRandomValues(targetInitialValue, valueVolatility)).append("\n");
        }

        return csv.toString();
    }

    /**
     * Generates a single CSV row: one random value per attribute followed by
     * the computed target value.
     *
     * <p>
     * The target starts as
     * {@code baseValue + (volatility * MyMath.random() * baseValue)} and is then
     * multiplied by the coefficient of each generated attribute value. Numeric
     * attribute values and the target are written truncated to {@code int}.
     *
     * <p>
     * A candidate row is regenerated until {@code compliantListener} accepts
     * it. There is no retry limit, so a listener that never accepts any
     * combination keeps this method looping.
     *
     * @param baseValue
     *            The initial value used for calculating the target
     * @param volatility
     *            Scale of the random value added to {@code baseValue}
     * @return one CSV row without a trailing line break
     */
    public String generateCsvRandomValues(double baseValue, double volatility) {
        // Keyed by attribute label. Every pass of the loop below rewrites every
        // key, so the map never carries values over from a rejected candidate.
        final Map<String, Object> attributeValues = new LinkedHashMap<>();
        final StringBuilder row = new StringBuilder();
        double computedValue;

        do {
            // Start over: a rejected candidate must not leak into the next one.
            computedValue = baseValue + (volatility * MyMath.random() * baseValue);
            row.setLength(0);

            for (Attribute attr : attrs) {
                if (attr.isNominal) {
                    final AttributeNominal randomNominal = attr.generateRandomNominal();
                    row.append(randomNominal.name).append(",");
                    computedValue *= randomNominal.coefficient;
                    attributeValues.put(attr.label, randomNominal);
                } else {
                    final AttributeNumericValue randomNumeric = attr.generateRandomNumeric();
                    // The cast drops the fractional part of the numeric value.
                    row.append((int) randomNumeric.numericValue).append(",");
                    computedValue *= randomNumeric.coefficient;
                    attributeValues.put(attr.label, randomNumeric);
                }
            }
        } while (!compliantListener.isCompliant(new AttributeCheck(attributeValues)));

        row.append((int) computedValue);
        return row.toString();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy