
org.riversun.ml.fakedatamaker.RegressionDataSetGeneratorARFF Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of fake-data-maker Show documentation
Show all versions of fake-data-maker Show documentation
Generate fake data for regression analysis
The newest version!
/* fake-data-maker : Generate fake data for machine learning
*
* Copyright (c) 2019 Tom Misawa, [email protected]
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*/
package org.riversun.ml.fakedatamaker;
import java.util.LinkedHashMap;
import java.util.Map;
import org.riversun.ml.fakedatamaker.AttributeNumeric.AttributeNumericValue;
/**
* Generate ARFF data for Regression
*
* @author Tom Misawa ([email protected])
*
*/
public class RegressionDataSetGeneratorARFF extends DataSetGenerator {

    /**
     * Generates a single data row with no random volatility applied to the
     * base value.
     *
     * @param baseValue
     *            starting value of the target before the attribute
     *            coefficients are multiplied in
     * @return one comma-separated data row (see
     *         {@link #generateARFFRandomValues(double, double)})
     */
    public String generateCsvRandomValues(double baseValue) {
        return generateARFFRandomValues(baseValue, 0.0);
    }

    /**
     * Aligns the type column of three-part header lines
     * ("keyword label type") so that every type starts at the same column.
     * Lines with fewer than three parts (blank lines, "@RELATION name") are
     * copied unchanged.
     *
     * @param csvLabels
     *            header text, one declaration per line
     * @return the re-aligned header text; every line ends with "\n"
     */
    private String reformatARFFHeaders(String csvLabels) {
        String[] lines = csvLabels.split("\n");

        // First pass: find the widest label among the three-part lines.
        // The split limit of 3 keeps everything after the label together, so a
        // type spec that itself contains spaces (e.g. a nominal value list)
        // is treated as one part instead of causing the line to be dropped.
        int maxLabelLength = 0;
        for (String line : lines) {
            String[] parts = line.split(" ", 3);
            if (parts.length == 3) {
                maxLabelLength = Math.max(maxLabelLength, parts[1].length());
            }
        }

        // Second pass: emit each line, padding labels to the common width.
        StringBuilder sb = new StringBuilder();
        for (String line : lines) {
            String[] parts = line.split(" ", 3);
            if (parts.length < 3) {
                sb.append(line).append("\n");
                continue;
            }
            sb.append(parts[0]).append(" ").append(parts[1]);
            // At least two spaces always separate the label from the type.
            int numSpacer = maxLabelLength - parts[1].length() + 2;
            for (int j = 0; j < numSpacer; j++) {
                sb.append(" ");
            }
            sb.append(parts[2]).append("\n");
        }
        return sb.toString();
    }

    /**
     * Builds the ARFF header: an "@RELATION" line, a blank line, one
     * "@ATTRIBUTE" line per entry of {@code attrs}, and a final NUMERIC
     * "@ATTRIBUTE" line for the target.
     * <p>
     * {@code nameOfData} and {@code attrs} are not declared in this class
     * (presumably inherited from {@code DataSetGenerator}).
     *
     * @param resultLabel
     *            label of the target attribute
     * @return the header text; every line ends with "\n"
     */
    private String toARFFHeaders(String resultLabel) {
        StringBuilder sb = new StringBuilder();
        sb.append("@RELATION").append(" ").append(this.nameOfData).append("\n");
        sb.append("\n");

        for (Attribute attr : attrs) {
            sb.append("@ATTRIBUTE").append(" ").append(attr.label).append(" ");
            if (attr.isNominal) {
                sb.append("{");
                // Separator-first joining: an empty nominal list yields "{}"
                // rather than deleting the opening brace.
                boolean first = true;
                for (AttributeNominal nominal : attr.nominals) {
                    if (!first) {
                        sb.append(",");
                    }
                    sb.append(nominal.name);
                    first = false;
                }
                sb.append("}");
            } else {
                sb.append("NUMERIC");
            }
            sb.append("\n");
        }

        sb.append("@ATTRIBUTE").append(" ").append(resultLabel).append(" ").append("NUMERIC").append("\n");
        return sb.toString();
    }

    /**
     * Generates a complete data set in ARFF format.
     *
     * @param numOfLines
     *            Number of data rows to generate
     * @param targetLabel
     *            Label of the target attribute to predict
     * @param targetInitialValue
     *            The initial value that will be used for multiplication when
     *            calculating the target.
     * @param valueVolatility
     *            Scale of the random amount added to the initial value,
     *            computed by "volatility * MyMath.random() *
     *            targetInitialValue"
     * @param withHeader
     *            true: emit the "@RELATION"/"@ATTRIBUTE" header followed by a
     *            blank line. The "@DATA" line is written regardless of this
     *            flag.
     * @param withId
     *            Not used by the ARFF generator (no id column is produced)
     * @return the generated text; every line ends with "\n"
     */
    public String generateCSV(int numOfLines, String targetLabel, double targetInitialValue, double valueVolatility,
            boolean withHeader, boolean withId) {
        StringBuilder sb = new StringBuilder();

        if (withHeader) {
            sb.append(reformatARFFHeaders(toARFFHeaders(targetLabel)));
            sb.append("\n");
        }

        sb.append("@DATA");
        sb.append("\n");

        for (int i = 0; i < numOfLines; i++) {
            sb.append(generateARFFRandomValues(targetInitialValue, valueVolatility));
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Generates one comma-separated data row: a random value per attribute
     * followed by the computed target value.
     * <p>
     * The target starts at "baseValue + volatility * MyMath.random() *
     * baseValue" and is multiplied by the coefficient of every generated
     * attribute value. Rows are regenerated until
     * {@code compliantListener.isCompliant(...)} accepts the attribute
     * values.
     *
     * @param baseValue
     *            starting value of the target
     * @param volatility
     *            scale of the random amount added to the base value
     * @return the row, without a trailing line break; numeric attribute values
     *         and the target are truncated to int
     */
    public String generateARFFRandomValues(double baseValue, double volatility) {
        // NOTE(review): raw Map kept as in the original; presumably
        // Map<String, Object> - confirm against AttributeCheck's constructor.
        Map attributeValues = new LinkedHashMap<>();
        final StringBuilder sb = new StringBuilder();
        double computedValue = 0;

        do {
            // Each attempt starts from a fresh base value and an empty row.
            // Map entries from a rejected attempt are overwritten below
            // because every attribute label is put again.
            computedValue = baseValue + (volatility * MyMath.random() * baseValue);
            sb.setLength(0);

            for (Attribute attr : attrs) {
                if (attr.isNominal) {
                    AttributeNominal randomNominal = attr.generateRandomNominal();
                    sb.append(randomNominal.name);
                    sb.append(",");
                    computedValue *= randomNominal.coefficient;
                    attributeValues.put(attr.label, randomNominal);
                } else {
                    AttributeNumericValue randomNumeric = attr.generateRandomNumeric();
                    sb.append((int) randomNumeric.numericValue);
                    sb.append(",");
                    computedValue *= randomNumeric.coefficient;
                    attributeValues.put(attr.label, randomNumeric);
                }
            }
        } while (!compliantListener.isCompliant(new AttributeCheck(attributeValues)));

        sb.append((int) computedValue);
        return sb.toString();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy