All downloads are free. The search and download functionality uses the official Maven repository.

org.riversun.ml.fakedatamaker.RegressionDataSetGeneratorCSV Maven / Gradle / Ivy

The newest version!

/* fake-data-maker : Generate fake data for machine learning
*
*  Copyright (c) 2019 Tom Misawa, [email protected]
*  
*  Permission is hereby granted, free of charge, to any person obtaining a
*  copy of this software and associated documentation files (the "Software"),
*  to deal in the Software without restriction, including without limitation
*  the rights to use, copy, modify, merge, publish, distribute, sublicense,
*  and/or sell copies of the Software, and to permit persons to whom the
*  Software is furnished to do so, subject to the following conditions:
*  
*  The above copyright notice and this permission notice shall be included in
*  all copies or substantial portions of the Software.
*  
*  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*  DEALINGS IN THE SOFTWARE.
*  
*/

package org.riversun.ml.fakedatamaker;

import java.util.LinkedHashMap;
import java.util.Map;

import org.riversun.ml.fakedatamaker.AttributeNumeric.AttributeNumericValue;

/**
 * Generate CSV data for Regression
 * 
 * @author Tom Misawa ([email protected])
 *
 */
public class RegressionDataSetGeneratorCSV extends DataSetGenerator {

    /**
     * Builds a single CSV data row with no volatility applied to the target.
     * <p>
     * Shorthand for {@link #generateCsvRandomValues(double, double)} with a
     * volatility of {@code 0.0}.
     *
     * @param baseValue
     *                      starting value of the target before the attribute
     *                      coefficients are multiplied in
     * @return one comma-separated row (no trailing line break)
     */
    public String generateCsvRandomValues(double baseValue) {
        final double noVolatility = 0.0;
        return generateCsvRandomValues(baseValue, noVolatility);
    }

    /**
     * Builds the header row: every attribute label followed by a comma, then
     * the label of the target column.
     *
     * @param resultLabel
     *                        label of the target (last) column
     * @return the header row without a trailing line break
     */
    private String toCsvLabels(String resultLabel) {
        final StringBuilder header = new StringBuilder();
        for (Attribute attribute : attrs) {
            header.append(attribute.label).append(",");
        }
        return header.append(resultLabel).toString();
    }

    /**
     * Generates a complete CSV document for a regression data set.
     * <p>
     * Each row is terminated by {@code "\n"}. Field values are written as-is;
     * no CSV quoting or escaping is applied.
     *
     * @param numOfLines
     *                               number of data rows to generate
     * @param targetLabel
     *                               header label of the target (predicted) column
     * @param targetInitialValue
     *                               starting value of the target; it is multiplied
     *                               by the coefficient of every generated attribute
     *                               value
     * @param valueVolatility
     *                               factor of the random offset added to the
     *                               starting value, i.e.
     *                               {@code valueVolatility * MyMath.random() * targetInitialValue}
     * @param withHeader
     *                               {@code true} to emit a header row first
     * @param withId
     *                               {@code true} to prepend a sequential id column
     *                               (starting at 0) to every data row, and an
     *                               {@code id} label to the header row if present
     * @return the generated CSV text
     */
    public String generateCSV(int numOfLines, String targetLabel, double targetInitialValue, double valueVolatility,
            boolean withHeader, boolean withId) {

        final StringBuilder csv = new StringBuilder();

        if (withHeader) {
            final String idColumn = withId ? "id," : "";
            csv.append(idColumn).append(toCsvLabels(targetLabel)).append("\n");
        }

        int rowId = 0;
        while (rowId < numOfLines) {
            if (withId) {
                csv.append(rowId).append(",");
            }
            csv.append(generateCsvRandomValues(targetInitialValue, valueVolatility)).append("\n");
            rowId++;
        }

        return csv.toString();
    }

    /**
     * Builds a single CSV data row: one random value per attribute followed by
     * the computed target value.
     * <p>
     * The target starts as
     * {@code baseValue + (volatility * MyMath.random() * baseValue)} and is then
     * multiplied by the coefficient of each generated attribute value, in
     * attribute order. Numeric attribute values and the final target are
     * truncated to {@code int} when written.
     * <p>
     * A candidate row is regenerated from scratch until
     * {@code compliantListener} accepts the generated attribute values.
     *
     * @param baseValue
     *                       starting value of the target
     * @param volatility
     *                       factor of the random offset added to
     *                       {@code baseValue}
     * @return one comma-separated row (no trailing line break)
     */
    public String generateCsvRandomValues(double baseValue, double volatility) {

        // NOTE(review): declared as a raw Map, exactly as in the original; the
        // parameter type expected by AttributeCheck is not visible from here.
        // Entries are overwritten on each retry, as every attempt writes the same labels.
        Map generated = new LinkedHashMap<>();
        final StringBuilder row = new StringBuilder();

        double target = 0;
        boolean accepted = false;

        while (!accepted) {
            // Draw the volatility offset first, then the attributes, on every attempt.
            target = baseValue + (volatility * MyMath.random() * baseValue);

            // Discard whatever a rejected previous attempt left behind.
            row.setLength(0);

            for (Attribute attribute : attrs) {
                if (!attribute.isNominal) {
                    AttributeNumericValue numeric = attribute.generateRandomNumeric();
                    row.append((int) numeric.numericValue).append(",");
                    target *= numeric.coefficient;
                    generated.put(attribute.label, numeric);
                } else {
                    AttributeNominal nominal = attribute.generateRandomNominal();
                    row.append(nominal.name).append(",");
                    target *= nominal.coefficient;
                    generated.put(attribute.label, nominal);
                }
            }

            accepted = compliantListener.isCompliant(new AttributeCheck(generated));
        }

        return row.append((int) target).toString();
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy