All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.riversun.ml.fakedatamaker.RegressionDataSetGeneratorARFF Maven / Gradle / Ivy

The newest version!
/* fake-data-maker : Generate fake data for machine learning
*
*  Copyright (c) 2019 Tom Misawa, [email protected]
*  
*  Permission is hereby granted, free of charge, to any person obtaining a
*  copy of this software and associated documentation files (the "Software"),
*  to deal in the Software without restriction, including without limitation
*  the rights to use, copy, modify, merge, publish, distribute, sublicense,
*  and/or sell copies of the Software, and to permit persons to whom the
*  Software is furnished to do so, subject to the following conditions:
*  
*  The above copyright notice and this permission notice shall be included in
*  all copies or substantial portions of the Software.
*  
*  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*  DEALINGS IN THE SOFTWARE.
*  
*/

package org.riversun.ml.fakedatamaker;

import java.util.LinkedHashMap;
import java.util.Map;

import org.riversun.ml.fakedatamaker.AttributeNumeric.AttributeNumericValue;

/**
 * Generate ARFF data for Regression
 * <p>
 * Produces an ARFF document made of a {@code @RELATION} line, one
 * {@code @ATTRIBUTE} line per configured attribute plus one for the numeric
 * target, and a {@code @DATA} section whose rows are generated randomly from
 * the attributes inherited from {@link DataSetGenerator}.
 * 
 * @author Tom Misawa ([email protected])
 *
 */
public class RegressionDataSetGeneratorARFF extends DataSetGenerator {

    /** Keyword that starts an attribute declaration line in the ARFF header. */
    private static final String ATTRIBUTE_KEYWORD = "@ATTRIBUTE";

    /**
     * Generate a single data row without any volatility applied to the target.
     * 
     * @param baseValue
     *                  The initial value of the target before the attribute
     *                  coefficients are applied
     * @return one comma separated data row (see
     *         {@link #generateARFFRandomValues(double, double)})
     */
    public String generateCsvRandomValues(double baseValue) {
        return generateARFFRandomValues(baseValue, 0.0);
    }

    /**
     * Align the type column of all {@code @ATTRIBUTE} lines.
     * <p>
     * Every attribute line is rewritten as
     * {@code @ATTRIBUTE <label><padding><type>} where the padding makes all
     * type specifications start at the same column (longest label + 2 spaces).
     * All other lines (e.g. {@code @RELATION} and blank lines) are emitted
     * unchanged.
     * 
     * @param csvLabels
     *                  header text as produced by {@link #toARFFHeaders(String)}
     * @return the header text with aligned attribute lines, each line
     *         terminated by a line feed
     */
    private String reformatARFFHeaders(String csvLabels) {
        final String[] lines = csvLabels.split("\n");

        // First pass: find the longest attribute label
        int maxLabelLength = 0;
        for (String line : lines) {
            final String[] parts = splitAttributeLine(line);
            if (parts.length == 3) {
                maxLabelLength = Math.max(maxLabelLength, parts[1].length());
            }
        }

        // Second pass: emit the lines, padding the attribute labels
        final StringBuilder sb = new StringBuilder();
        for (String line : lines) {
            final String[] parts = splitAttributeLine(line);
            if (parts.length != 3) {
                // Not an alignable attribute line: keep it verbatim instead
                // of dropping or mangling it
                sb.append(line);
                sb.append("\n");
                continue;
            }

            sb.append(parts[0]);
            sb.append(" ");
            sb.append(parts[1]);
            final int numSpacer = maxLabelLength - parts[1].length() + 2;
            for (int j = 0; j < numSpacer; j++) {
                sb.append(" ");
            }
            sb.append(parts[2]);
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Split an attribute declaration into keyword, label and type
     * specification.
     * <p>
     * The split is limited to three parts so that a type specification that
     * itself contains spaces (e.g. a nominal value list) stays intact.
     * 
     * @param line
     *             a single header line
     * @return an array of length 3 for a complete {@code @ATTRIBUTE} line,
     *         otherwise an array of a different length
     */
    private static String[] splitAttributeLine(String line) {
        if (!line.startsWith(ATTRIBUTE_KEYWORD + " ")) {
            return new String[0];
        }
        return line.split(" ", 3);
    }

    /**
     * Build the (not yet aligned) ARFF header.
     * <p>
     * Writes the {@code @RELATION} line from {@code nameOfData}, a blank line,
     * one {@code @ATTRIBUTE} line per attribute (nominal attributes list their
     * names in braces, all others are declared {@code NUMERIC}) and finally
     * the numeric target attribute.
     * 
     * @param resultLabel
     *                    label of the target attribute
     * @return the header text, each line terminated by a line feed
     */
    private String toARFFHeaders(String resultLabel) {
        final StringBuilder sb = new StringBuilder();

        sb.append("@RELATION");
        sb.append(" ");
        sb.append(this.nameOfData);
        sb.append("\n");
        sb.append("\n");
        for (Attribute attr : attrs) {
            sb.append(ATTRIBUTE_KEYWORD);
            sb.append(" ");
            sb.append(attr.label);
            sb.append(" ");
            if (attr.isNominal) {
                sb.append("{");
                // Write the separator in front of every element but the first
                // so that an empty nominal list cannot eat the opening brace
                String separator = "";
                for (AttributeNominal nominal : attr.nominals) {
                    sb.append(separator);
                    sb.append(nominal.name);
                    separator = ",";
                }
                sb.append("}");
            } else {
                sb.append("NUMERIC");
            }
            sb.append("\n");
        }
        sb.append(ATTRIBUTE_KEYWORD);
        sb.append(" ");
        sb.append(resultLabel);
        sb.append(" ");
        sb.append("NUMERIC");
        sb.append("\n");
        return sb.toString();
    }

    /**
     * Generate a complete ARFF document.
     * <p>
     * Despite its name this method emits ARFF, not CSV: the {@code @DATA}
     * marker is always written, also when {@code withHeader} is false.
     * 
     * @param numOfLines
     *                           Number of lines to generate as a dummy data set
     * @param targetLabel
     *                           Target to predict
     * @param targetInitialValue
     *                           The initial value that will be used for multiplication when
     *                           calculating the target.
     * @param valueVolatility
     *                           A random value to be added to the computed value. computed by
     *                           "volatility * MyMath.random() * targetInitialValue"
     * @param withHeader
     *                           true:generate the ARFF header
     *                           ({@code @RELATION} / {@code @ATTRIBUTE} lines)
     *                           followed by a blank line.
     * 
     * @param withId
     *                           Currently ignored: no id column is generated
     *                           for ARFF output.
     * @return the ARFF text, each line terminated by a line feed
     */
    public String generateCSV(int numOfLines, String targetLabel, double targetInitialValue, double valueVolatility,
            boolean withHeader, boolean withId) {

        final StringBuilder sb = new StringBuilder();
        if (withHeader) {
            sb.append(reformatARFFHeaders(toARFFHeaders(targetLabel)));
            sb.append("\n");
        }

        sb.append("@DATA");
        sb.append("\n");
        for (int i = 0; i < numOfLines; i++) {
            sb.append(generateARFFRandomValues(targetInitialValue, valueVolatility));
            sb.append("\n");
        }
        return sb.toString();

    }

    /**
     * Generate one random data row.
     * <p>
     * The target starts at
     * {@code baseValue + volatility * MyMath.random() * baseValue} and is
     * multiplied by the coefficient of every randomly chosen attribute value.
     * A candidate row is regenerated from scratch until
     * {@code compliantListener} accepts its attribute values.
     * 
     * @param baseValue
     *                   The initial value of the target
     * @param volatility
     *                   Factor applied to the random part that is added to the
     *                   base value
     * @return the attribute values followed by the target, comma separated;
     *         numeric attribute values and the target are cast to int
     */
    public String generateARFFRandomValues(double baseValue, double volatility) {

        // NOTE(review): raw Map kept on purpose - the parameter type of the
        // AttributeCheck constructor is not visible from this file. Confirm
        // and add type arguments.
        Map attributeValues = new LinkedHashMap<>();
        final StringBuilder sb = new StringBuilder();

        double computedValue = 0;

        do {
            computedValue = baseValue + (volatility * MyMath.random() * baseValue);

            // Discard the row of a rejected previous attempt
            sb.setLength(0);
            for (Attribute attr : attrs) {
                if (attr.isNominal) {
                    AttributeNominal randomNominal = attr.generateRandomNominal();
                    sb.append(randomNominal.name);
                    sb.append(",");
                    computedValue *= randomNominal.coefficient;

                    // Same label on every attempt, so a retry overwrites the
                    // value of the rejected attempt
                    attributeValues.put(attr.label, randomNominal);
                } else {
                    AttributeNumericValue randomNumeric = attr.generateRandomNumeric();
                    sb.append((int) randomNumeric.numericValue);
                    sb.append(",");
                    computedValue *= randomNumeric.coefficient;
                    attributeValues.put(attr.label, randomNumeric);
                }

            }
        } while (!compliantListener.isCompliant(new AttributeCheck(attributeValues)));

        // The target is the last column of the row
        sb.append((int) computedValue);
        return sb.toString();
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy