All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.datagenerators.clusterers.SubspaceCluster Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    SubspaceCluster.java
 *    Copyright (C) 2001-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.datagenerators.clusterers;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.Tag;
import weka.core.Utils;
import weka.datagenerators.ClusterDefinition;
import weka.datagenerators.ClusterGenerator;

/**
 * A data generator that produces data points in
 * hyperrectangular subspace clusters.
 * <p>
 * Valid options are:
 *
 * <pre>
 * -h
 *  Prints this help.
 * </pre>
 *
 * <pre>
 * -o &lt;file&gt;
 *  The name of the output file, otherwise the generated data is
 *  printed to stdout.
 * </pre>
 *
 * <pre>
 * -r &lt;name&gt;
 *  The name of the relation.
 * </pre>
 *
 * <pre>
 * -d
 *  Whether to print debug information.
 * </pre>
 *
 * <pre>
 * -S
 *  The seed for random function (default 1)
 * </pre>
 *
 * <pre>
 * -a &lt;num&gt;
 *  The number of attributes (default 1).
 * </pre>
 *
 * <pre>
 * -c
 *  Class Flag, if set, the cluster is listed in extra attribute.
 * </pre>
 *
 * <pre>
 * -b &lt;range&gt;
 *  The indices for boolean attributes.
 * </pre>
 *
 * <pre>
 * -m &lt;range&gt;
 *  The indices for nominal attributes.
 * </pre>
 *
 * <pre>
 * -P &lt;num&gt;
 *  The noise rate in percent (default 0.0).
 *  Can be between 0% and 30%. (Remark: The original
 *  algorithm only allows noise up to 10%.)
 * </pre>
 *
 * <pre>
 * -C &lt;cluster-definition&gt;
 *  A cluster definition of class 'SubspaceClusterDefinition'
 *  (definition needs to be quoted to be recognized as
 *  a single argument).
 * </pre>
 *
 * <pre>
 * Options specific to weka.datagenerators.clusterers.SubspaceClusterDefinition:
 * </pre>
 *
 * <pre>
 * -A &lt;range&gt;
 *  Generates randomly distributed instances in the cluster.
 * </pre>
 *
 * <pre>
 * -U &lt;range&gt;
 *  Generates uniformly distributed instances in the cluster.
 * </pre>
 *
 * <pre>
 * -G &lt;range&gt;
 *  Generates gaussian distributed instances in the cluster.
 * </pre>
 *
 * <pre>
 * -D &lt;num&gt;,&lt;num&gt;
 *  The attribute min/max (-A and -U) or mean/stddev (-G) for
 *  the cluster.
 * </pre>
 *
 * <pre>
 * -N &lt;num&gt;..&lt;num&gt;
 *  The range of number of instances per cluster (default 1..50).
 * </pre>
 *
 * <pre>
 * -I
 *  Uses integer instead of continuous values (default continuous).
 * </pre>
 *
* * * * @author Gabi Schmidberger ([email protected]) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 12478 $ */ public class SubspaceCluster extends ClusterGenerator { /** for serialization */ static final long serialVersionUID = -3454999858505621128L; /** noise rate in percent (option P, between 0 and 30) */ protected double m_NoiseRate; /** cluster list */ protected ClusterDefinition[] m_Clusters; /** if nominal, store number of values */ protected int[] m_numValues; /** cluster type: uniform/random */ public static final int UNIFORM_RANDOM = 0; /** cluster type: total uniform */ public static final int TOTAL_UNIFORM = 1; /** cluster type: gaussian */ public static final int GAUSSIAN = 2; /** the tags for the cluster types */ public static final Tag[] TAGS_CLUSTERTYPE = { new Tag(UNIFORM_RANDOM, "uniform/random"), new Tag(TOTAL_UNIFORM, "total uniform"), new Tag(GAUSSIAN, "gaussian") }; /** cluster subtype: continuous */ public static final int CONTINUOUS = 0; /** cluster subtype: integer */ public static final int INTEGER = 1; /** the tags for the cluster types */ public static final Tag[] TAGS_CLUSTERSUBTYPE = { new Tag(CONTINUOUS, "continuous"), new Tag(INTEGER, "integer") }; /** * initializes the generator, sets the number of clusters to 0, since user has * to specify them explicitly */ public SubspaceCluster() { super(); setNoiseRate(defaultNoiseRate()); } /** * Returns a string describing this data generator. * * @return a description of the data generator suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "A data generator that produces data points in " + "hyperrectangular subspace clusters."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options */ @Override public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy