All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.datagenerators.classifiers.classification.Agrawal Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Agrawal.java
 * Copyright (C) 2005-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.datagenerators.classifiers.classification;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.datagenerators.ClassificationGenerator;

/**
 *  Generates a people database and is based on the
 * paper by Agrawal et al.:
 * R. Agrawal, T. Imielinski, A. Swami (1993). Database Mining: A Performance
 * Perspective. IEEE Transactions on Knowledge and Data Engineering.
 * 5(6):914-925. URL
 * http://www.almaden.ibm.com/software/quest/Publications/ByDate.html.
 *

* * * BibTeX: * *

 * @article{Agrawal1993,
 *    author = {R. Agrawal and T. Imielinski and A. Swami},
 *    journal = {IEEE Transactions on Knowledge and Data Engineering},
 *    note = {Special issue on Learning and Discovery in Knowledge-Based Databases},
 *    number = {6},
 *    pages = {914-925},
 *    title = {Database Mining: A Performance Perspective},
 *    volume = {5},
 *    year = {1993},
 *    URL = {http://www.almaden.ibm.com/software/quest/Publications/ByDate.html},
 *    PDF = {http://www.almaden.ibm.com/software/quest/Publications/papers/tkde93.pdf}
 * }
 * 
*

* * * Valid options are: *

* *

 * -h
 *  Prints this help.
 * 
* *
 * -o <file>
 *  The name of the output file, otherwise the generated data is
 *  printed to stdout.
 * 
* *
 * -r <name>
 *  The name of the relation.
 * 
* *
 * -d
 *  Whether to print debug informations.
 * 
* *
 * -S
 *  The seed for random function (default 1)
 * 
* *
 * -n <num>
 *  The number of examples to generate (default 100)
 * 
* *
 * -F <num>
 *  The function to use for generating the data. (default 1)
 * 
* *
 * -B
 *  Whether to balance the class.
 * 
* *
 * -P <num>
 *  The perturbation factor. (default 0.05)
 * 
* * * * @author Richard Kirkby (rkirkby at cs dot waikato dot ac dot nz) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 10203 $ */ public class Agrawal extends ClassificationGenerator implements TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 2254651939636143025L; /** * the interface for the class functions */ protected interface ClassFunction { /** * returns a class value based on the given inputs * * @param salary the salary * @param commission the commission * @param age the age * @param elevel the education level * @param car * @param zipcode the zip code * @param hvalue * @param hyears * @param loan */ public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan); } /** * built in functions are based on the paper (page 924), which turn out to be * functions pred20 thru pred29 in the public c code */ protected static ClassFunction[] builtInFunctions = { // function 1 new ClassFunction() { @Override public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { if (age < 40 || 60 <= age) { return 0; } else { return 1; } } }, // function 2 new ClassFunction() { @Override public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { if (age < 40) { if (50000 <= salary && salary <= 100000) { return 0; } else { return 1; } } else if (age < 60) { if (75000 <= salary && salary <= 125000) { return 0; } else { return 1; } } else // age >= 60 if (25000 <= salary && salary <= 75000) { return 0; } else { return 1; } } }, // function 3 new ClassFunction() { @Override public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { if (age < 40) { if (elevel == 0 || elevel == 1) 
{ return 0; } else { return 1; } } else if (age < 60) { if (elevel == 1 || elevel == 2 || elevel == 3) { return 0; } else { return 1; } } else // age >= 60 if (elevel == 2 || elevel == 3 || elevel == 4) { return 0; } else { return 1; } } }, // function 4 new ClassFunction() { @Override public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { if (age < 40) { if (elevel == 0 || elevel == 1) { if (25000 <= salary && salary <= 75000) { return 0; } else { return 1; } } else if (50000 <= salary && salary <= 100000) { return 0; } else { return 1; } } else if (age < 60) { if (elevel == 1 || elevel == 2 || elevel == 3) { if (50000 <= salary && salary <= 100000) { return 0; } else { return 1; } } else if (75000 <= salary && salary <= 125000) { return 0; } else { return 1; } } else // age >= 60 if (elevel == 2 || elevel == 3 || elevel == 4) { if (50000 <= salary && salary <= 100000) { return 0; } else { return 1; } } else if (25000 <= salary && salary <= 75000) { return 0; } else { return 1; } } }, // function 5 new ClassFunction() { @Override public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { if (age < 40) { if (50000 <= salary && salary <= 100000) { if (100000 <= loan && loan <= 300000) { return 0; } else { return 1; } } else if (200000 <= loan && loan <= 400000) { return 0; } else { return 1; } } else if (age < 60) { if (75000 <= salary && salary <= 125000) { if (200000 <= loan && loan <= 400000) { return 0; } else { return 1; } } else if (300000 <= loan && loan <= 500000) { return 0; } else { return 1; } } else // age >= 60 if (25000 <= salary && salary <= 75000) { if (300000 <= loan && loan <= 500000) { return 0; } else { return 1; } } else if (100000 <= loan && loan <= 300000) { return 0; } else { return 1; } } }, // function 6 new ClassFunction() { @Override public long 
determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { double totalSalary = salary + commission; if (age < 40) { if (50000 <= totalSalary && totalSalary <= 100000) { return 0; } else { return 1; } } else if (age < 60) { if (75000 <= totalSalary && totalSalary <= 125000) { return 0; } else { return 1; } } else // age >= 60 if (25000 <= totalSalary && totalSalary <= 75000) { return 0; } else { return 1; } } }, // function 7 new ClassFunction() { @Override public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { double disposable = (2.0 * (salary + commission) / 3.0 - loan / 5.0 - 20000.0); return disposable > 0 ? 0 : 1; } }, // function 8 new ClassFunction() { @Override public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { double disposable = (2.0 * (salary + commission) / 3.0 - 5000.0 * elevel - 20000.0); return disposable > 0 ? 0 : 1; } }, // function 9 new ClassFunction() { @Override public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { double disposable = (2.0 * (salary + commission) / 3.0 - 5000.0 * elevel - loan / 5.0 - 10000.0); return disposable > 0 ? 0 : 1; } }, // function 10 new ClassFunction() { @Override public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { double equity = 0.0; if (hyears >= 20) { equity = hvalue * (hyears - 20.0) / 10.0; } double disposable = (2.0 * (salary + commission) / 3.0 - 5000.0 * elevel + equity / 5.0 - 10000.0); return disposable > 0 ? 
0 : 1; } } }; /** function 1 */ public final static int FUNCTION_1 = 1; /** function 2 */ public final static int FUNCTION_2 = 2; /** function 3 */ public final static int FUNCTION_3 = 3; /** function 4 */ public final static int FUNCTION_4 = 4; /** function 5 */ public final static int FUNCTION_5 = 5; /** function 6 */ public final static int FUNCTION_6 = 6; /** function 7 */ public final static int FUNCTION_7 = 7; /** function 8 */ public final static int FUNCTION_8 = 8; /** function 9 */ public final static int FUNCTION_9 = 9; /** function 10 */ public final static int FUNCTION_10 = 10; /** the funtion tags */ public static final Tag[] FUNCTION_TAGS = { new Tag(FUNCTION_1, "Function 1"), new Tag(FUNCTION_2, "Function 2"), new Tag(FUNCTION_3, "Function 3"), new Tag(FUNCTION_4, "Function 4"), new Tag(FUNCTION_5, "Function 5"), new Tag(FUNCTION_6, "Function 6"), new Tag(FUNCTION_7, "Function 7"), new Tag(FUNCTION_8, "Function 8"), new Tag(FUNCTION_9, "Function 9"), new Tag(FUNCTION_10, "Function 10"), }; /** the function to use for generating the data */ protected int m_Function; /** whether to balance the class */ protected boolean m_BalanceClass; /** the perturabation fraction */ protected double m_PerturbationFraction; /** used for balancing the class */ protected boolean m_nextClassShouldBeZero; /** the last class label that was generated */ protected double m_lastLabel; /** * initializes the generator with default values */ public Agrawal() { super(); setFunction(defaultFunction()); setBalanceClass(defaultBalanceClass()); setPerturbationFraction(defaultPerturbationFraction()); } /** * Returns a string describing this data generator. 
* * @return a description of the data generator suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Generates a people database and is based on the paper by Agrawal " + "et al.:\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ @Override public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.ARTICLE); result.setValue(Field.AUTHOR, "R. Agrawal and T. Imielinski and A. Swami"); result.setValue(Field.YEAR, "1993"); result.setValue(Field.TITLE, "Database Mining: A Performance Perspective"); result.setValue(Field.JOURNAL, "IEEE Transactions on Knowledge and Data Engineering"); result.setValue(Field.VOLUME, "5"); result.setValue(Field.NUMBER, "6"); result.setValue(Field.PAGES, "914-925"); result.setValue(Field.NOTE, "Special issue on Learning and Discovery in Knowledge-Based Databases"); result.setValue(Field.URL, "http://www.almaden.ibm.com/software/quest/Publications/ByDate.html"); result .setValue(Field.PDF, "http://www.almaden.ibm.com/software/quest/Publications/papers/tkde93.pdf"); return result; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options */ @Override public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy