// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price: only 1 $.
// You can buy this project and download/modify it as often as you want.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Agrawal.java
* Copyright (C) 2005-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.datagenerators.classifiers.classification;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.datagenerators.ClassificationGenerator;
/**
* Generates a people database and is based on the
* paper by Agrawal et al.:
* R. Agrawal, T. Imielinski, A. Swami (1993). Database Mining: A Performance
* Perspective. IEEE Transactions on Knowledge and Data Engineering.
* 5(6):914-925. URL
* http://www.almaden.ibm.com/software/quest/Publications/ByDate.html.
*
*
*
* BibTeX:
*
*
* @article{Agrawal1993,
* author = {R. Agrawal and T. Imielinski and A. Swami},
* journal = {IEEE Transactions on Knowledge and Data Engineering},
* note = {Special issue on Learning and Discovery in Knowledge-Based Databases},
* number = {6},
* pages = {914-925},
* title = {Database Mining: A Performance Perspective},
* volume = {5},
* year = {1993},
* URL = {http://www.almaden.ibm.com/software/quest/Publications/ByDate.html},
* PDF = {http://www.almaden.ibm.com/software/quest/Publications/papers/tkde93.pdf}
* }
*
*
*
*
* Valid options are:
*
*
*
* -h
* Prints this help.
*
*
*
* -o <file>
* The name of the output file, otherwise the generated data is
* printed to stdout.
*
*
*
* -r <name>
* The name of the relation.
*
*
*
* -d
* Whether to print debug information.
*
*
*
* -S
* The seed for random function (default 1)
*
*
*
* -n <num>
* The number of examples to generate (default 100)
*
*
*
* -F <num>
* The function to use for generating the data. (default 1)
*
*
*
* -B
* Whether to balance the class.
*
*
*
* -P <num>
* The perturbation factor. (default 0.05)
*
*
*
*
* @author Richard Kirkby (rkirkby at cs dot waikato dot ac dot nz)
* @author FracPete (fracpete at waikato dot ac dot nz)
* @version $Revision: 10203 $
*/
public class Agrawal extends ClassificationGenerator implements
TechnicalInformationHandler {
/** for serialization */
static final long serialVersionUID = 2254651939636143025L;
/**
* The interface for the class functions: each implementation maps the
* attribute values of one example to a class value.
*/
protected interface ClassFunction {
/**
* Returns a class value based on the given inputs.
*
* @param salary the salary
* @param commission the commission
* @param age the age
* @param elevel the education level
* @param car the car value (not read by any of the built-in functions)
* @param zipcode the zip code (not read by any of the built-in functions)
* @param hvalue presumably the house value; only built-in function 10 reads
*          it, to compute an equity term -- TODO confirm meaning
* @param hyears presumably the number of years the house has been owned;
*          only built-in function 10 reads it -- TODO confirm meaning
* @param loan the loan amount
* @return the class value; every built-in function returns either 0 or 1
*/
public long determineClass(double salary, double commission, int age,
int elevel, int car, int zipcode, double hvalue, int hyears, double loan);
}
/**
 * The ten built-in class functions. They are based on the paper (page 924)
 * and turn out to be the functions pred20 thru pred29 in the public c code.
 * Every function returns either 0 or 1.
 */
protected static ClassFunction[] builtInFunctions = {
  // function 1: class 1 for 40 <= age < 60, class 0 otherwise
  new ClassFunction() {
    @Override
    public long determineClass(double salary, double commission, int age,
      int elevel, int car, int zipcode, double hvalue, int hyears, double loan) {
      return (40 <= age && age < 60) ? 1 : 0;
    }
  },
  // function 2: class 0 if the salary lies in an age dependent range
  new ClassFunction() {
    @Override
    public long determineClass(double salary, double commission, int age,
      int elevel, int car, int zipcode, double hvalue, int hyears, double loan) {
      // every age group uses a salary range that is 50000 wide
      final double lower;
      if (age < 40) {
        lower = 50000;
      } else if (age < 60) {
        lower = 75000;
      } else {
        lower = 25000;
      }
      return (lower <= salary && salary <= lower + 50000) ? 0 : 1;
    }
  },
  // function 3: class 0 if the education level lies in an age dependent range
  new ClassFunction() {
    @Override
    public long determineClass(double salary, double commission, int age,
      int elevel, int car, int zipcode, double hvalue, int hyears, double loan) {
      final int minLevel;
      final int maxLevel;
      if (age < 40) {
        minLevel = 0;
        maxLevel = 1;
      } else if (age < 60) {
        minLevel = 1;
        maxLevel = 3;
      } else {
        minLevel = 2;
        maxLevel = 4;
      }
      return (minLevel <= elevel && elevel <= maxLevel) ? 0 : 1;
    }
  },
  // function 4: salary range selected by age group and education level
  new ClassFunction() {
    @Override
    public long determineClass(double salary, double commission, int age,
      int elevel, int car, int zipcode, double hvalue, int hyears, double loan) {
      // lower bound of the salary range; the range is always 50000 wide
      final double lower;
      if (age < 40) {
        lower = (elevel == 0 || elevel == 1) ? 25000 : 50000;
      } else if (age < 60) {
        lower = (1 <= elevel && elevel <= 3) ? 50000 : 75000;
      } else {
        lower = (2 <= elevel && elevel <= 4) ? 50000 : 25000;
      }
      return (lower <= salary && salary <= lower + 50000) ? 0 : 1;
    }
  },
  // function 5: loan range selected by age group and salary range
  new ClassFunction() {
    @Override
    public long determineClass(double salary, double commission, int age,
      int elevel, int car, int zipcode, double hvalue, int hyears, double loan) {
      // lower bound of the loan range; the range is always 200000 wide
      final double loanLower;
      if (age < 40) {
        loanLower = (50000 <= salary && salary <= 100000) ? 100000 : 200000;
      } else if (age < 60) {
        loanLower = (75000 <= salary && salary <= 125000) ? 200000 : 300000;
      } else {
        loanLower = (25000 <= salary && salary <= 75000) ? 300000 : 100000;
      }
      return (loanLower <= loan && loan <= loanLower + 200000) ? 0 : 1;
    }
  },
  // function 6: like function 2, but on salary plus commission
  new ClassFunction() {
    @Override
    public long determineClass(double salary, double commission, int age,
      int elevel, int car, int zipcode, double hvalue, int hyears, double loan) {
      final double totalSalary = salary + commission;
      final double lower;
      if (age < 40) {
        lower = 50000;
      } else if (age < 60) {
        lower = 75000;
      } else {
        lower = 25000;
      }
      return (lower <= totalSalary && totalSalary <= lower + 50000) ? 0 : 1;
    }
  },
  // function 7: disposable income reduced by a fifth of the loan
  new ClassFunction() {
    @Override
    public long determineClass(double salary, double commission, int age,
      int elevel, int car, int zipcode, double hvalue, int hyears, double loan) {
      final double income = 2.0 * (salary + commission) / 3.0;
      final double disposable = income - loan / 5.0 - 20000.0;
      if (disposable > 0) {
        return 0;
      }
      return 1;
    }
  },
  // function 8: disposable income reduced by 5000 per education level
  new ClassFunction() {
    @Override
    public long determineClass(double salary, double commission, int age,
      int elevel, int car, int zipcode, double hvalue, int hyears, double loan) {
      final double income = 2.0 * (salary + commission) / 3.0;
      final double disposable = income - 5000.0 * elevel - 20000.0;
      if (disposable > 0) {
        return 0;
      }
      return 1;
    }
  },
  // function 9: disposable income reduced by education level and loan
  new ClassFunction() {
    @Override
    public long determineClass(double salary, double commission, int age,
      int elevel, int car, int zipcode, double hvalue, int hyears, double loan) {
      final double income = 2.0 * (salary + commission) / 3.0;
      final double disposable = income - 5000.0 * elevel - loan / 5.0 - 10000.0;
      if (disposable > 0) {
        return 0;
      }
      return 1;
    }
  },
  // function 10: disposable income increased by a fifth of the equity
  new ClassFunction() {
    @Override
    public long determineClass(double salary, double commission, int age,
      int elevel, int car, int zipcode, double hvalue, int hyears, double loan) {
      // equity only accrues once hyears has reached 20
      final double equity = (hyears >= 20) ? hvalue * (hyears - 20.0) / 10.0 : 0.0;
      final double income = 2.0 * (salary + commission) / 3.0;
      final double disposable = income - 5000.0 * elevel + equity / 5.0 - 10000.0;
      if (disposable > 0) {
        return 0;
      }
      return 1;
    }
  }
};
/** ID of function 1 (used as the tag ID in FUNCTION_TAGS) */
public final static int FUNCTION_1 = 1;
/** ID of function 2 (used as the tag ID in FUNCTION_TAGS) */
public final static int FUNCTION_2 = 2;
/** ID of function 3 (used as the tag ID in FUNCTION_TAGS) */
public final static int FUNCTION_3 = 3;
/** ID of function 4 (used as the tag ID in FUNCTION_TAGS) */
public final static int FUNCTION_4 = 4;
/** ID of function 5 (used as the tag ID in FUNCTION_TAGS) */
public final static int FUNCTION_5 = 5;
/** ID of function 6 (used as the tag ID in FUNCTION_TAGS) */
public final static int FUNCTION_6 = 6;
/** ID of function 7 (used as the tag ID in FUNCTION_TAGS) */
public final static int FUNCTION_7 = 7;
/** ID of function 8 (used as the tag ID in FUNCTION_TAGS) */
public final static int FUNCTION_8 = 8;
/** ID of function 9 (used as the tag ID in FUNCTION_TAGS) */
public final static int FUNCTION_9 = 9;
/** ID of function 10 (used as the tag ID in FUNCTION_TAGS) */
public final static int FUNCTION_10 = 10;
/**
* the function tags: one tag per FUNCTION_x constant, each labelled
* "Function x"
*/
public static final Tag[] FUNCTION_TAGS = {
new Tag(FUNCTION_1, "Function 1"), new Tag(FUNCTION_2, "Function 2"),
new Tag(FUNCTION_3, "Function 3"), new Tag(FUNCTION_4, "Function 4"),
new Tag(FUNCTION_5, "Function 5"), new Tag(FUNCTION_6, "Function 6"),
new Tag(FUNCTION_7, "Function 7"), new Tag(FUNCTION_8, "Function 8"),
new Tag(FUNCTION_9, "Function 9"), new Tag(FUNCTION_10, "Function 10"), };
/**
* the function to use for generating the data (presumably one of the
* FUNCTION_x constants -- confirm against the setter)
*/
protected int m_Function;
/** whether to balance the class */
protected boolean m_BalanceClass;
/** the perturbation fraction */
protected double m_PerturbationFraction;
/** used for balancing the class */
protected boolean m_nextClassShouldBeZero;
/** the last class label that was generated */
protected double m_lastLabel;
/**
* Initializes the generator with default values: the function, the class
* balancing flag and the perturbation fraction are set, in that order, to
* the values returned by the corresponding default methods.
*/
public Agrawal() {
super();
// the defaults go through the setters instead of being assigned to the
// fields directly
setFunction(defaultFunction());
setBalanceClass(defaultBalanceClass());
setPerturbationFraction(defaultPerturbationFraction());
}
/**
 * Describes this data generator: a fixed introductory sentence followed by
 * the string form of the technical information (the paper reference).
 *
 * @return a description of the data generator suitable for displaying in the
 *         explorer/experimenter gui
 */
public String globalInfo() {
  final String reference = getTechnicalInformation().toString();
  final String intro =
    "Generates a people database and is based on the paper by Agrawal "
      + "et al.:\n";
  return intro + reference;
}
/**
 * Builds the bibliographic record of the article this generator is based on
 * (Agrawal, Imielinski and Swami, 1993), e.g., for display as a paper
 * reference.
 *
 * @return the technical information about this class
 */
@Override
public TechnicalInformation getTechnicalInformation() {
  final TechnicalInformation article = new TechnicalInformation(Type.ARTICLE);

  // who, when, what
  article.setValue(Field.AUTHOR, "R. Agrawal and T. Imielinski and A. Swami");
  article.setValue(Field.YEAR, "1993");
  article.setValue(Field.TITLE, "Database Mining: A Performance Perspective");

  // where it was published
  article.setValue(Field.JOURNAL,
    "IEEE Transactions on Knowledge and Data Engineering");
  article.setValue(Field.VOLUME, "5");
  article.setValue(Field.NUMBER, "6");
  article.setValue(Field.PAGES, "914-925");
  article.setValue(Field.NOTE,
    "Special issue on Learning and Discovery in Knowledge-Based Databases");

  // online locations
  article.setValue(Field.URL,
    "http://www.almaden.ibm.com/software/quest/Publications/ByDate.html");
  article.setValue(Field.PDF,
    "http://www.almaden.ibm.com/software/quest/Publications/papers/tkde93.pdf");

  return article;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options
*/
@Override
public Enumeration