/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* WekaClassifierMapTask.java
* Copyright (C) 2013 University of Waikato, Hamilton, New Zealand
*
*/
package weka.distributed;
import distributed.core.DistributedJob;
import distributed.core.DistributedJobConfig;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.UpdateableBatchProcessor;
import weka.classifiers.UpdateableClassifier;
import weka.classifiers.meta.AggregateableFilteredClassifier;
import weka.classifiers.meta.AggregateableFilteredClassifierUpdateable;
import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.meta.FilteredClassifierUpdateable;
import weka.core.Aggregateable;
import weka.core.Environment;
import weka.core.EnvironmentHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.MakePreconstructedFilter;
import weka.filters.MultiFilter;
import weka.filters.PreconstructedFilter;
import weka.filters.StreamableFilter;
import weka.filters.unsupervised.instance.ReservoirSample;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Random;
import java.util.Vector;
/**
* A map task for building classifiers. Can handle batch and incremental
* classifiers, which are either Aggregateable or not. Non-aggregateable
* classifiers are wrapped up in a Vote meta classifier by the reduce task.
* Incremental classifiers are trained as instances are presented to the
* processInstance() method. Batch classifiers are trained when finalizeTask()
* is called. Instances are collected and held in memory for batch classifiers,
* although reservoir sampling may be used to ensure that a fixed number of
* instances is used for batch learning. There are options to force batch
* learning for updateable classifiers and to force the generation of a Vote
* ensemble for Aggregateable classifiers.
*
*
* Classifiers may be trained on all the incoming data or on a particular
* cross-validation fold (this functionality is used directly by the evaluation
* map and reduce tasks). In the case of batch classifiers, the data for the map
* will be stratified (if the class is nominal) and randomized before extracting
* the fold to train on. In the case of incremental classifiers, a modulus
* operation is used to pull out the instance corresponding to the selected fold
* from the incoming instance stream.
*
*
* Classifiers can optionally have their training data passed through one or
* more filters as a pre-processing step. The class will determine how to wrap
* the base classifier and filters based on the nature of the filters specified
* and whether the classifier is batch/incremental and Aggregateable.
* Aggregateable classifiers (batch or incremental) can only be aggregated to
* one final model if the filters used with them (if using filters) are all
* StreamableFilters (i.e. they can determine their output structure immediately
* without having to see any instances).
*
*
* It is also possible to specify a special "preconstructed" filter to use in
* conjunction with, or instead of, regular filters. At present, there is just
* one Preconstructed filter implemented by the distributed system.
* PreConstructedPCA can produce a "trained" PCA filter using a correlation
* matrix produced by the CorrelationMatrixMap/Reduce tasks.
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: 12589 $
*/
public class WekaClassifierMapTask implements OptionHandler,
EnvironmentHandler, Serializable {
/**
 * If this property is set then we can adjust the total number of requested
 * iterations for IteratedSingleClassifierEnhancers according to the number of
 * maps that are going to run. This is useful for schemes that build
 * independent base models (e.g. Bagging) in order to get approximately the
 * requested number of models in the end. For boosting methods it will be
 * necessary to set their number of iterations to a higher value than required
 * as it will be adjusted downwards for each map.
 */
public static final String TOTAL_NUMBER_OF_MAPS = "total.num.maps";
/** For serialization */
private static final long serialVersionUID = -5953696466790594368L;
/** The classifier to use (defaults to REPTree) */
protected Classifier m_classifier = new weka.classifiers.trees.REPTree();
/** If true then incremental (Updateable) classifiers will be batch trained */
protected boolean m_forceBatchForUpdateable;
/**
 * Option value that is determined by whether the classifier is updateable and
 * whether this iteration through the data is > 1
 */
protected boolean m_continueTrainingUpdateable;
/**
 * Total folds - only used if m_foldNumber != -1. Use this to train the
 * classifier on a particular fold of the incoming data set for this map
 */
protected int m_totalFolds = 1; // default = use all data
/**
 * The fold number to train on. Use in conjunction with m_totalFolds. Default
 * is to train on all the data entering this map
 */
protected int m_foldNumber = -1; // 1-based. default - use all data
/** Number of training instances processed by the classifier in this map */
protected int m_numTrainingInstances;
/** Total number of instances seen by this map */
protected int m_numInstances;
/** Training header (structure-only copy of the incoming data) */
protected Instances m_trainingHeader;
/** Environment variables (transient - re-initialized after deserialization) */
protected transient Environment m_env = Environment.getSystemWide();
/** Whether to use reservoir sampling for batch learning */
protected boolean m_useReservoirSampling;
/** Reservoir sampling (if requested) for batch learning in this map */
protected ReservoirSample m_reservoir;
/** Sample size if reservoir sampling is being used for batch learning */
protected int m_sampleSize = -1;
/**
 * True if a Vote ensemble is to be produced in the case when the base
 * classifier is Aggregateable
 */
protected boolean m_forceVotedEnsemble;
/**
 * Filters to use. How these are handled depends on whether the base
 * classifier is Aggregateable, incremental etc. These only have an effect if
 * not continuing the training of an updateable classifier - in this case it
 * is assumed that the updateable classifier would have been configured with
 * these filters when first constructed.
 */
// NOTE(review): raw List - presumably List<Filter> (elements come from the
// weka.filters.Filter imports above); confirm before adding the generic type,
// as subclasses may reference this protected field.
protected List m_filtersToUse = new ArrayList();
/** Random seed for fold generation */
protected String m_seed = "1";
/**
 * Main method for testing this class from the command line. First batch
 * trains the default classifier on the supplied ARFF file, then demonstrates
 * two passes of incremental training using NaiveBayesUpdateable.
 *
 * @param args command-line options; -t &lt;ARFF file&gt; supplies the
 *          training data and -h prints help and exits
 */
public static void main(String[] args) {
  try {
    WekaClassifierMapTask task = new WekaClassifierMapTask();
    if (Utils.getFlag('h', args)) {
      String help = DistributedJob.makeOptionsStr(task);
      System.err.println(help);
      System.exit(1);
    }
    String trainingPath = Utils.getOption("t", args);

    // Load the training data via try-with-resources so the reader is
    // closed even if parsing fails (the original leaked the FileReader).
    Instances train;
    try (java.io.BufferedReader br =
      new java.io.BufferedReader(new java.io.FileReader(trainingPath))) {
      train = new Instances(br);
    }
    train.setClassIndex(train.numAttributes() - 1);

    // Batch training with the default (REPTree) classifier
    task.setOptions(args);
    task.setup(new Instances(train, 0));
    for (int i = 0; i < train.numInstances(); i++) {
      task.processInstance(train.instance(i));
    }
    task.finalizeTask();
    System.err.println("Batch trained classifier:\n"
      + task.getClassifier().toString());

    // now configure for an incremental classifier and
    // train it for two passes over the data
    task = new WekaClassifierMapTask();
    task.setClassifier(new weka.classifiers.bayes.NaiveBayesUpdateable());
    task.setup(new Instances(train, 0));
    for (int i = 0; i < train.numInstances(); i++) {
      task.processInstance(train.instance(i));
    }
    // no finalizeTask() needed here - incremental training happens in
    // processInstance()
    System.err.println("Incremental training (iteration 1):\n"
      + task.getClassifier().toString());

    task.setContinueTrainingUpdateableClassifier(true);
    task.setup(new Instances(train, 0));
    for (int i = 0; i < train.numInstances(); i++) {
      task.processInstance(train.instance(i));
    }
    System.err.println("Incremental training (iteration 2):\n"
      + task.getClassifier().toString());
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
@Override
public Enumeration