All downloads are free. The search and download functionality uses the official Maven repository.

moa.classifiers.meta.OzaBag Maven / Gradle / Ivy

Go to download

Massive On-line Analysis (MOA) is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. It is related to the WEKA project and is also written in Java, while scaling to more demanding problems.

The newest version!
/*
 *    OzaBag.java
 *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
 *    @author Richard Kirkby ([email protected])
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program. If not, see <http://www.gnu.org/licenses/>.
 *    
 */
package moa.classifiers.meta;

import moa.capabilities.CapabilitiesHandler;
import moa.capabilities.Capability;
import moa.capabilities.ImmutableCapabilities;
import moa.classifiers.AbstractClassifier;
import moa.classifiers.Classifier;
import com.yahoo.labs.samoa.instances.Instance;

import moa.classifiers.MultiClassClassifier;
import moa.core.DoubleVector;
import moa.core.Measurement;
import moa.core.MiscUtils;
import moa.options.ClassOption;
import com.github.javacliparser.IntOption;

/**
 * Incremental on-line bagging of Oza and Russell.
 *
 * 

Oza and Russell developed online versions of bagging and boosting for * Data Streams. They show how the process of sampling bootstrap replicates * from training data can be simulated in a data stream context. They observe * that the probability that any individual example will be chosen for a * replicate tends to a Poisson(1) distribution.

* *

[OR] N. Oza and S. Russell. Online bagging and boosting. * In Artificial Intelligence and Statistics 2001, pages 105–112. * Morgan Kaufmann, 2001.

* *

Parameters:

    *
  • -l : Classifier to train
  • *
  • -s : The number of models in the bag
* * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public class OzaBag extends AbstractClassifier implements MultiClassClassifier, CapabilitiesHandler { @Override public String getPurposeString() { return "Incremental on-line bagging of Oza and Russell."; } private static final long serialVersionUID = 1L; public ClassOption baseLearnerOption = new ClassOption("baseLearner", 'l', "Classifier to train.", Classifier.class, "trees.HoeffdingTree"); public IntOption ensembleSizeOption = new IntOption("ensembleSize", 's', "The number of models in the bag.", 10, 1, Integer.MAX_VALUE); protected Classifier[] ensemble; @Override public void resetLearningImpl() { this.ensemble = new Classifier[this.ensembleSizeOption.getValue()]; Classifier baseLearner = (Classifier) getPreparedClassOption(this.baseLearnerOption); baseLearner.resetLearning(); for (int i = 0; i < this.ensemble.length; i++) { this.ensemble[i] = baseLearner.copy(); } } @Override public void trainOnInstanceImpl(Instance inst) { for (int i = 0; i < this.ensemble.length; i++) { int k = MiscUtils.poisson(1.0, this.classifierRandom); if (k > 0) { Instance weightedInst = (Instance) inst.copy(); weightedInst.setWeight(inst.weight() * k); this.ensemble[i].trainOnInstance(weightedInst); } } } @Override public double[] getVotesForInstance(Instance inst) { DoubleVector combinedVote = new DoubleVector(); for (int i = 0; i < this.ensemble.length; i++) { DoubleVector vote = new DoubleVector(this.ensemble[i].getVotesForInstance(inst)); if (vote.sumOfValues() > 0.0) { vote.normalize(); combinedVote.addValues(vote); } } return combinedVote.getArrayRef(); } @Override public boolean isRandomizable() { return true; } @Override public void getModelDescription(StringBuilder out, int indent) { // TODO Auto-generated method stub } @Override protected Measurement[] getModelMeasurementsImpl() { return new Measurement[]{new Measurement("ensemble size", this.ensemble != null ? 
this.ensemble.length : 0)}; } @Override public Classifier[] getSubClassifiers() { return this.ensemble.clone(); } @Override public ImmutableCapabilities defineImmutableCapabilities() { if (this.getClass() == OzaBag.class) return new ImmutableCapabilities(Capability.VIEW_STANDARD, Capability.VIEW_LITE); else return new ImmutableCapabilities(Capability.VIEW_STANDARD); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy