moa.classifiers.meta.DACC Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of moa Show documentation
Massive On-line Analysis (MOA) is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. It is related to the WEKA project, which is also written in Java, but scales to more demanding problems.
There is a newer version: 2024.07.0
Show newest version
/*
 *    DACC.java
 *
 *    @author Ghazal Jaber ([email protected])
 * 
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *    
 *    
 */

package moa.classifiers.meta;

import com.github.javacliparser.FloatOption;
import com.github.javacliparser.MultiChoiceOption;
import com.yahoo.labs.samoa.instances.Instance;
import moa.classifiers.AbstractClassifier;
import moa.classifiers.Classifier;
import moa.classifiers.MultiClassClassifier;
import moa.core.DoubleVector;
import moa.core.Measurement;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import moa.options.ClassOption;

/**
 * Dynamic Adaptation to Concept Changes. 
 * Ensemble method for data streams that adapts to concept changes.
 * 
 * Reference: JABER, G., CORNUEJOLS, A., and TARROUX, P. A New On-Line Learning Method 
 * for Coping with Recurring Concepts: The ADACC System. In : Neural Information 
 * Processing. Springer Berlin Heidelberg, 2013. p. 595-604.
 * 
 * @author Ghazal Jaber ([email protected])
 *
 */

public class DACC extends AbstractClassifier implements MultiClassClassifier {

    private static final long serialVersionUID = 1L;

    @Override
    public String getPurposeString() {
        return "Dynamic Adaptation to Concept Changes for data streams.";
    }

    /**
     * Base classifier
     */
    public ClassOption learnerOption = new ClassOption("baseLearner", 'l',
            "Classifier to train.", Classifier.class, "bayes.NaiveBayes");
    /**
     * Ensemble size
     */
    public FloatOption memberCountOption = new FloatOption("ensembleSize", 'n',
            "The maximum number of classifiers in an ensemble.", 20, 1, Integer.MAX_VALUE);
    /**
     * Maturity age of classifiers
     */
    public FloatOption maturityOption = new FloatOption("maturity", 'a',
            "The maturity age.", 20, 0, 100);
    /**
     * Size of the evaluation window for weights computing
     */
    public FloatOption evaluationSizeOption = new FloatOption("evalSize", 'e',
            "The size of the evaluation window.", 20, 1, 1000);
    /**
     * Combination functions: MAX and WVD (MAX leads to a faster reactivity to the change,
     * WVD is more robust to noise)
     */
    public MultiChoiceOption combinationOption = new MultiChoiceOption("cmb", 'c', "The combination function.",
            new String[]{"MAX", "WVD"}, new String[]{"Maximum", "Weighted Vote of the best"},
            0);
    /**
     * Ensemble of classifiers
     */
    protected Classifier[] ensemble;
    /**
     * Weights of classifiers, each paired with the classifier's index in {@link #ensemble}
     */
    protected Pair[] ensembleWeights;
    /**
     * Age of classifiers (to compare with maturity age)
     */
    protected double[] ensembleAges;
    /**
     * Evaluation windows: one row per classifier, most recent record at position 0
     * (1 = correct classification, 0 = error)
     */
    protected int[][] ensembleWindows;
    /**
     * Number of instances from the stream
     */
    protected int nbInstances = 0;


    /**
     * Allocates the ensemble, age and evaluation-window arrays according to the
     * current values of the ensemble-size and evaluation-size options.
     */
    protected void initVariables() {
        int ensembleSize = (int) this.memberCountOption.getValue();
        this.ensemble = new Classifier[ensembleSize];
        this.ensembleAges = new double[ensembleSize];
        this.ensembleWindows = new int[ensembleSize][(int) this.evaluationSizeOption.getValue()];
    }

    /**
     * Rebuilds the ensemble: every slot receives a fresh copy of the configured base
     * learner, a zero age, a zero weight and an empty evaluation window.
     */
    @Override
    public void resetLearningImpl() {

        Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption);
        learner.resetLearning();

        initVariables();

        this.ensembleWeights = new Pair[this.ensemble.length];

        for (int i = 0; i < this.ensemble.length; i++) {
            this.ensemble[i] = learner.copy();
            this.ensembleAges[i] = 0;
            this.ensembleWeights[i] = new Pair(0.0, i);
            this.ensembleWindows[i] = new int[(int) this.evaluationSizeOption.getValue()];
        }
    }

    @Override
    public void trainOnInstanceImpl(Instance inst) {
        trainAndClassify(inst);
    }

    /**
     * Combines the votes of the classifiers selected by the chosen combination
     * function. Each selected classifier with a strictly positive weight contributes
     * its normalized vote scaled by that weight.
     *
     * @param inst the instance to classify
     * @return the combined vote; empty if nothing has been trained yet or no selected
     * classifier has a positive weight and a non-zero vote
     */
    @Override
    public double[] getVotesForInstance(Instance inst) {

        DoubleVector combinedVote = new DoubleVector();

        // Choice 0 of the "cmb" option is MAX; any other choice is WVD.
        ArrayList<Integer> voters;
        if (this.combinationOption.getChosenIndex() == 0) {
            voters = getMAXIndexes();
        } else {
            voters = getWVDIndexes();
        }

        if (this.trainingWeightSeenByModel > 0.0) {
            for (int member : voters) {
                double weight = this.ensembleWeights[member].val;
                if (weight > 0.0) {
                    DoubleVector vote = new DoubleVector(this.ensemble[member].getVotesForInstance(inst));
                    if (vote.sumOfValues() > 0.0) {
                        vote.normalize();
                        vote.scaleValues(weight);
                        combinedVote.addValues(vote);
                    }
                }
            }
        }
        return combinedVote.getArrayRef();
    }


    /**
     * Receives a training instance from the stream and
     * updates the adaptive classifiers accordingly
     * @param inst the instance from the stream
     */
    protected void trainAndClassify(Instance inst) {

        nbInstances++;

        boolean mature = true;
        boolean unmature = true;
        double maturityAge = this.maturityOption.getValue();

        // NOTE(review): the three conditions in this loop were reconstructed from a
        // copy of the file in which the text between '<' and '>' had been lost;
        // confirm against the upstream MOA sources.
        for (int i = 0; i < getNbActiveClassifiers(); i++) {

            boolean adaptive = i < getNbAdaptiveClassifiers();

            // check if all adaptive learners are mature
            if (adaptive && this.ensembleAges[i] < maturityAge) {
                mature = false;
            }

            // check if all adaptive learners are not mature
            if (adaptive && this.ensembleAges[i] >= maturityAge) {
                unmature = false;
            }

            // a classifier only starts being trained/evaluated once the stream has
            // delivered at least (index + 1) instances
            if (this.nbInstances >= this.ensembleWeights[i].index + 1) {

                // train adaptive learners
                if (adaptive) {
                    this.ensemble[i].trainOnInstance(inst);
                }

                int val = this.ensemble[i].correctlyClassifies(inst) ? 1 : 0;
                this.ensembleWeights[i].val = updateEvaluationWindow(i, val);
                this.ensembleAges[i] = this.ensembleAges[i] + 1;
            }
        }

        // if all adaptive learners are not mature --> set weights to one
        if (unmature) {
            for (int i = 0; i < getNbAdaptiveClassifiers(); i++) {
                this.ensembleWeights[i].val = 1;
            }
        }

        // if all adaptive learners are mature --> delete one learner,
        // drawn at random from the worst half
        if (mature) {
            Pair[] learners = getHalf(false);

            if (learners.length > 0) {
                int victim = classifierRandom.nextInt(learners.length);
                discardModel(learners[victim].index);
            }
        }
    }

    /**
     * Resets a classifier in the ensemble
     * @param index the index of the classifier in the ensemble
     */
    public void discardModel(int index) {
        this.ensemble[index].resetLearning();
        this.ensembleWeights[index].val = 0;
        this.ensembleAges[index] = 0;
        this.ensembleWindows[index] = new int[(int) this.evaluationSizeOption.getValue()];
    }

    /**
     * Updates the evaluation window of a classifier and returns the
     * updated weight value.
     * @param index the index of the classifier in the ensemble
     * @param val the last evaluation record of the classifier
     * @return the mean of the records in the window if the classifier has reached
     * the maturity age, 0 otherwise
     */
    protected double updateEvaluationWindow(int index, int val) {

        int[] oldWindow = this.ensembleWindows[index];
        int[] newWindow = new int[oldWindow.length];

        // number of meaningful records once the new one is inserted
        int wsize = (int) Math.min(this.evaluationSizeOption.getValue(), this.ensembleAges[index] + 1);

        // shift the retained records one slot towards the end (dropping the oldest
        // when the window is full) and accumulate them on the way
        int sum = 0;
        for (int i = 0; i < wsize - 1; i++) {
            newWindow[i + 1] = oldWindow[i];
            sum = sum + oldWindow[i];
        }

        newWindow[0] = val;
        this.ensembleWindows[index] = newWindow;

        if (this.ensembleAges[index] >= this.maturityOption.getValue()) {
            return (sum + val) * 1.0 / wsize;
        }
        return 0;
    }

    /**
     * Returns the best (or worst) half of classifiers in the adaptive ensemble.
     * The best classifiers are used to compute the stability index in ADACC. The worst
     * classifiers are returned in order to select a classifier for deletion.
     * @param bestHalf boolean value set to true (false) if we want to return
     * the best (worst) half of adaptive classifiers.
     * @return an array containing the weight values of the corresponding classifiers
     * and their indexes in the ensemble; its length is the number of adaptive
     * classifiers divided by two, rounded down.
     */
    protected Pair[] getHalf(boolean bestHalf) {

        // sort a copy of the references so that ensembleWeights keeps its order
        Pair[] sorted = new Pair[getNbAdaptiveClassifiers()];
        System.arraycopy(ensembleWeights, 0, sorted, 0, sorted.length);

        if (bestHalf) {
            Arrays.sort(sorted, Collections.reverseOrder());
        } else {
            Arrays.sort(sorted);
        }

        Pair[] result = new Pair[sorted.length / 2];
        System.arraycopy(sorted, 0, result, 0, result.length);

        return result;
    }


    /**
     * Returns the classifiers that vote for the final prediction
     * when the MAX combination function is selected
     * @return the indexes of the classifiers with the highest weight value
     */
    protected ArrayList<Integer> getMAXIndexes() {

        ArrayList<Integer> maxWIndex = new ArrayList<Integer>();
        Pair[] sorted = new Pair[getNbActiveClassifiers()];
        System.arraycopy(ensembleWeights, 0, sorted, 0, sorted.length);

        Arrays.sort(sorted);

        double maxWVal = sorted[sorted.length - 1].val;

        // walk down from the top of the ascending order while the weight ties the maximum
        for (int i = sorted.length - 1; i >= 0; i--) {
            if (sorted[i].val != maxWVal) {
                break;
            }
            maxWIndex.add(sorted[i].index);
        }
        return maxWIndex;
    }

    /**
     * Returns the classifiers that vote for the final prediction
     * when the WVD combination function is selected
     * @return the indexes of the classifiers whose weights lie in the higher half
     * of the ensemble's weight interval.
     */
    protected ArrayList<Integer> getWVDIndexes() {

        ArrayList<Integer> maxWIndex = new ArrayList<Integer>();

        Pair[] sorted = new Pair[getNbActiveClassifiers()];
        System.arraycopy(ensembleWeights, 0, sorted, 0, sorted.length);

        Arrays.sort(sorted);

        double minWVal = sorted[0].val;
        double maxWVal = sorted[sorted.length - 1].val;
        // NOTE(review): this threshold is half the weight range, not the midpoint
        // (minWVal + range / 2); the two only coincide when the minimum weight is 0.
        // Left unchanged to preserve existing behaviour -- confirm which is intended.
        double med = (maxWVal - minWVal) * 1.0 / 2;

        for (int i = sorted.length - 1; i >= 0; i--) {
            if (sorted[i].val < med) {
                break;
            }
            maxWIndex.add(sorted[i].index);
        }

        return maxWIndex;
    }


    /**
     * Returns the number of classifiers used for prediction
     * which includes the adaptive learners and the snapshots in ADACC
     * @return the number of classifiers used for prediction
     */
    protected int getNbActiveClassifiers() {
        return this.ensemble.length;
    }

    /**
     * Returns the number of adaptive classifiers in the ensemble
     * which excludes the static snapshots in ADACC
     * @return the number of adaptive classifiers
     */
    protected int getNbAdaptiveClassifiers() {
        return this.ensemble.length;
    }

    @Override
    public void getModelDescription(StringBuilder out, int indent) {
        // TODO Auto-generated method stub
    }

    /**
     * Reports the ensemble size and the current values of the maturity,
     * evaluation-size and combination options.
     */
    @Override
    protected Measurement[] getModelMeasurementsImpl() {
        Measurement[] measurements = new Measurement[4];
        measurements[0] = new Measurement("size ",
                this.ensemble.length);
        measurements[1] = new Measurement("maturity ",
                this.maturityOption.getValue());
        measurements[2] = new Measurement("evalsize ",
                this.evaluationSizeOption.getValue());
        measurements[3] = new Measurement("cmb ",
                this.combinationOption.getChosenIndex());
        return measurements;
    }

    @Override
    public boolean isRandomizable() {
        return true;
    }

    @Override
    public Classifier[] getSubClassifiers() {
        return this.ensemble.clone();
    }

    /**
     * This helper class is used to sort an array of pairs made of a weight value
     * (double) and a classifier index (int).
     * The array is sorted based on the val field only.
     * @author Ghazal Jaber
     *
     */
    protected class Pair implements Comparable<Pair>, Serializable {

        private static final long serialVersionUID = 1L;
        double val;
        int index;

        public Pair(double d, int i) {
            this.val = d;
            this.index = i;
        }

        /**
         * Orders pairs by ascending {@code val}; the index is ignored.
         */
        @Override
        public int compareTo(Pair other) {
            return Double.compare(this.val, other.val);
        }

        public double getValue() {
            return val;
        }

    }

}