All Downloads are FREE. Search and download functionalities are using the official Maven repository.

moa.classifiers.core.driftdetection.SeqDrift2ChangeDetector Maven / Gradle / Ivy

Go to download

Massive On-line Analysis is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. Related to the WEKA project, also written in Java, while scaling to more demanding problems.

There is a newer version: 2024.07.0
Show newest version
/*
 *    SeqDrift2ChangeDetector.java
 *    Copyright (C) 2011 University of Waikato, Hamilton, New Zealand
 *    @author Sakthithasan Sripirakas sripirakas363 at yahoo dot com
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package moa.classifiers.core.driftdetection;

import com.github.javacliparser.FloatOption;
import com.github.javacliparser.IntOption;
import moa.core.ObjectRepository;
import moa.tasks.TaskMonitor;
import java.util.ArrayList;
import java.util.List;
import moa.AbstractMOAObject;

/**
 * SeqDrift2 change detector. This extends {@link AbstractChangeDetector}
 * as required by MOA.
 * 
 * Pears, R., Sakthithasan, S., & Koh, Y. (2014). Detecting concept change in 
 * dynamic data streams. Machine Learning, 97(3), 259-293. doi:10.1007/s10994-013-5433-9
 *
 * @author Sakthithasan Sripirakas sripirakas363 at yahoo dot com
 * @version $Revision: 7 $
 */
public class SeqDrift2ChangeDetector extends AbstractChangeDetector {

    protected SeqDrift2 seqdrift;

    public FloatOption deltaSeqDrift2Option = new FloatOption("deltaSeq2Drift", 'd',
            "Delta of SeqDrift2 change detection",0.01, 0.0, 1.0);
    
    public IntOption blockSeqDrift2Option = new IntOption("blockSeqDrift2Option",'b',"Block size of SeqDrift2 change detector", 200, 100, 10000);

    @Override
    public void input(double inputValue) {
        if (this.seqdrift == null) {
            resetLearning();
        }
        this.isChangeDetected = seqdrift.setInput(inputValue);
        this.isWarningZone = false;
        this.delay = 0.0;
        this.estimation = seqdrift.getEstimation();
    }

    @Override
    public void resetLearning() {
        seqdrift = new SeqDrift2((double) this.deltaSeqDrift2Option.getValue(),((int) this.blockSeqDrift2Option.getValue()));
    }

    @Override
    public void getDescription(StringBuilder sb, int indent) {
        // TODO Auto-generated method stub
    }

    @Override
    protected void prepareForUseImpl(TaskMonitor monitor,
            ObjectRepository repository) {
        // TODO Auto-generated method stub
    }
    

/**
 * SeqDrift2 uses reservoir sampling to build a sequential change detection
* model that uses statistically sound guarantees defined using Bernstein Bound 
* on false positive and false negative rates. This is a block based approach and checks 
* for changes in the data values only at block boundaries as opposed to the 
* methods on per instance basis. SeqDrift maintains a reservoir and a repository.
* Repository gathers the new instances and reservoir stores only the data values 
* that are statistically not different, in other words from the same distribution. 
* If the data values in the repository are consistent with the values in reservoir, 
* the data values of the repository are copied to the reservoir applying 
* reservoir algorithm.The hypothesis is that the mean values of the reservoir and right repository 
* are not statistically different
 * 
 * 
 * 
 * Pears, R., Sakthithasan, S., & Koh, Y. (2014). Detecting concept change in 
 * dynamic data streams. Machine Learning, 97(3), 259-293. doi:10.1007/s10994-013-5433-9
 * 
 * @author Sakthithasan Sripirakas sripirakas363 at yahoo dot com
 */
public class SeqDrift2 extends AbstractMOAObject {

    private final Reservoir rightRepository;
    private final Reservoir leftReservoir;

    //parameters
    private final int blockSize;

    //parameters that are optimized
    private final double significanceLevel;
    private int leftReservoirSize;
    private final int rightRepositorySize;
    private final double k;

    //variables
    private int instanceCount = 0;
    private double leftReservoirMean = 0.0;
    private double rightRepositoryMean = 0.0;
    private double variance = 0.0;
    private double total = 0.0;
    private double epsilon = 0.0;

    private final static int DRIFT = 0;
    private final static int NODRIFT = 2;
    private final static int INTERNAL_DRIFT = 3;

    /**
     * SeqDrift change detector requires two parameters: significance level and 
     * block size. Significance level controls the false positive rate and block 
     * size sets the interval of two consecutive hypothesis tests
     * Block Size is a positive integer and significance level is a double value
     * between 0 and 1
     * @param _significanceLevel
     * @param _blockSize
     */
    public SeqDrift2(double _significanceLevel, int _blockSize) {
        significanceLevel = _significanceLevel;
        blockSize = _blockSize;
        leftReservoirSize = _blockSize;
        rightRepositorySize = _blockSize;
        k = 0.5;

        instanceCount = 0;
        variance = 0;
        total = 0.0;
        epsilon = 0.0;

        //Data Structures        
        leftReservoir = new Reservoir(leftReservoirSize, blockSize);
        rightRepository = new Reservoir(rightRepositorySize, blockSize);
    }
    /**
     * This method can be used to directly interface with SeqDrift change
     * detector. This method requires a numerical value as an input. The return 
     * value indicates whether there is a change detected or not. 
     * @param input numerical value
     */
    public boolean setInput(double _inputValue) {
        ++instanceCount;
        //i_numInstances++;

        addToRightReservoir(_inputValue);
        total = total + _inputValue;

        if ((instanceCount % blockSize) == 0) //checking for drift at block boundary
        {
            int iDriftType = getDriftType();

            if (iDriftType == DRIFT) {
                clearLeftReservoir();
                moveFromRepositoryToReservoir();
                return true;
            } 
            /*
            else if (iDriftType == INTERNAL_DRIFT) { //Ignoring the changes where 
            mean values decrease
                clearLeftReservoir();
                moveFromRepositoryToReservoir();
                return false;
            }
            */
            else //No drift is detected
            {
                moveFromRepositoryToReservoir();
                return false;
            }
        }
        return false;
    }

    /**
     *
     * 

* This method adds new value to right reservoir * * @param _inputValue A double data value * @return void */ private void addToRightReservoir(double _inputValue) { rightRepository.addElement(_inputValue); } /** * *

* This method copies the data values of the repository to the reservoir * applying reservoir algorithm * * @param void * @return void */ private void moveFromRepositoryToReservoir() { leftReservoir.copy(rightRepository); } /** * *

* This method removes all elements from the reservoir after a drift is * detected. * * @param void * @return void */ private void clearLeftReservoir() { total = total - leftReservoir.getTotal(); leftReservoir.clear(); } /** * *

* This method returns the type of drift detected The possible values are: * DRIFT, INTERNAL_DRIFT and NODRIFT * * @param * @return boolean True - if drift is detected. False - otherwise */ private int getDriftType() { if (getWidth() > blockSize) { leftReservoirMean = getLeftReservoirMean(); rightRepositoryMean = getRightRepositoryMean(); optimizeEpsilon(); if ((instanceCount > blockSize) && (leftReservoir.getSize() > 0)) { if (epsilon <= Math.abs(rightRepositoryMean - leftReservoirMean)) { //if (rightRepositoryMean > leftReservoirMean) { return DRIFT; //} /*else { return INTERNAL_DRIFT; } */ } else { return NODRIFT; } } return NODRIFT; } else { return NODRIFT; } } private double getLeftReservoirMean() { return leftReservoir.getSampleMean(); } private double getRightRepositoryMean() { return rightRepository.getSampleMean(); } private double getVariance() { double mean = getMean(); double meanminum1 = mean - 1; double size = getWidth(); double x = getTotal() * meanminum1 * meanminum1 + (size - getTotal()) * mean * mean; double y = size - 1; return x / y; } private void optimizeEpsilon() { int tests = leftReservoir.getSize() / blockSize; if (tests >= 1) { variance = getVariance(); if (variance == 0) { variance = 0.0001; // to avoid divide by zero exception } //Drift epsilon double ddeltadash = getDriftEpsilon(tests); double x = Math.log(4.0 / ddeltadash); double ktemp = this.k; double previousStepEpsilon; double currentStepEpsilon; double squareRootValue = 0.0; boolean IsNotOptimized = true; while (IsNotOptimized) { squareRootValue = Math.sqrt(x * x + 18 * rightRepositorySize * x * variance); previousStepEpsilon = (1.0 / (3 * rightRepositorySize * (1 - ktemp))) * (x + squareRootValue); ktemp = 3 * ktemp / 4; currentStepEpsilon = (1.0 / (3 * rightRepositorySize * (1 - ktemp))) * (x + squareRootValue); if (((previousStepEpsilon - currentStepEpsilon) / previousStepEpsilon) < 0.0001) { IsNotOptimized = false; } } ktemp = 4 * ktemp / 3; ktemp = adjustForDataRate(ktemp); 
leftReservoirSize = (int) (rightRepositorySize * (1 - ktemp) / ktemp); leftReservoir.setMaxSize(leftReservoirSize); squareRootValue = Math.sqrt(x * x + 18 * rightRepositorySize * x * variance); currentStepEpsilon = (1.0 / (3 * rightRepositorySize * (1 - ktemp))) * (x + squareRootValue); epsilon = currentStepEpsilon; } } private double getDriftEpsilon(int _inumTests) { double dSeriesTotal = 2.0 * (1.0 - Math.pow(0.5, _inumTests)); double ddeltadash = significanceLevel / dSeriesTotal; return ddeltadash; } private double getMean() { return getTotal() / getWidth(); } private double getTotal() { return rightRepository.getTotal() + leftReservoir.getTotal(); } private double adjustForDataRate(double _dKr) { double meanIncrease = (rightRepository.getSampleMean() - leftReservoir.getSampleMean()); double dk = _dKr; if (meanIncrease > 0) { dk = dk + ((-1) * (meanIncrease * meanIncrease * meanIncrease * meanIncrease) + 1) * _dKr; } else if (meanIncrease <= 0) { dk = _dKr; } return dk; } private int getWidth() { return leftReservoir.getSize() + rightRepository.getSize(); } /** * Gets the prediction of next values. 
* @return Predicted value of next data value */ public double getEstimation() { int iWidth = getWidth(); if (iWidth != 0) { return getTotal() / getWidth(); } else { return 0; } } public void getDescription(StringBuilder sb, int indent) { } } public class Reservoir { private int size; private double total; private final int blocksize; private final Repository dataContainer; private int instanceCount; private int MAX_SIZE; public Reservoir(int _iSize, int _iBlockSize) { MAX_SIZE = _iSize; total = 0; blocksize = _iBlockSize; instanceCount = 0; dataContainer = new Repository(blocksize); } public double getSampleMean() { return total / size; } public void addElement(double _dValue) { try { if (size < MAX_SIZE) { dataContainer.add(new Double(_dValue)); total = total + _dValue; size++; } else { int irIndex = (int) (Math.random() * instanceCount); if (irIndex < MAX_SIZE) { total = total - dataContainer.get(irIndex); dataContainer.addAt(irIndex, _dValue); total = total + _dValue; } } instanceCount++; } catch (Exception e) { System.out.println("2 Exception" + e); } } public double get(int _iIndex) { return dataContainer.get(_iIndex); } public int getSize() { return size; } public void clear() { dataContainer.removeAll(); total = 0; size = 0; } public double getTotal() { return total; } public void copy(Reservoir _oSource) { for (int iIndex = 0; iIndex < _oSource.getSize(); iIndex++) { addElement(_oSource.get(iIndex)); } _oSource.clear(); } public void setMaxSize(int _iMaxSize) { MAX_SIZE = _iMaxSize; } } public class Repository { private final int blockSize; private final List blocks; private int indexOfLastBlock; private int instanceCount; private double total; public Repository(int _iBlockSize) { blockSize = _iBlockSize; indexOfLastBlock = -1; instanceCount = 0; total = 0; blocks = new ArrayList(); } public void add(double _dValue) { if ((instanceCount % blockSize) == 0) { blocks.add(new Block(blockSize)); indexOfLastBlock++; } blocks.get(indexOfLastBlock).add(_dValue); 
instanceCount++; total = total + _dValue; } public void add(double _dValue, boolean _isTested) { if((instanceCount % blockSize) == 0) { blocks.add(new Block(blockSize, _isTested)); indexOfLastBlock++; } blocks.get(indexOfLastBlock).add(_dValue); instanceCount++; total= total + _dValue; } public double get(int _iIndex) { return blocks.get(_iIndex / blockSize).data[(_iIndex % blockSize)]; } public void addAt(int _iIndex, double _dValue) { blocks.get(_iIndex / blockSize).addAt(_iIndex % blockSize, _dValue); } public int getSize() { return instanceCount; } public double getTotal() { double dTotal = 0.0; for (int iIndex = 0; iIndex < blocks.size(); iIndex++) { dTotal = dTotal + blocks.get(iIndex).total; } return dTotal; } public double getFirstBlockTotal() { return blocks.get(0).total; } public void markLastAddedBlock() { if(blocks.size() > 0) { blocks.get(blocks.size() - 1).setTested(true); } } public void removeFirstBlock() { total = total - blocks.get(0).total; blocks.remove(0); instanceCount = instanceCount - blockSize; indexOfLastBlock--; } public void removeAll() { blocks.clear(); indexOfLastBlock = -1; instanceCount = 0; total = 0; } public int getNumOfTests() { int iNumTests = 0; for(int iIndex = 0; iIndex < blocks.size(); iIndex++) { if(blocks.get(iIndex).IsTested()) iNumTests++; } return iNumTests; } } public class Block { public double[] data; public double total; private int indexOfLastValue; private boolean b_IsTested; Block(int _iLength) { data = new double[_iLength]; total = 0.0; indexOfLastValue = 0; for (int iIndex = 0; iIndex < data.length; iIndex++) { data[iIndex] = -1; } } Block(int _iLength, boolean _isTested) { data = new double[_iLength]; total = 0.0; indexOfLastValue = 0; b_IsTested = _isTested; for(int iIndex=0;iIndex < data.length;iIndex++) { data[iIndex] = -1; } } public void add(double _dValue) { if (indexOfLastValue < data.length) { data[indexOfLastValue] = _dValue; total = total + _dValue; indexOfLastValue++; } else { 
System.out.println("ERROR in adding to Block. Last Index :" + indexOfLastValue + " Total :" + total + " Array Length :" + data.length); System.exit(2); } } public void addAt(int _iIndex, double _dNewValue) { total = total - data[_iIndex] + _dNewValue; data[_iIndex] = _dNewValue; } public void setTested(boolean _isTested) { b_IsTested = _isTested; } public boolean IsTested() { return b_IsTested; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy