moa.clusterers.outliers.AnyOut.AnyOutCore Maven / Gradle / Ivy

Massive On-line Analysis is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. Related to the WEKA project, it is also written in Java, while scaling to more demanding problems.

/*
 *    AnyOutCore.java
 *
 *    @author I. Assent, P. Kranen, C. Baldauf, T. Seidl
 *    @author G. Piskas, A. Gounaris
 * 
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *    
 *    
 */

package moa.clusterers.outliers.AnyOut;

import com.github.javacliparser.FlagOption;
import com.github.javacliparser.FloatOption;
import com.github.javacliparser.IntOption;
import com.yahoo.labs.samoa.instances.DenseInstance;
import java.util.ArrayList;
import java.util.HashMap;
import moa.clusterers.clustree.ClusKernel;
import moa.clusterers.clustree.ClusTree;
import moa.clusterers.clustree.Entry;
import moa.clusterers.clustree.Node;
import moa.clusterers.outliers.AnyOut.util.DataObject;
import moa.clusterers.outliers.AnyOut.util.DataSet;


@SuppressWarnings("serial")
public class AnyOutCore extends ClusTree {
	
	///////////////////////////////////////////////
	// the variables all became HashMaps to easily map the values to the objectIds,
	// i.e. they exist once per object that is currently examined
	// the lists have then to be used by the methods from MultipleDetector (see below)
	// for the static manager, use ID=0 as default for the current object!
	private HashMap<Integer, Double> aggregatedOScoreResult, lastOScoreResult, lastConfidenceResult;
	private HashMap<Integer, ClusKernel> objectAsKernel;
	private HashMap<Integer, ArrayList<Double>> previousOScoreResultList;
	private HashMap<Integer, Node> descendToNode;
	private HashMap<Integer, Integer> currentLevel;
	///////////////////////////////////////////////

	// Outlier score threshold.
	private double threshold;
	
	// Entry weight threshold.
    private double weightThreshold = 0.05;
	private int oScoreK;
	private int confK;

	public IntOption trainingSetSizeOption = new IntOption("TrainingSetSize", 't', "Training Set Size.", 1000, 0, 10000);
	//public FlagOption UseBulkLoadingOption = new FlagOption("UseBulkLoading", 'b', "Use Bulkloading or traditional learning.");
	public IntOption oScoreKOption = new IntOption("OScorek", 'o', "Size of Oscore aggregate.", 2, 1, 10);
	public IntOption confKOption = new IntOption("Confidencek", 'c', "Size of confidence aggregate.", 2, 1, 10);
	public IntOption confidenceChoiceOption = new IntOption("confidence", 'd', "Confidence Measure.", 4, 1, 6);
	public FlagOption UseMeanScoreOption = new FlagOption("UseMeanScore", 'm', "Use Mean score or Density score.");
	public FloatOption threshholdOption = new FloatOption("Threshold", 'z', "Threshold", 0.07, 0, 1);
	
	public AnyOutCore() {
		lastOScoreResult = new HashMap<Integer, Double>();
		lastConfidenceResult = new HashMap<Integer, Double>();
		objectAsKernel = new HashMap<Integer, ClusKernel>();
		aggregatedOScoreResult = new HashMap<Integer, Double>();
		previousOScoreResultList = new HashMap<Integer, ArrayList<Double>>();
		descendToNode = new HashMap<Integer, Node>();
		currentLevel = new HashMap<Integer, Integer>();
	}
	
	public void resetLearning() {
		if (UseMeanScoreOption.isSet()) {
			threshold = threshholdOption.getValue();
		} else {
			threshold = 0.0;
		}
		oScoreK = oScoreKOption.getValue();
		confK = confKOption.getValue();
		super.resetLearningImpl();
	}
	
	public void train(DataSet trainingSet) {
		// TODO fix not working builder!
		// ClusTree private variables are not updated but are mandatory for the algorithm to function.
//		if (UseBulkLoadingOption.isSet()) { 
//			// Use BulkLoading
//			EMTopDownTreeBuilder builder = new EMTopDownTreeBuilder();
//			try {
//				this.root = builder.buildTree(trainingSet);
//			} catch (Exception e) {
//				e.printStackTrace();
//			}
//		} else {
			//Use traditional initialization
			for (DataObject o : trainingSet.getDataObjectArray()){
				DenseInstance inst = new DenseInstance(o.getFeatures().length);
				for(int i=0; i<o.getFeatures().length; i++) {
					inst.setValue(i, o.getFeatures()[i]);
				}
				trainOnInstanceImpl(inst);
			}
//		}
	}
	
	public void initObject(int objectId, double[] features) {
		// initialize the score history and aggregates for this object
		previousOScoreResultList.put(objectId, new ArrayList<Double>());
		currentLevel.put(objectId, 0);
		// process root of the tree and set score according to the closest entry
		ClusKernel newKernel = new ClusKernel(features, features.length);
		objectAsKernel.put(objectId, newKernel);
		
		Entry closestEntry = root.nearestEntry(newKernel);
		if (UseMeanScoreOption.isSet()) 
			lastOScoreResult.put(objectId, newKernel.calcDistance(closestEntry.data));
		else
			lastOScoreResult.put(objectId,getDensityOutlierScore(newKernel,closestEntry.data));
		
		aggregatedOScoreResult.put(objectId, lastOScoreResult.get(objectId));
		// remember (store) next Node to descend into for further processing 
		descendToNode.put(objectId, closestEntry.getChild());
    	//update confidence
		updateConfidence(objectId);
	}
	
	public void learnObject(double[] features){
		DenseInstance inst = new DenseInstance(features.length);
		for(int i=0; i<features.length; i++) {
			inst.setValue(i, features[i]);
		}
		trainOnInstanceImpl(inst);
	}

	public boolean isOutlier(int id) {
		return getOutlierScore(id) > threshold;
	}

	public double getOutlierScore(int id) {
		return aggregatedOScoreResult.get(id)/lastConfidenceResult.get(id);
	}

	public double getConfidence(int id) {
		return lastConfidenceResult.get(id);
	}
}
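
The class above exposes a small per-object API: train(DataSet) or learnObject(double[]) grow the underlying ClusTree, initObject(int, double[]) scores a new object against the current tree, and isOutlier, getOutlierScore and getConfidence read the result for a given object id. The helper methods referenced above (updateConfidence, getDensityOutlierScore) belong to this class but are not reproduced in this listing. The driver below is a minimal, hypothetical sketch and is not part of AnyOutCore.java: the class name, data values and training loop are invented for illustration, and in MOA this core is normally driven by the AnyOut outlier detector rather than called directly.

import java.util.Random;

import moa.clusterers.outliers.AnyOut.AnyOutCore;

// Hypothetical driver: trains the core on Gaussian points around the origin,
// then scores one far-away query object under the default option values.
public class AnyOutCoreExample {
	public static void main(String[] args) {
		AnyOutCore core = new AnyOutCore();
		core.resetLearning();                 // read option values, reset the ClusTree

		// Feed a batch of 2-d training points (size matches the TrainingSetSize default).
		Random rnd = new Random(1);
		for (int i = 0; i < 1000; i++) {
			core.learnObject(new double[] { rnd.nextGaussian(), rnd.nextGaussian() });
		}

		// Score a distant point; id 0 is used as the key for the current object.
		int id = 0;
		core.initObject(id, new double[] { 8.0, 8.0 });
		System.out.println("outlier?    " + core.isOutlier(id));
		System.out.println("score:      " + core.getOutlierScore(id));
		System.out.println("confidence: " + core.getConfidence(id));
	}
}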



