moa.clusterers.outliers.AnyOut.AnyOutCore Maven / Gradle / Ivy
Massive On-line Analysis (MOA) is an environment for massive data mining. MOA
provides a framework for data stream mining and includes tools for evaluation
as well as a collection of machine learning algorithms. It is related to the WEKA
project and is also written in Java, while scaling to more demanding problems.
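For orientation, here is a minimal, hypothetical driver for the AnyOutCore class listed below (it is not part of MOA, and the class name AnyOutCoreSketch is made up). It assumes that resetLearning() followed by a few learnObject() calls is enough to initialize the underlying ClusTree before objects are scored; the method names are taken directly from the listing.

import moa.clusterers.outliers.AnyOut.AnyOutCore;

public class AnyOutCoreSketch {
    public static void main(String[] args) {
        AnyOutCore core = new AnyOutCore();
        core.resetLearning(); // sets the thresholds and resets the underlying ClusTree

        // Warm up the model on a few two-dimensional points.
        double[][] training = { {1.0, 1.1}, {0.9, 1.0}, {1.1, 0.9}, {1.0, 0.95} };
        for (double[] x : training) {
            core.learnObject(x);
        }

        // Examine a single object; ID 0 is the default used for the "static manager".
        double[] candidate = {5.0, 5.0};
        core.initObject(0, candidate);
        System.out.println("score      = " + core.getOutlierScore(0));
        System.out.println("confidence = " + core.getConfidence(0));
        System.out.println("outlier?   = " + core.isOutlier(0));
    }
}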
/*
* AnyOutCore.java
*
* @author I. Assent, P. Kranen, C. Baldauf, T. Seidl
* @author G. Piskas, A. Gounaris
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
*/
package moa.clusterers.outliers.AnyOut;
import com.github.javacliparser.FlagOption;
import com.github.javacliparser.FloatOption;
import com.github.javacliparser.IntOption;
import com.yahoo.labs.samoa.instances.DenseInstance;
import java.util.ArrayList;
import java.util.HashMap;
import moa.clusterers.clustree.ClusKernel;
import moa.clusterers.clustree.ClusTree;
import moa.clusterers.clustree.Entry;
import moa.clusterers.clustree.Node;
import moa.clusterers.outliers.AnyOut.util.DataObject;
import moa.clusterers.outliers.AnyOut.util.DataSet;
@SuppressWarnings("serial")
public class AnyOutCore extends ClusTree {
///////////////////////////////////////////////
// The variables all became HashMaps so that values can easily be mapped to objectIds,
// i.e. they exist once per object that is currently being examined.
// The lists are then used by the methods of MultipleDetector (see below).
// For the static manager, use ID=0 as the default for the current object!
// (A short usage sketch of this per-object bookkeeping follows after the class.)
private HashMap<Integer, Double> aggregatedOScoreResult, lastOScoreResult, lastConfidenceResult;
private HashMap<Integer, ClusKernel> objectAsKernel;
private HashMap<Integer, ArrayList<Double>> previousOScoreResultList;
private HashMap<Integer, Node> descendToNode;
private HashMap<Integer, Integer> currentLevel;
///////////////////////////////////////////////
// Outlier score threshold.
private double threshold;
// Entry weight threshold.
private double weightThreshold = 0.05;
private int oScoreK;
private int confK;
public IntOption trainingSetSizeOption = new IntOption("TrainingSetSize", 't', "Training Set Size.", 1000, 0, 10000);
//public FlagOption UseBulkLoadingOption = new FlagOption("UseBulkLoading", 'b', "Use Bulkloading or traditional learning.");
public IntOption oScoreKOption = new IntOption("OScorek", 'o', "Size of Oscore aggregate.", 2, 1, 10);
public IntOption confKOption = new IntOption("Confidencek", 'c', "Size of confidence aggregate.", 2, 1, 10);
public IntOption confidenceChoiceOption = new IntOption("confidence", 'd', "Confidence Measure.", 4, 1, 6);
public FlagOption UseMeanScoreOption = new FlagOption("UseMeanScore", 'm', "Use Mean score or Density score.");
public FloatOption threshholdOption = new FloatOption("Threshold", 'z', "Threshold", 0.07, 0, 1);
public AnyOutCore() {
lastOScoreResult = new HashMap<Integer, Double>();
lastConfidenceResult = new HashMap<Integer, Double>();
objectAsKernel = new HashMap<Integer, ClusKernel>();
aggregatedOScoreResult = new HashMap<Integer, Double>();
previousOScoreResultList = new HashMap<Integer, ArrayList<Double>>();
descendToNode = new HashMap<Integer, Node>();
currentLevel = new HashMap<Integer, Integer>();
}
public void resetLearning() {
if (UseMeanScoreOption.isSet()) {
threshold = threshholdOption.getValue();
} else {
threshold = 0.0;
}
oScoreK = oScoreKOption.getValue();
confK = confKOption.getValue();
super.resetLearningImpl();
}
public void train(DataSet trainingSet) {
// TODO fix not working builder!
// ClusTree private variables are not updated but are mandatory for the algorithm to function.
// if (UseBulkLoadingOption.isSet()) {
// // Use BulkLoading
// EMTopDownTreeBuilder builder = new EMTopDownTreeBuilder();
// try {
// this.root = builder.buildTree(trainingSet);
// } catch (Exception e) {
// e.printStackTrace();
// }
// } else {
//Use traditional initialization
for (DataObject o : trainingSet.getDataObjectArray()){
DenseInstance inst = new DenseInstance(o.getFeatures().length);
for (int i = 0; i < o.getFeatures().length; i++) {
inst.setValue(i, o.getFeatures()[i]);
}
trainOnInstanceImpl(inst);
}
// }
}
public void initObject(int objectId, double[] features) {
// initialize the per-object structures for this objectId
previousOScoreResultList.put(objectId, new ArrayList<Double>());
currentLevel.put(objectId, 0);
// process root of the tree and set score according to the closest entry
ClusKernel newKernel = new ClusKernel(features, features.length);
objectAsKernel.put(objectId, newKernel);
Entry closestEntry = root.nearestEntry(newKernel);
if (UseMeanScoreOption.isSet())
lastOScoreResult.put(objectId, newKernel.calcDistance(closestEntry.data));
else
lastOScoreResult.put(objectId,getDensityOutlierScore(newKernel,closestEntry.data));
aggregatedOScoreResult.put(objectId, lastOScoreResult.get(objectId));
// remember (store) next Node to descend into for further processing
descendToNode.put(objectId, closestEntry.getChild());
//update confidence
updateConfidence(objectId);
}
public void learnObject(double[] features){
DenseInstance inst = new DenseInstance(features.length);
for (int i = 0; i < features.length; i++) {
inst.setValue(i, features[i]);
}
trainOnInstanceImpl(inst);
}
// ... (additional methods of the original source, including updateConfidence(int) and
// getDensityOutlierScore(ClusKernel, ClusKernel), are not reproduced in this listing) ...
// An object is reported as an outlier when its confidence-normalized score exceeds the threshold.
public boolean isOutlier(int id) {
return getOutlierScore(id) > threshold;
}
public double getOutlierScore(int id) {
return aggregatedOScoreResult.get(id)/lastConfidenceResult.get(id);
}
public double getConfidence(int id) {
return lastConfidenceResult.get(id);
}
}
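As noted in the field comments above, all per-object state is kept in HashMaps keyed by objectId, so several objects can be examined concurrently. Below is a small, hypothetical sketch of that bookkeeping (the class name AnyOutPerObjectSketch is made up, and the same initialization assumptions as in the driver at the top of the page apply).

import moa.clusterers.outliers.AnyOut.AnyOutCore;

public class AnyOutPerObjectSketch {
    public static void main(String[] args) {
        AnyOutCore core = new AnyOutCore();
        core.resetLearning();
        for (double[] x : new double[][] { {0.0, 0.0}, {0.1, 0.0}, {0.0, 0.1} }) {
            core.learnObject(x); // assumed sufficient to build an initial tree
        }

        // Each objectId gets its own score, confidence and descent state in the HashMaps.
        core.initObject(1, new double[] {0.05, 0.05}); // close to the training data
        core.initObject(2, new double[] {9.0, 9.0});   // far away from it

        System.out.printf("id=1 score=%.4f outlier=%b%n", core.getOutlierScore(1), core.isOutlier(1));
        System.out.printf("id=2 score=%.4f outlier=%b%n", core.getOutlierScore(2), core.isOutlier(2));
    }
}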