![JAR search and dependency download from the Maven repository](/logo.png)
weka.classifiers.misc.LOF Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of localOutlierFactor Show documentation
Show all versions of localOutlierFactor Show documentation
A filter that applies the LOF (Local Outlier Factor) algorithm to compute an outlier score for each instance in the data. Can use multiple cores/cpus to speed up the LOF computation for large datasets. Nearest neighbor search methods and distance functions are pluggable.
For more information, see:
Markus M. Breunig, Hans-Peter Kriegel, Raymond T. Ng, Jorg Sander (2000). LOF: Identifying Density-Based Local Outliers. ACM SIGMOD Record. 29(2):93-104.
The newest version!
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* LOF.java
* Copyright (C) 1999-2013 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.misc;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.AbstractClassifier;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.CapabilitiesHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.neighboursearch.LinearNNSearch;
import weka.core.neighboursearch.NearestNeighbourSearch;
import weka.filters.Filter;
/**
* A Classifier that applies the LOF (Local Outlier Factor) algorithm to compute an "outlier" score for each instance in the data. The data is expected to have a unary or binary class attribute, which is ignored at training time. The distributionForInstance() method returns the outlier score in the first element of the distribution. If the class attribute is binary, then the second element holds one minus this score. To evaluate performance of this method for a dataset where outliers/anomalies are known, simply code the outliers using the class attribute: normal cases should correspond to the second value of the class attribute; outliers to the first one.
*
* Can use multiple cores/cpus to speed up the LOF computation for large datasets. Nearest neighbor search methods and distance functions are pluggable.
*
* For more information, see:
*
* Markus M. Breunig, Hans-Peter Kriegel, Raymond T. Ng, Jorg Sander (2000). LOF: Identifying Density-Based Local Outliers. ACM SIGMOD Record. 29(2):93-104.
*
*
* BibTeX:
*
* @article{Breunig2000,
* author = {Markus M. Breunig and Hans-Peter Kriegel and Raymond T. Ng and Jorg Sander},
* journal = {ACM SIGMOD Record},
* number = {2},
* pages = {93-104},
* publisher = {ACM New York},
* title = {LOF: Identifying Density-Based Local Outliers},
* volume = {29},
* year = {2000}
* }
*
*
*
* Valid options are:
*
* -min <num>
* Lower bound on the k nearest neighbors for finding max LOF (minPtsLB)
* (default = 10)
*
* -max <num>
* Upper bound on the k nearest neighbors for finding max LOF (minPtsUB)
* (default = 40)
*
* -A
* The nearest neighbour search algorithm to use (default: weka.core.neighboursearch.LinearNNSearch).
*
*
* -num-slots <num>
* Number of execution slots.
* (default 1 - i.e. no parallelism)
*
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: $
*
*/
public class LOF extends AbstractClassifier implements Serializable,
CapabilitiesHandler, OptionHandler, TechnicalInformationHandler,
RevisionHandler {
/**
 * For serialization
 */
private static final long serialVersionUID = -2736613569494944202L;
/**
 * Instance of the LOF filter from weka.filters.unsupervised.attribute.
 * NOTE(review): presumably the filter this classifier delegates the LOF
 * computation to; its usage is not visible in this part of the file - confirm.
 */
protected weka.filters.unsupervised.attribute.LOF m_lof;
/** The lower bound on the minimum number of points (k); held as a String, initially "10" */
protected String m_minPtsLB = "10";
/** The upper bound on the minimum number of points (k); held as a String, initially "40" */
protected String m_minPtsUB = "40";
/** The nearest neighbor search to use; initially a LinearNNSearch */
protected NearestNeighbourSearch m_nnTemplate = new LinearNNSearch();
/**
 * Held as a String, initially "1".
 * NOTE(review): presumably backs the -num-slots option (number of execution
 * slots) described in the class documentation - confirm.
 */
protected String m_numSlots = "1";
/** NOTE(review): presumably the smallest outlier score seen; where it is assigned is not visible here - confirm */
protected double m_minScore;
/** NOTE(review): presumably the largest outlier score seen; where it is assigned is not visible here - confirm */
protected double m_maxScore;
/**
 * Builds the human-readable description of this scheme: an overview of what
 * the classifier does, a note on parallelism and pluggable components, and
 * the literature reference.
 *
 * @return a description of the scheme suitable for displaying in the
 *         explorer/experimenter gui; the text ends with the string form of
 *         {@link #getTechnicalInformation()}
 */
public String globalInfo() {
  // First paragraph: what the scheme does and how the class attribute is used.
  final String overview =
    "A Classifier that applies the LOF (Local Outlier Factor) algorithm "
      + "to compute an \"outlier\" score for each instance in the data. "
      + "The data is expected to have a unary or binary class attribute, which "
      + "is ignored at training time. The distributionForInstance() method returns "
      + "the outlier score in the first element of the distribution. If the class "
      + "attribute is binary, then the second element holds one minus this score. "
      + "To evaluate performance of this method for a dataset where outliers/anomalies "
      + "are known, simply code the outliers using the class attribute: normal cases "
      + "should correspond to the second value of the class attribute; outliers to the "
      + "first one.";

  // Second paragraph: parallelism and pluggable components.
  final String parallelism =
    "Can use multiple cores/cpus to speed up the LOF computation for large datasets. "
      + "Nearest neighbor search methods and distance functions are pluggable.";

  // Closing section: the literature reference.
  final String reference = getTechnicalInformation().toString();

  return overview
    + "\n\n" + parallelism
    + "\n\nFor more information, see:\n\n" + reference;
}
/**
 * Describes the publication this class is based on: the LOF article by
 * Breunig, Kriegel, Ng and Sander (ACM SIGMOD Record, 2000).
 *
 * @return a TechnicalInformation object of type ARTICLE with the author,
 *         title, journal, year, volume, number, pages and publisher fields
 *         filled in
 */
@Override
public TechnicalInformation getTechnicalInformation() {
  final TechnicalInformation article = new TechnicalInformation(Type.ARTICLE);

  article.setValue(Field.AUTHOR,
    "Markus M. Breunig and Hans-Peter Kriegel and Raymond T. Ng and Jorg Sander");
  article.setValue(Field.TITLE, "LOF: Identifying Density-Based Local Outliers");
  article.setValue(Field.JOURNAL, "ACM SIGMOD Record");
  article.setValue(Field.YEAR, "2000");
  article.setValue(Field.VOLUME, "29");
  article.setValue(Field.NUMBER, "2");
  article.setValue(Field.PAGES, "93-104");
  article.setValue(Field.PUBLISHER, "ACM New York");

  return article;
}
/**
 * Returns the Capabilities of this classifier: nominal and numeric
 * attributes, missing values, a unary or binary class, and a minimum of one
 * instance.
 *
 * @return the capabilities of this object
 * @see Capabilities
 */
@Override
public Capabilities getCapabilities() {
  final Capabilities caps = super.getCapabilities();
  // Clear whatever the superclass enabled before enabling the specific
  // capabilities listed below.
  caps.disableAll();

  final Capability[] supported = {
    // attributes
    Capability.NOMINAL_ATTRIBUTES,
    Capability.NUMERIC_ATTRIBUTES,
    Capability.MISSING_VALUES,
    // class
    Capability.UNARY_CLASS,
    Capability.BINARY_CLASS
  };
  for (Capability capability : supported) {
    caps.enable(capability);
  }

  // instances
  caps.setMinimumNumberInstances(1);
  return caps;
}
/**
* Gets an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration listOptions() {
// TODO Auto-generated method stub
Vector
© 2015 - 2025 Weber Informatics LLC | Privacy Policy