All Downloads are FREE. Search and download functionality uses the official Maven repository.

weka.classifiers.misc.LOF Maven / Gradle / Ivy

Go to download

A filter that applies the LOF (Local Outlier Factor) algorithm to compute an outlier score for each instance in the data. Can use multiple cores/cpus to speed up the LOF computation for large datasets. Nearest neighbor search methods and distance functions are pluggable. For more information, see: Markus M. Breunig, Hans-Peter Kriegel, Raymond T. Ng, Jorg Sander (2000). LOF: Identifying Density-Based Local Outliers. ACM SIGMOD Record. 29(2):93-104.

The newest version!
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    LOF.java
 *    Copyright (C) 1999-2013 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.misc;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.CapabilitiesHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.neighboursearch.LinearNNSearch;
import weka.core.neighboursearch.NearestNeighbourSearch;
import weka.filters.Filter;

/**
 
 * A Classifier that applies the LOF (Local Outlier Factor) algorithm to compute an "outlier" score for each instance in the data. The data is expected to have a unary or binary class attribute, which is ignored at training time. The distributionForInstance() method returns the outlier score in the first element of the distribution. If the class attribute is binary, then the second element holds one minus this score. To evaluate performance of this method for a dataset where outliers/anomalies are known, simply code the outliers using the class attribute: normal cases should correspond to the second value of the class attribute; outliers to the first one.
*
* Can use multiple cores/cpus to speed up the LOF computation for large datasets. Nearest neighbor search methods and distance functions are pluggable.
*
* For more information, see:
*
 * Markus M. Breunig, Hans-Peter Kriegel, Raymond T. Ng, Jorg Sander (2000). LOF: Identifying Density-Based Local Outliers. ACM SIGMOD Record. 29(2):93-104.
 * <p>
 * BibTeX:
 * <pre>
 * &#64;article{Breunig2000,
 *    author = {Markus M. Breunig and Hans-Peter Kriegel and Raymond T. Ng and Jorg Sander},
 *    journal = {ACM SIGMOD Record},
 *    number = {2},
 *    pages = {93-104},
 *    publisher = {ACM New York},
 *    title = {LOF: Identifying Density-Based Local Outliers},
 *    volume = {29},
 *    year = {2000}
 * }
 * </pre>
 *

 * Valid options are:
 *
 * <pre> -min &lt;num&gt;
 *  Lower bound on the k nearest neighbors for finding max LOF (minPtsLB)
 *  (default = 10)</pre>
 *
 * <pre> -max &lt;num&gt;
 *  Upper bound on the k nearest neighbors for finding max LOF (minPtsUB)
 *  (default = 40)</pre>
 *
 * <pre> -A
 *  The nearest neighbour search algorithm to use (default: weka.core.neighboursearch.LinearNNSearch).
 * </pre>
 *
 * <pre> -num-slots &lt;num&gt;
 *  Number of execution slots.
 *  (default 1 - i.e. no parallelism)</pre>
* * * @author Mark Hall (mhall{[at]}pentaho{[dot]}com) * @version $Revision: $ * */ public class LOF extends AbstractClassifier implements Serializable, CapabilitiesHandler, OptionHandler, TechnicalInformationHandler, RevisionHandler { /** * For serialization */ private static final long serialVersionUID = -2736613569494944202L; protected weka.filters.unsupervised.attribute.LOF m_lof; /** The lower bound on the minimum number of points (k) */ protected String m_minPtsLB = "10"; /** The upper bound on the minimum number of points (k) */ protected String m_minPtsUB = "40"; /** The nearest neighbor search to use */ protected NearestNeighbourSearch m_nnTemplate = new LinearNNSearch(); protected String m_numSlots = "1"; protected double m_minScore; protected double m_maxScore; /** * Returns a string describing this scheme * * @return a description of the scheme suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "A Classifier that applies the LOF (Local Outlier Factor) algorithm " + "to compute an \"outlier\" score for each instance in the data. " + "The data is expected to have a unary or binary class attribute, which " + "is ignored at training time. The distributionForInstance() method returns " + "the outlier score in the first element of the distribution. If the class " + "attribute is binary, then the second element holds one minus this score. " + "To evaluate performance of this method for a dataset where outliers/anomalies " + "are known, simply code the outliers using the class attribute: normal cases " + "should correspond to the second value of the class attribute; outliers to the " + "first one.\n\nCan use " + "multiple cores/cpus to speed up the LOF computation for large datasets. " + "Nearest neighbor search methods and distance functions are pluggable." 
+ "\n\nFor more information, see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ @Override public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.ARTICLE); result.setValue(Field.AUTHOR, "Markus M. Breunig and Hans-Peter " + "Kriegel and Raymond T. Ng and Jorg Sander"); result.setValue(Field.TITLE, "LOF: Identifying Density-Based Local Outliers"); result.setValue(Field.JOURNAL, "ACM SIGMOD Record"); result.setValue(Field.YEAR, "2000"); result.setValue(Field.VOLUME, "29"); result.setValue(Field.NUMBER, "2"); result.setValue(Field.PAGES, "93-104"); result.setValue(Field.PUBLISHER, "ACM New York"); return result; } /** * Returns the Capabilities of this filter. * * @return the capabilities of this object * @see Capabilities */ @Override public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.disableAll(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enable(Capability.UNARY_CLASS); result.enable(Capability.BINARY_CLASS); // instances result.setMinimumNumberInstances(1); return result; } /** * Gets an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration listOptions() { // TODO Auto-generated method stub Vector




© 2015 - 2025 Weber Informatics LLC | Privacy Policy