All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.lmu.ifi.dbs.elki.math.linearalgebra.pca.RANSACCovarianceMatrixBuilder Maven / Gradle / Ivy

Go to download

ELKI - Main Module – Open-Source Data-Mining Framework with Index Acceleration

There is a newer version: 0.7.5
Show newest version
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;

/*
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures

 Copyright (C) 2015
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see .
 */

import java.util.Random;

import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;

/**
 * RANSAC based approach to a more robust covariance matrix computation.
 * 
 * This is an experimental adoption of RANSAC to this problem, not a
 * generic RANSAC implementation!
 * 
 * While using RANSAC for PCA at first sounds like a good idea, it does not
 * work very well in high-dimensional spaces. The problem is that PCA has
 * O(n^2) degrees of freedom, so we need to sample very many objects, then
 * perform an O(n^3) matrix operation to compute PCA, for each attempt.
 * 
 * References:
 * 
 * RANSAC for PCA was a side note in:
 * 

* Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek
* Outlier Detection in Arbitrarily Oriented Subspaces
* in: Proc. IEEE International Conference on Data Mining (ICDM 2012) *

* * The basic RANSAC idea was explained in: *

* Random sample consensus: a paradigm for model fitting with applications to * image analysis and automated cartography
* M.A. Fischler, R.C. Bolles
* Communications of the ACM, Vol. 24 Issue 6 *

* * @author Erich Schubert * @since 0.5.5 */ @Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek", // title = "Outlier Detection in Arbitrarily Oriented Subspaces", // booktitle = "Proc. IEEE International Conference on Data Mining (ICDM 2012)") public class RANSACCovarianceMatrixBuilder extends AbstractCovarianceMatrixBuilder { /** * Number of iterations to perform */ int iterations = 1000; /** * Random generator */ RandomFactory rnd; /** * Constructor. * * @param iterations Number of iterations (attempts) to try * @param rnd random generator */ public RANSACCovarianceMatrixBuilder(int iterations, RandomFactory rnd) { super(); this.iterations = iterations; this.rnd = rnd; } @Reference(title = "Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography", authors = "M.A. Fischler, R.C. Bolles", booktitle = "Communications of the ACM, Vol. 24 Issue 6", url = "http://dx.doi.org/10.1145/358669.358692") @Override public Matrix processIds(DBIDs ids, Relation relation) { final int dim = RelationUtil.dimensionality(relation); DBIDs best = DBIDUtil.EMPTYDBIDS; double tresh = ChiSquaredDistribution.quantile(0.85, dim); Random random = rnd.getSingleThreadedRandom(); for(int i = 0; i < iterations; i++) { DBIDs sample = DBIDUtil.randomSample(ids, dim + 1, random); CovarianceMatrix cv = CovarianceMatrix.make(relation, sample); Vector centroid = cv.getMeanVector(); Matrix p = cv.destroyToSampleMatrix().inverse(); ModifiableDBIDs support = DBIDUtil.newHashSet(); for(DBIDIter id = ids.iter(); id.valid(); id.advance()) { Vector vec = relation.get(id).getColumnVector().minusEquals(centroid); double sqlen = vec.transposeTimesTimes(p, vec); if(sqlen < tresh) { support.add(id); } } if(support.size() > best.size()) { best = support; } if(support.size() >= ids.size()) { break; // Can't get better than this! } } // logger.warning("Consensus size: "+best.size()+" of "+ids.size()); // Fall back to regular PCA if(best.size() <= dim) { return CovarianceMatrix.make(relation, ids).destroyToSampleMatrix(); } // Return estimation based on consensus set. return CovarianceMatrix.make(relation, best).destroyToSampleMatrix(); } /** * Parameterization class * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { /** * Number of iterations. */ public static final OptionID ITER_ID = new OptionID("ransacpca.iterations", "The number of iterations to perform."); /** * Random seed */ public static final OptionID SEED_ID = new OptionID("ransacpca.seed", "Random seed (optional)."); /** * Number of iterations to perform */ int iterations = 1000; /** * Random generator */ RandomFactory rnd; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); IntParameter iterP = new IntParameter(ITER_ID, 1000); iterP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(iterP)) { iterations = iterP.intValue(); } RandomParameter rndP = new RandomParameter(SEED_ID); if(config.grab(rndP)) { rnd = rndP.getValue(); } } @Override protected RANSACCovarianceMatrixBuilder makeInstance() { return new RANSACCovarianceMatrixBuilder(iterations, rnd); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy