/*
*
* * Copyright 2015 Skymind,Inc.
* *
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*
*/
package org.nd4j.linalg.dimensionalityreduction;
import org.nd4j.linalg.api.complex.IComplexNDArray;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.eigen.Eigen;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
/**
* PCA class for dimensionality reduction
*
* @author Adam Gibson
*/
public class PCA {
private PCA() {
}
/**
 * Calculates the pca vectors of a matrix, for a fixed number of reduced features,
 * and returns the reduced feature set.
 * The return is the projection of A onto its first nDims principal components.
*
 * To use the PCA: assume A is the original feature set,
 * then project A onto a reduced set of features. It is possible to
 * reconstruct an approximation of the original data ( losing information, but keeping
 * the same dimensionality )
*
*
* {@code
*
* INDArray Areduced = A.mmul( factor ) ;
* INDArray Aoriginal = Areduced.mmul( factor.transpose() ) ;
*
* }
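 *
 * where {@code factor} is obtained beforehand, for example from {@link #pca_factor(INDArray, int, boolean)}
 * (an illustrative sketch - the choice of 3 components below is arbitrary):
 *
 * {@code
 *
 * INDArray factor = PCA.pca_factor( A, 3, true ) ;
 *
 * }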
*
*
 * @param A the array of features, rows are observations, columns are features - changed in place when normalize is true
* @param nDims the number of components on which to project the features
* @param normalize whether to normalize (adjust each feature to have zero mean)
 * @return the reduced feature set: the projection of A onto its first nDims principal components
*/
public static INDArray pca(INDArray A, int nDims, boolean normalize) {
INDArray factor = pca_factor( A, nDims, normalize ) ;
return A.mmul( factor ) ;
}
/**
 * Calculates the pca factors of a matrix, for a fixed number of reduced features,
 * and returns the factors used to project observations onto those features.
 *
 * The return is a factor matrix used to reduce (normalized) feature sets.
*
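 * For example (an illustrative sketch - the 10 features and 3 components below are
 * arbitrary, not fixed by this class):
 *
 * {@code
 *
 * // A has shape [numSamples, 10]; the returned factor has shape [10, 3]
 * INDArray factor  = PCA.pca_factor( A, 3, true ) ;
 * INDArray reduced = A.mmul( factor ) ;   // shape [numSamples, 3]
 *
 * }
 *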
 * @see #pca(INDArray, int, boolean)
*
 * @param A the array of features, rows are observations, columns are features - changed in place when normalize is true
* @param nDims the number of components on which to project the features
* @param normalize whether to normalize (adjust each feature to have zero mean)
 * @return the factor matrix used to project A onto the reduced feature set
*/
public static INDArray pca_factor(INDArray A, int nDims, boolean normalize) {
if( normalize ) {
// Normalize to mean 0 for each feature ( each column has 0 mean )
INDArray mean = A.mean(0) ;
A.subiRowVector( mean ) ;
}
int m = A.rows() ;
int n = A.columns() ;
        // Prepare the SVD results; we'll decompose A into U x S x V'
        INDArray s = Nd4j.create( m < n ? m : n ) ;
        INDArray VT = Nd4j.create( n, n, 'f' ) ;

        // Note - we don't care about U
        Nd4j.getBlasWrapper().lapack().gesvd( A, s, null, VT ) ;

        // Now rip out the appropriate number of right singular vectors from
        // the V output (note we pull rows since VT is the transpose of V)
        INDArray V = VT.transpose() ;
        INDArray factor = Nd4j.create( n, nDims, 'f' ) ;
        for( int i = 0 ; i < nDims ; i++ ) {
            factor.putColumn( i, V.getColumn( i ) ) ;
        }

        return factor ;
    }

    /**
     * Calculates the pca factors of a matrix, keeping as many components as are needed
     * to preserve the requested fraction of the total variance.
     *
     * @see #pca_factor(INDArray, int, boolean)
     *
     * @param A the array of features, rows are observations, columns are features - changed in place when normalize is true
     * @param variance the fraction of the total variance to preserve (between 0 and 1)
     * @param normalize whether to normalize (adjust each feature to have zero mean)
     * @return the factor matrix used to project A onto the reduced feature set
     */
    public static INDArray pca_factor(INDArray A, double variance, boolean normalize) {
        if( normalize ) {
            // Normalize to mean 0 for each feature ( each column has 0 mean )
            INDArray mean = A.mean(0) ;
            A.subiRowVector( mean ) ;
        }
        int m = A.rows() ;
        int n = A.columns() ;

        // Prepare the SVD results; we'll decompose A into U x S x V'
        INDArray s = Nd4j.create( m < n ? m : n ) ;
        INDArray VT = Nd4j.create( n, n, 'f' ) ;

        // Note - we don't care about U
        Nd4j.getBlasWrapper().lapack().gesvd( A, s, null, VT ) ;

        // Convert the singular values of A into the eigenvalues of the covariance matrix
        // ( eig = sigma^2 / (m-1) ), since the variance is apportioned by the eigenvalues
        for( int i = 0 ; i < s.length() ; i++ ) {
            double sigma = s.getDouble(i) ;
            s.putScalar( i, sigma * sigma / (m - 1) ) ;
        }

        // Now find how many features we need to preserve the required variance, i.e. the
        // smallest k whose cumulative eigenvalue sum reaches that fraction of the total
        double totalEigSum = s.sumNumber().doubleValue() * variance ;
        int k = -1 ;               // we will reduce to k dimensions
        double runningTotal = 0 ;
        for( int i = 0 ; i < s.length() ; i++ ) {
            runningTotal += s.getDouble(i) ;
            if( runningTotal >= totalEigSum ) { // OK I know it's a float, but what else can we do ?
                k = i + 1 ;        // we will keep this many features to preserve the reqd. variance
                break ;
            }
        }
        if( k == -1 ) {            // if we need everything
            throw new RuntimeException( "No reduction possible for reqd. variance - use smaller variance" ) ;
        }

        // So now let's rip out the appropriate number of right singular vectors from
        // the V output (note we pull rows since VT is the transpose of V)
        INDArray V = VT.transpose() ;
        INDArray factor = Nd4j.create( n, k, 'f' ) ;
        for( int i = 0 ; i < k ; i++ ) {
            factor.putColumn( i, V.getColumn( i ) ) ;
        }

        return factor ;
    }
}