cern.colt.matrix.tfloat.algo.FloatStatistic Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of parallelcolt Show documentation
Parallel Colt is a multithreaded version of Colt - a library for high performance scientific computing in Java. It contains efficient algorithms for data analysis, linear algebra, multi-dimensional arrays, Fourier transforms, statistics and histogramming.
The newest version!
/*
Copyright (C) 1999 CERN - European Organization for Nuclear Research.
Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
is hereby granted without fee, provided that the above copyright notice appear in all copies and 
that both that copyright notice and this permission notice appear in supporting documentation. 
CERN makes no representations about the suitability of this software for any purpose. 
It is provided "as is" without expressed or implied warranty.
 */
package cern.colt.matrix.tfloat.algo;

import hep.aida.tfloat.bin.DynamicFloatBin1D;

import java.util.concurrent.Future;

import cern.colt.function.tfloat.FloatFloatFunction;
import cern.colt.matrix.tfloat.FloatFactory1D;
import cern.colt.matrix.tfloat.FloatFactory2D;
import cern.colt.matrix.tfloat.FloatMatrix1D;
import cern.colt.matrix.tfloat.FloatMatrix2D;
import cern.colt.matrix.tfloat.FloatMatrix3D;
import cern.jet.math.tfloat.FloatFunctions;
import cern.jet.random.tfloat.engine.FloatRandomEngine;
import edu.emory.mathcs.utils.ConcurrencyUtils;

/**
 * Basic statistics operations on matrices. Computation of covariance,
 * correlation, distance matrix. Random sampling views. Conversion to histograms
 * with and without OLAP cube operators. Conversion to bins with retrieval of
 * statistical bin measures. Also see {@link cern.jet.stat} and
 * {@link hep.aida.tfloat.bin}, in particular
 * {@link hep.aida.tfloat.bin.DynamicFloatBin1D}.
 * 
 * Examples:
 * 
 * <pre>
 * A (4 x 3 matrix)
 *    1  2   3
 *    2  4   6
 *    3  6   9
 *    4 -8 -10
 * 
 * covariance(A) (3 x 3 matrix)
 *    1.25 -3.5 -4.5
 *   -3.5  29   39
 *   -4.5  39   52.5
 * 
 * correlation(covariance(A)) (3 x 3 matrix)
 *    1        -0.581318 -0.555492
 *   -0.581318  1         0.999507
 *   -0.555492  0.999507  1
 * 
 * distance(A,EUCLID) (3 x 3 matrix)
 *    0        12.569805 15.874508
 *   12.569805  0         4.242641
 *   15.874508  4.242641  0
 * </pre>
 * 
 * @author [email protected]
 * @version 1.0, 09/24/99
 */
public class FloatStatistic extends Object {
    // Handle to the shared float function-object library.
    // NOTE(review): nothing in the visible part of this class reads F - confirm it is used further down before removing it.
    private static final cern.jet.math.tfloat.FloatFunctions F = cern.jet.math.tfloat.FloatFunctions.functions;

    /**
     * Euclidean distance function; Sqrt(Sum( (x[i]-y[i])^2 )).
     */
    public static final VectorVectorFunction EUCLID = new VectorVectorFunction() {
        public final float apply(FloatMatrix1D a, FloatMatrix1D b) {
            // add up the squared element-wise differences, then take the root of the total
            float sumOfSquares = a.aggregate(b, FloatFunctions.plus, FloatFunctions.chain(FloatFunctions.square,
                    FloatFunctions.minus));
            return (float) Math.sqrt(sumOfSquares);
        }
    };

    /**
     * Bray-Curtis distance function;
     * Sum( abs(x[i]-y[i]) )  /  Sum( x[i]+y[i] ).
     */
    public static final VectorVectorFunction BRAY_CURTIS = new VectorVectorFunction() {
        public final float apply(FloatMatrix1D a, FloatMatrix1D b) {
            // numerator: Sum( abs(x[i]-y[i]) )
            float sumOfAbsDifferences = a.aggregate(b, FloatFunctions.plus, FloatFunctions.chain(FloatFunctions.abs,
                    FloatFunctions.minus));
            // denominator: Sum( x[i]+y[i] )
            float sumOfSums = a.aggregate(b, FloatFunctions.plus, FloatFunctions.plus);
            return sumOfAbsDifferences / sumOfSums;
        }
    };

    /**
     * Canberra distance function;
     * Sum( abs(x[i]-y[i]) / abs(x[i]+y[i]) ).
     */
    public static final VectorVectorFunction CANBERRA = new VectorVectorFunction() {
        // per-element term of the sum: abs(a-b) / abs(a+b)
        FloatFloatFunction term = new FloatFloatFunction() {
            public final float apply(float a, float b) {
                float absDifference = Math.abs(a - b);
                float absSum = Math.abs(a + b);
                return absDifference / absSum;
            }
        };

        public final float apply(FloatMatrix1D a, FloatMatrix1D b) {
            // add the per-element terms over all cell pairs
            return a.aggregate(b, FloatFunctions.plus, term);
        }
    };

    /**
     * Maximum distance function; Max( abs(x[i]-y[i]) ).
     */
    public static final VectorVectorFunction MAXIMUM = new VectorVectorFunction() {
        public final float apply(FloatMatrix1D a, FloatMatrix1D b) {
            // keep the largest absolute element-wise difference
            float largestAbsDifference = a.aggregate(b, FloatFunctions.max, FloatFunctions.chain(FloatFunctions.abs,
                    FloatFunctions.minus));
            return largestAbsDifference;
        }
    };

    /**
     * Manhattan distance function; Sum( abs(x[i]-y[i]) ).
     */
    public static final VectorVectorFunction MANHATTAN = new VectorVectorFunction() {
        public final float apply(FloatMatrix1D a, FloatMatrix1D b) {
            // add up the absolute element-wise differences
            float sumOfAbsDifferences = a.aggregate(b, FloatFunctions.plus, FloatFunctions.chain(FloatFunctions.abs,
                    FloatFunctions.minus));
            return sumOfAbsDifferences;
        }
    };

    /**
     * Interface that represents a function object: a function that takes two
     * argument vectors and returns a single value.
     */
    public interface VectorVectorFunction {
        /**
         * Evaluates this function for a pair of vectors.
         * 
         * @param x
         *            the first argument vector.
         * @param y
         *            the second argument vector.
         * @return the single value computed from both vectors.
         */
        float apply(cern.colt.matrix.tfloat.FloatMatrix1D x, cern.colt.matrix.tfloat.FloatMatrix1D y);
    }

    /**
     * Makes this class non instantiable, but still lets others inherit from
     * it.
     */
    protected FloatStatistic() {
        // intentionally empty: protected visibility keeps outside code from instantiating
        // the class directly while still allowing subclasses to be declared
    }

    /**
     * Applies the given aggregation functions to each column and stores the
     * results in the result matrix. If matrix has shape m x n, then
     * result must have shape aggr.length x n. Tip: To do aggregations
     * on rows use dice views (transpositions), as in
     * aggregate(matrix.viewDice(),aggr,result.viewDice()).
     * 
     * @param matrix
     *            any matrix; a column holds the values of a given variable.
     * @param aggr
     *            the aggregation functions to be applied to each column.
     * @param result
     *            the matrix to hold the aggregation results.
     * @return result (for convenience only).
     * @see FloatFormatter
     * @see hep.aida.tfloat.bin.FloatBinFunction1D
     * @see hep.aida.tfloat.bin.FloatBinFunctions1D
     */
    public static FloatMatrix2D aggregate(FloatMatrix2D matrix, hep.aida.tfloat.bin.FloatBinFunction1D[] aggr,
            FloatMatrix2D result) {
        DynamicFloatBin1D columnBin = new DynamicFloatBin1D();
        // One scratch array, handed to the list once at construction; the loop relies on
        // refilling the array to feed each new column to the bin through that list.
        float[] columnBuffer = new float[matrix.rows()];
        cern.colt.list.tfloat.FloatArrayList columnValues = new cern.colt.list.tfloat.FloatArrayList(columnBuffer);

        // walk the columns from last to first
        for (int column = matrix.columns() - 1; column >= 0; column--) {
            matrix.viewColumn(column).toArray(columnBuffer);
            columnBin.clear();
            columnBin.addAllOf(columnValues);

            // row i of the result receives the value of aggregation function i
            for (int i = aggr.length - 1; i >= 0; i--) {
                float aggregated = aggr[i].apply(columnBin);
                result.set(i, column, aggregated);
            }
        }
        return result;
    }

    /**
     * Fills all cell values of the given vector into a bin from which
     * statistical measures can be retrieved efficiently. Cell values are
     * copied.
     * 
     * Tip: Use System.out.println(bin(vector)) to print most measures
     * computed by the bin. Example:
     * 
     * 	 Size: 20000
     * 	 Sum: 299858.02350278624
     * 	 SumOfSquares: 5399184.154095971
     * 	 Min: 0.8639113139711261
     * 	 Max: 59.75331890541892
     * 	 Mean: 14.992901175139313
     * 	 RMS: 16.43043540825375
     * 	 Variance: 45.17438077634358
     * 	 Standard deviation: 6.721188940681818
     * 	 Standard error: 0.04752598277592142
     * 	 Geometric mean: 13.516615397064466
     * 	 Product: Infinity
     * 	 Harmonic mean: 11.995174297952191
     * 	 Sum of inversions: 1667.337172700724
     * 	 Skew: 0.8922838940067878
     * 	 Kurtosis: 1.1915828121825598
     * 	 Sum of powers(3): 1.1345828465808412E8
     * 	 Sum of powers(4): 2.7251055344494686E9
     * 	 Sum of powers(5): 7.367125643433887E10
     * 	 Sum of powers(6): 2.215370909100143E12
     * 	 Moment(0,0): 1.0
     * 	 Moment(1,0): 14.992901175139313
     * 	 Moment(2,0): 269.95920770479853
     * 	 Moment(3,0): 5672.914232904206
     * 	 Moment(4,0): 136255.27672247344
     * 	 Moment(5,0): 3683562.8217169433
     * 	 Moment(6,0): 1.1076854545500715E8
     * 	 Moment(0,mean()): 1.0
     * 	 Moment(1,mean()): -2.0806734113421045E-14
     * 	 Moment(2,mean()): 45.172122057305664
     * 	 Moment(3,mean()): 270.92018671421
     * 	 Moment(4,mean()): 8553.8664869067
     * 	 Moment(5,mean()): 153357.41712233616
     * 	 Moment(6,mean()): 4273757.570142922
     * 	 25%, 50% and 75% Quantiles: 10.030074811938091, 13.977982089912224,
     * 	 18.86124362967137
     * 	 quantileInverse(mean): 0.559163335012079
     * 	 Distinct elements & frequencies not printed (too many).
     * 
     * 
     * 
     * 
     * 
     * @param vector
     *            the vector to analyze.
     * @return a bin holding the statistics measures of the vector.
     */
    public static DynamicFloatBin1D bin(FloatMatrix1D vector) {
        // start from an empty bin and add the vector's cells as one list
        final DynamicFloatBin1D filled = new DynamicFloatBin1D();
        filled.addAllOf(FloatFactory1D.dense.toList(vector));
        return filled;
    }

    /**
     * Modifies the given covariance matrix to be a correlation matrix
     * (in-place). The correlation matrix is a square, symmetric matrix
     * consisting of nothing but correlation coefficients. The rows and the
     * columns represent the variables, the cells represent correlation
     * coefficients. The diagonal cells (i.e. the correlation between a variable
     * and itself) will equal 1, for the simple reason that the correlation
     * coefficient of a variable with itself equals 1. The correlation of two
     * column vectors x and y is given by
     * corr(x,y) = cov(x,y) / (stdDev(x)*stdDev(y)) (Pearson's
     * correlation coefficient). A correlation coefficient varies between -1
     * (for a perfect negative relationship) to +1 (for a perfect positive
     * relationship). See any standard statistics reference for the formal
     * definition. Compares two column vectors at a time. Use dice views
     * to compare two row vectors at a time.
     * 
     * @param covariance
     *            a covariance matrix, as, for example, returned by method
     *            {@link #covariance(FloatMatrix2D)}.
     * @return the modified covariance, now correlation matrix (for convenience
     *         only).
     */
    public static FloatMatrix2D correlation(FloatMatrix2D covariance) {
        int n = covariance.columns();

        // The diagonal holds the variances and is not overwritten until the final loop.
        // Take each square root once up front instead of twice per off-diagonal cell pair.
        float[] stdDevs = new float[n];
        for (int i = 0; i < n; i++) {
            stdDevs[i] = (float) Math.sqrt(covariance.getQuick(i, i));
        }

        // Only cells (i, j) with j < i are read; each result is mirrored to (j, i).
        for (int i = n; --i >= 0;) {
            for (int j = i; --j >= 0;) {
                float cov = covariance.getQuick(i, j);
                float corr = cov / (stdDevs[i] * stdDevs[j]);

                covariance.setQuick(i, j, corr);
                covariance.setQuick(j, i, corr); // symmetric
            }
        }

        // every diagonal cell is set to exactly 1
        for (int i = n; --i >= 0;) {
            covariance.setQuick(i, i, 1);
        }

        return covariance;
    }

    /**
     * Constructs and returns the covariance matrix of the given matrix. The
     * covariance matrix is a square, symmetric matrix consisting of nothing but
     * covariance coefficients. The rows and the columns represent the
     * variables, the cells represent covariance coefficients. The diagonal
     * cells (i.e. the covariance between a variable and itself) will equal the
     * variances. The covariance of two column vectors x and y is given by
     * cov(x,y) = (1/n) * Sum((x[i]-mean(x)) * (y[i]-mean(y))). See any
     * standard statistics reference for the formal definition. Compares two
     * column vectors at a time. Use dice views to compare two row vectors at
     * a time.
     * 
     * @param matrix
     *            any matrix; a column holds the values of a given variable.
     * @return the covariance matrix (n x n, n=matrix.columns).
     */
    public static FloatMatrix2D covariance(FloatMatrix2D matrix) {
        final int rows = matrix.rows();
        final int n = matrix.columns();
        FloatMatrix2D result = new cern.colt.matrix.tfloat.impl.DenseFloatMatrix2D(n, n);

        // fetch each column view once and remember the sum of its cells
        float[] columnSums = new float[n];
        FloatMatrix1D[] columnViews = new FloatMatrix1D[n];
        for (int c = n - 1; c >= 0; c--) {
            FloatMatrix1D view = matrix.viewColumn(c);
            columnViews[c] = view;
            columnSums[c] = view.zSum();
        }

        // compute the cells with j <= i (diagonal included) and mirror each one to (j, i)
        for (int i = n - 1; i >= 0; i--) {
            for (int j = i; j >= 0; j--) {
                float sumOfProducts = columnViews[i].zDotProduct(columnViews[j]);
                float cov = (sumOfProducts - columnSums[i] * columnSums[j] / rows) / rows;
                result.setQuick(i, j, cov);
                result.setQuick(j, i, cov);
            }
        }
        return result;
    }

    /**
     * 2-d OLAP cube operator; Fills all cells of the given vectors into the
     * given histogram. If you use hep.aida.ref.Converter.toString(histo) on the
     * result, the OLAP cube of x-"column" vs. y-"column" , summing the weights
     * "column" will be printed. For example, aggregate sales by product by
     * region.
     * 
     * Computes the distinct values of x and y, yielding histogram axes that
     * capture one distinct value per bin. Then fills the histogram.
     * 

     * Example output:
     * 
     * 	 Cube:
     * 	    Entries=5000, ExtraEntries=0
     * 	    MeanX=4.9838, RmsX=NaN
     * 	    MeanY=2.5304, RmsY=NaN
     * 	    xAxis: Min=0, Max=10, Bins=11
     * 	    yAxis: Min=0, Max=5, Bins=6
     * 	 Heights:
     * 	       | X
     * 	       | 0   1   2   3   4   5   6   7   8   9   10  | Sum 
     * 	 ----------------------------------------------------------
     * 	 Y 5   |  30  53  51  52  57  39  65  61  55  49  22 |  534
     * 	   4   |  43 106 112  96  92  94 107  98  98 110  47 | 1003
     * 	   3   |  39 134  87  93 102 103 110  90 114  98  51 | 1021
     * 	   2   |  44  81 113  96 101  86 109  83 111  93  42 |  959
     * 	   1   |  54  94 103  99 115  92  98  97 103  90  44 |  989
     * 	   0   |  24  54  52  44  42  56  46  47  56  53  20 |  494
     * 	 ----------------------------------------------------------
     * 	   Sum | 234 522 518 480 509 470 535 476 537 493 226 |
     * 
     * 
     * 
     * 
     * 
     * @return the histogram containing the cube.
     * @throws IllegalArgumentException
     *             if
     *             x.size() != y.size() || y.size() != weights.size().
     */
    public static hep.aida.tfloat.FloatIHistogram2D cube(FloatMatrix1D x, FloatMatrix1D y, FloatMatrix1D weights) {
        if (x.size() != y.size() || y.size() != weights.size())
            throw new IllegalArgumentException("vectors must have same size");

        float epsilon = 1.0E-5f;
        cern.colt.list.tfloat.FloatArrayList distinct = new cern.colt.list.tfloat.FloatArrayList();
        // scratch buffer reused for both axes; "sorted" is constructed on top of it
        float[] vals = new float[(int) x.size()];
        cern.colt.list.tfloat.FloatArrayList sorted = new cern.colt.list.tfloat.FloatArrayList(vals);

        // compute distinct values of x
        x.toArray(vals); // copy x into vals
        sorted.sort();
        cern.jet.stat.tfloat.FloatDescriptive.frequencies(sorted, distinct, null);
        // Since bins are right-open [from,to) we need an additional dummy bin
        // so that the last distinct value does not fall into the overflow bin.
        // In float arithmetic "last + epsilon" equals "last" once last is large
        // enough (roughly 256 and above), so fall back to the next representable
        // float to guarantee an edge strictly above the last distinct value.
        if (distinct.size() > 0) {
            float last = distinct.get(distinct.size() - 1);
            distinct.add(Math.max(last + epsilon, Math.nextUp(last)));
        }
        distinct.trimToSize();
        hep.aida.tfloat.FloatIAxis xaxis = new hep.aida.tfloat.ref.FloatVariableAxis(distinct.elements());

        // compute distinct values of y (same procedure, same scratch buffer)
        y.toArray(vals);
        sorted.sort();
        cern.jet.stat.tfloat.FloatDescriptive.frequencies(sorted, distinct, null);
        if (distinct.size() > 0) {
            float last = distinct.get(distinct.size() - 1);
            distinct.add(Math.max(last + epsilon, Math.nextUp(last)));
        }
        distinct.trimToSize();
        hep.aida.tfloat.FloatIAxis yaxis = new hep.aida.tfloat.ref.FloatVariableAxis(distinct.elements());

        // one bin per distinct value on each axis; fill it with the weighted (x, y) pairs
        hep.aida.tfloat.FloatIHistogram2D histo = new hep.aida.tfloat.ref.FloatHistogram2D("Cube", xaxis, yaxis);
        return histogram(histo, x, y, weights);
    }

    /**
     * 3-d OLAP cube operator; Fills all cells of the given vectors into the
     * given histogram. If you use hep.aida.ref.Converter.toString(histo) on the
     * result, the OLAP cube of x-"column" vs. y-"column" vs. z-"column",
     * summing the weights "column" will be printed. For example, aggregate
     * sales by product by region by time.
     * 
     * Computes the distinct values of x and y and z, yielding histogram axes
     * that capture one distinct value per bin. Then fills the histogram.
     * 
     * @return the histogram containing the cube.
     * @throws IllegalArgumentException
     *             if
     *             x.size() != y.size() || x.size() != z.size() || x.size() != weights.size()
     *             .
     */
    public static hep.aida.tfloat.FloatIHistogram3D cube(FloatMatrix1D x, FloatMatrix1D y, FloatMatrix1D z,
            FloatMatrix1D weights) {
        if (x.size() != y.size() || x.size() != z.size() || x.size() != weights.size())
            throw new IllegalArgumentException("vectors must have same size");

        float epsilon = 1.0E-5f;
        cern.colt.list.tfloat.FloatArrayList distinct = new cern.colt.list.tfloat.FloatArrayList();
        // scratch buffer reused for all three axes; "sorted" is constructed on top of it
        float[] vals = new float[(int) x.size()];
        cern.colt.list.tfloat.FloatArrayList sorted = new cern.colt.list.tfloat.FloatArrayList(vals);

        // compute distinct values of x
        x.toArray(vals); // copy x into vals
        sorted.sort();
        cern.jet.stat.tfloat.FloatDescriptive.frequencies(sorted, distinct, null);
        // Since bins are right-open [from,to) we need an additional dummy bin
        // so that the last distinct value does not fall into the overflow bin.
        // In float arithmetic "last + epsilon" equals "last" once last is large
        // enough (roughly 256 and above), so fall back to the next representable
        // float to guarantee an edge strictly above the last distinct value.
        if (distinct.size() > 0) {
            float last = distinct.get(distinct.size() - 1);
            distinct.add(Math.max(last + epsilon, Math.nextUp(last)));
        }
        distinct.trimToSize();
        hep.aida.tfloat.FloatIAxis xaxis = new hep.aida.tfloat.ref.FloatVariableAxis(distinct.elements());

        // compute distinct values of y (same procedure, same scratch buffer)
        y.toArray(vals);
        sorted.sort();
        cern.jet.stat.tfloat.FloatDescriptive.frequencies(sorted, distinct, null);
        if (distinct.size() > 0) {
            float last = distinct.get(distinct.size() - 1);
            distinct.add(Math.max(last + epsilon, Math.nextUp(last)));
        }
        distinct.trimToSize();
        hep.aida.tfloat.FloatIAxis yaxis = new hep.aida.tfloat.ref.FloatVariableAxis(distinct.elements());

        // compute distinct values of z (same procedure, same scratch buffer)
        z.toArray(vals);
        sorted.sort();
        cern.jet.stat.tfloat.FloatDescriptive.frequencies(sorted, distinct, null);
        if (distinct.size() > 0) {
            float last = distinct.get(distinct.size() - 1);
            distinct.add(Math.max(last + epsilon, Math.nextUp(last)));
        }
        distinct.trimToSize();
        hep.aida.tfloat.FloatIAxis zaxis = new hep.aida.tfloat.ref.FloatVariableAxis(distinct.elements());

        // one bin per distinct value on each axis; fill it with the weighted (x, y, z) triples
        hep.aida.tfloat.FloatIHistogram3D histo = new hep.aida.tfloat.ref.FloatHistogram3D("Cube", xaxis, yaxis, zaxis);
        return histogram(histo, x, y, z, weights);
    }

    /**
     * Demonstrates usage of this class.
     */
    public static void demo1() {
        // four rows of three columns each; covariance(...) treats every column as one variable
        float[][] cells = { { 1, 2, 3 }, { 2, 4, 6 }, { 3, 6, 9 }, { 4, -8, -10 } };
        FloatMatrix2D A = FloatFactory2D.dense.make(cells);

        System.out.println("\n\nmatrix=" + A);
        FloatMatrix2D covar = covariance(A);
        System.out.println("\ncovar1=" + covar);
        // correlation(covariance(A)) and distance(A, EUCLID) can be printed the same way
    }

    /**
     * Demonstrates usage of this class.
     */
    public static void demo2(int rows, int columns, boolean print) {
        System.out.println("\n\ninitializing...");
        FloatMatrix2D A = FloatFactory2D.dense.ascending(rows, columns);

        System.out.println("benchmarking correlation...");

        // time covariance followed by the in-place conversion to a correlation matrix
        cern.colt.Timer timer = new cern.colt.Timer().start();
        FloatMatrix2D corr = correlation(covariance(A));
        cern.colt.Timer stopped = timer.stop();
        stopped.display();

        if (print) {
            System.out.println("printing result...");
            System.out.println(corr);
        }
        System.out.println("done.");
    }

    /**
     * Demonstrates usage of this class.
     */
    public static void demo3(VectorVectorFunction norm) {
        // five points with two coordinates each
        float[][] points = { { -0.9611052f, -0.25421095f }, { 0.4308269f, -0.69932648f },
                { -1.2071029f, 0.62030596f }, { 1.5345166f, 0.02135884f }, { -1.1341542f, 0.20388430f } };

        System.out.println("\n\ninitializing...");
        // distance(...) compares columns, so hand it the dice view (each point becomes a column)
        FloatMatrix2D A = FloatFactory2D.dense.make(points).viewDice();

        FloatMatrix2D original = A.viewDice();
        System.out.println("\nA=" + original);
        FloatMatrix2D dist = distance(A, norm).viewDice();
        System.out.println("\ndist=" + dist);
    }

    /**
     * Constructs and returns the distance matrix of the given matrix. The
     * distance matrix is a square, symmetric matrix consisting of nothing but
     * distance coefficients. The rows and the columns represent the variables,
     * the cells represent distance coefficients. The diagonal cells (i.e. the
     * distance between a variable and itself) will be zero. Compares two column
     * vectors at a time. Use dice views to compare two row vectors at a time.
     * 
     * @param matrix
     *            any matrix; a column holds the values of a given variable
     *            (vector).
     * @param distanceFunction
     *            (EUCLID, CANBERRA, ..., or any user defined distance function
     *            operating on two vectors).
     * @return the distance matrix (n x n, n=matrix.columns).
     */
    public static FloatMatrix2D distance(FloatMatrix2D matrix, VectorVectorFunction distanceFunction) {
        final int n = matrix.columns();
        FloatMatrix2D result = new cern.colt.matrix.tfloat.impl.DenseFloatMatrix2D(n, n);

        // fetch each column view once; every view takes part in several comparisons
        FloatMatrix1D[] columnViews = new FloatMatrix1D[n];
        for (int c = n - 1; c >= 0; c--) {
            columnViews[c] = matrix.viewColumn(c);
        }

        // Evaluate each unordered pair (i, j) with j < i once and mirror the value.
        // Diagonal cells are never written here, so they keep the value the new matrix starts with.
        for (int i = n - 1; i > 0; i--) {
            for (int j = i - 1; j >= 0; j--) {
                float d = distanceFunction.apply(columnViews[i], columnViews[j]);
                result.setQuick(i, j, d);
                result.setQuick(j, i, d);
            }
        }
        return result;
    }

    /**
     * Fills all cells of the given vector into the given histogram.
     * 
     * @return histo (for convenience only).
     */
    public static hep.aida.tfloat.FloatIHistogram1D histogram(hep.aida.tfloat.FloatIHistogram1D histo,
            FloatMatrix1D vector) {
        // feed the cells to the histogram from the last index down to the first
        int i = (int) vector.size();
        while (--i >= 0) {
            float cell = vector.getQuick(i);
            histo.fill(cell);
        }
        return histo;
    }

    /**
     * Fills all cells of the given matrix into the given histogram.
     * 
     * @return histo (for convenience only).
     */
    public static hep.aida.tfloat.FloatIHistogram1D histogram(final hep.aida.tfloat.FloatIHistogram1D histo,
            final FloatMatrix2D matrix) {
        // pass the backing array plus the layout values (shape, offset of cell (0,0), strides)
        final float[] data = (float[]) matrix.elements();
        final int rows = matrix.rows();
        final int columns = matrix.columns();
        final int zero = (int) matrix.index(0, 0);
        final int rowStride = matrix.rowStride();
        final int columnStride = matrix.columnStride();
        histo.fill_2D(data, rows, columns, zero, rowStride, columnStride);
        return histo;
    }

    /**
     * Splits the given matrix into m x n pieces and computes 1D histogram of
     * each piece.
     * 
     * @return histo (for convenience only).
     */
    public static hep.aida.tfloat.FloatIHistogram1D[][] histogram(final hep.aida.tfloat.FloatIHistogram1D[][] histo,
            final FloatMatrix2D matrix, final int m, final int n) {
        final int rows = matrix.rows();
        final int columns = matrix.columns();
        if (m >= rows) {
            throw new IllegalArgumentException("Parameter m must be smaller than the number of rows in the matrix");
        }
        if (n >= columns) {
            throw new IllegalArgumentException("Parameter n must be smaller than the number of columns in the matrix");
        }

        // Every piece is row_size x col_size, except that the last piece in each
        // direction also takes the remainder of the integer division.
        final int row_size = rows / m;
        final int col_size = columns / n;
        final int[] height = new int[m];
        final int[] width = new int[n];
        for (int r = 0; r < m; r++) {
            height[r] = (r == m - 1) ? rows - (m - 1) * row_size : row_size;
        }
        for (int c = 0; c < n; c++) {
            width[c] = (c == n - 1) ? columns - (n - 1) * col_size : col_size;
        }

        int nthreads = ConcurrencyUtils.getNumberOfThreads();
        boolean parallel = (nthreads > 1) && (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D());
        if (!parallel) {
            // single task: handle all piece rows in the calling thread
            fillPieceRows(histo, matrix, 0, m, n, row_size, col_size, height, width);
            return histo;
        }

        // one task per contiguous band of piece rows; the last task also takes the leftover rows
        nthreads = Math.min(nthreads, m);
        Future<?>[] futures = new Future<?>[nthreads];
        final int band = m / nthreads;
        for (int t = 0; t < nthreads; t++) {
            final int firstRow = t * band;
            final int lastRow = (t == nthreads - 1) ? m : firstRow + band;
            futures[t] = ConcurrencyUtils.submit(new Runnable() {

                public void run() {
                    fillPieceRows(histo, matrix, firstRow, lastRow, n, row_size, col_size, height, width);
                }
            });
        }
        ConcurrencyUtils.waitForCompletion(futures);
        return histo;
    }

    /**
     * Fills histo[r][c] from the matching piece of the matrix for every piece
     * row r in [firstRow, lastRow) and every piece column c in [0, n).
     */
    private static void fillPieceRows(hep.aida.tfloat.FloatIHistogram1D[][] histo, FloatMatrix2D matrix,
            int firstRow, int lastRow, int n, int row_size, int col_size, int[] height, int[] width) {
        for (int r = firstRow; r < lastRow; r++) {
            for (int c = 0; c < n; c++) {
                FloatMatrix2D piece = matrix.viewPart(r * row_size, c * col_size, height[r], width[c]);
                histo[r][c].fill_2D((float[]) piece.elements(), piece.rows(), piece.columns(), (int) piece
                        .index(0, 0), piece.rowStride(), piece.columnStride());
            }
        }
    }

    /**
     * Fills all cells of the given vectors into the given histogram.
     * 
     * @return histo (for convenience only).
     * @throws IllegalArgumentException
     *             if x.size() != y.size().
     */
    public static hep.aida.tfloat.FloatIHistogram2D histogram(hep.aida.tfloat.FloatIHistogram2D histo, FloatMatrix1D x,
            FloatMatrix1D y) {
        boolean sameSize = x.size() == y.size();
        if (!sameSize) {
            throw new IllegalArgumentException("vectors must have same size");
        }
        // fill one (x, y) pair per index, from the last index down to the first
        int i = (int) x.size();
        while (--i >= 0) {
            histo.fill(x.getQuick(i), y.getQuick(i));
        }
        return histo;
    }

    /**
     * Fills all cells of the given vectors into the given histogram.
     * 
     * @return histo (for convenience only).
     * @throws IllegalArgumentException
     *             if
     *             x.size() != y.size() || y.size() != weights.size().
     */
    public static hep.aida.tfloat.FloatIHistogram2D histogram(hep.aida.tfloat.FloatIHistogram2D histo, FloatMatrix1D x,
            FloatMatrix1D y, FloatMatrix1D weights) {
        boolean sameSize = x.size() == y.size() && y.size() == weights.size();
        if (!sameSize) {
            throw new IllegalArgumentException("vectors must have same size");
        }
        // fill one weighted (x, y) pair per index, from the last index down to the first
        int i = (int) x.size();
        while (--i >= 0) {
            histo.fill(x.getQuick(i), y.getQuick(i), weights.getQuick(i));
        }
        return histo;
    }

    /**
     * Fills all cells of the given vectors into the given histogram.
     * 
     * @return histo (for convenience only).
     * @throws IllegalArgumentException
     *             if
     *             x.size() != y.size() || x.size() != z.size() || x.size() != weights.size()
     *             .
     */
    public static hep.aida.tfloat.FloatIHistogram3D histogram(hep.aida.tfloat.FloatIHistogram3D histo, FloatMatrix1D x,
            FloatMatrix1D y, FloatMatrix1D z, FloatMatrix1D weights) {
        boolean sameSize = x.size() == y.size() && x.size() == z.size() && x.size() == weights.size();
        if (!sameSize) {
            throw new IllegalArgumentException("vectors must have same size");
        }
        // fill one weighted (x, y, z) triple per index, from the last index down to the first
        int i = (int) x.size();
        while (--i >= 0) {
            histo.fill(x.getQuick(i), y.getQuick(i), z.getQuick(i), weights.getQuick(i));
        }
        return histo;
    }

    /**
     * Benchmarks covariance computation.
     */
    public static void main(String[] args) {
        // rows and columns are mandatory; fail with a usage hint rather than an index error
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException("usage: FloatStatistic <rows> <columns> [print]");
        }
        int rows = Integer.parseInt(args[0]);
        int columns = Integer.parseInt(args[1]);
        // the third argument is optional; only the literal "print" turns on result output
        boolean print = args.length > 2 && "print".equals(args[2]);
        demo2(rows, columns, print);
    }

    /**
     * Constructs and returns a sampling view with a size of
     * round(matrix.size() * fraction). Samples "without replacement"
     * from the uniform distribution.
     * 
     * @param matrix
     *            any matrix.
     * @param fraction
     *            the percentage to be included in the view.
     * @param randomGenerator
     *            a uniform random number generator; set this parameter to
     *            null to use a default generator seeded with the
     *            current time.
     * @return the sampling view.
     * @throws IllegalArgumentException
     *             if ! (0 <= fraction <= 1).
     * @see cern.jet.random.tfloat.sampling.FloatRandomSampler
     */
    public static FloatMatrix1D viewSample(FloatMatrix1D matrix, float fraction, FloatRandomEngine randomGenerator) {
        // Check preconditions and allow for a little tolerance. The test is written in
        // negated form so that NaN, for which every comparison is false, is rejected
        // instead of silently producing an empty sample.
        float epsilon = 1e-05f;
        if (!(fraction >= 0 - epsilon && fraction <= 1 + epsilon))
            throw new IllegalArgumentException("fraction must be within [0,1], but was " + fraction);
        // clamp values that were accepted only because of the tolerance
        if (fraction < 0)
            fraction = 0;
        if (fraction > 1)
            fraction = 1;

        // random generator seeded with current time
        if (randomGenerator == null)
            randomGenerator = new cern.jet.random.tfloat.engine.FloatMersenneTwister((int) System.currentTimeMillis());

        // n = number of cells to pick, N = number of cells to pick from
        int n = Math.round(matrix.size() * fraction);
        int N = (int) matrix.size();
        long[] selected = new long[n]; // sampler works on long's, not int's

        // sample
        cern.jet.random.tfloat.sampling.FloatRandomSampler.sample(n, N, n, 0, selected, 0, randomGenerator);

        // viewSelection is called with int indexes, so narrow the sampled values
        int[] selectedCols = new int[n];
        for (int i = 0; i < n; i++)
            selectedCols[i] = (int) selected[i];

        return matrix.viewSelection(selectedCols);
    }

    /**
     * Constructs and returns a sampling view with
     * round(matrix.rows() * rowFraction) rows and
     * round(matrix.columns() * columnFraction) columns. Samples
     * "without replacement". Rows and columns are randomly chosen from the
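     * uniform distribution. Examples:
     * <table border="1" cellspacing="0">
     * <tr valign="top" align="left">
     * <td><code>matrix</code></td>
     * <td><code>rowFraction=0.2<br>
     * columnFraction=0.2</code></td>
     * <td><code>rowFraction=0.2<br>
     * columnFraction=1.0</code></td>
     * <td><code>rowFraction=1.0<br>
     * columnFraction=0.2</code></td>
     * </tr>
     * <tr valign="top">
     * <td>
     * 
     * <pre>
     * 10 x 10 matrix
     *  1  2  3  4  5  6  7  8  9  10
     * 11 12 13 14 15 16 17 18 19  20
     * 21 22 23 24 25 26 27 28 29  30
     * 31 32 33 34 35 36 37 38 39  40
     * 41 42 43 44 45 46 47 48 49  50
     * 51 52 53 54 55 56 57 58 59  60
     * 61 62 63 64 65 66 67 68 69  70
     * 71 72 73 74 75 76 77 78 79  80
     * 81 82 83 84 85 86 87 88 89  90
     * 91 92 93 94 95 96 97 98 99 100
     * </pre>
     * 
     * </td>
     * <td>
     * 
     * <pre>
     * 2 x 2 matrix
     * 43 50
     * 53 60
     * </pre>
     * 
     * </td>
     * <td>
     * 
     * <pre>
     * 2 x 10 matrix
     * 41 42 43 44 45 46 47 48 49  50
     * 91 92 93 94 95 96 97 98 99 100
     * </pre>
     * 
     * </td>
     * <td>
     * 
     * <pre>
     * 10 x 2 matrix
     *  4  8
     * 14 18
     * 24 28
     * 34 38
     * 44 48
     * 54 58
     * 64 68
     * 74 78
     * 84 88
     * 94 98
     * </pre>
     * 
     * </td>
     * </tr>
     * </table>
     * 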
     * @param matrix
     *            any matrix.
     * @param rowFraction
     *            the percentage of rows to be included in the view.
     * @param columnFraction
     *            the percentage of columns to be included in the view.
     * @param randomGenerator
     *            a uniform random number generator; set this parameter to
     *            null to use a default generator seeded with the
     *            current time.
     * @return the sampling view.
     * @throws IllegalArgumentException
     *             if
     *             ! (0 <= rowFraction <= 1 && 0 <= columnFraction <= 1)
     *             .
     * @see cern.jet.random.tfloat.sampling.FloatRandomSampler
     */
    public static FloatMatrix2D viewSample(FloatMatrix2D matrix, float rowFraction, float columnFraction,
            FloatRandomEngine randomGenerator) {
        // Validate each fraction with a small tolerance. The tests are written
        // in negated form on purpose: every comparison involving NaN is false,
        // so the usual "x < lo || x > hi" form would let NaN pass the check.
        final float epsilon = 1e-05f;
        if (!(rowFraction >= 0 - epsilon && rowFraction <= 1 + epsilon))
            throw new IllegalArgumentException("rowFraction must be in [0,1] but was " + rowFraction);
        // Snap values inside the tolerance band back into [0,1].
        if (rowFraction < 0)
            rowFraction = 0;
        if (rowFraction > 1)
            rowFraction = 1;

        if (!(columnFraction >= 0 - epsilon && columnFraction <= 1 + epsilon))
            throw new IllegalArgumentException("columnFraction must be in [0,1] but was " + columnFraction);
        if (columnFraction < 0)
            columnFraction = 0;
        if (columnFraction > 1)
            columnFraction = 1;

        // No generator supplied: fall back to one seeded with the current time.
        if (randomGenerator == null)
            randomGenerator = new cern.jet.random.tfloat.engine.FloatMersenneTwister((int) System.currentTimeMillis());

        final int totalRows = matrix.rows();
        final int totalColumns = matrix.columns();
        // The products are computed in float and may round above the extent
        // for very large matrices; never request more indexes than exist.
        final int nrows = Math.min(totalRows, Math.round(totalRows * rowFraction));
        final int ncolumns = Math.min(totalColumns, Math.round(totalColumns * columnFraction));

        // One scratch buffer, large enough for either axis, is reused for both
        // draws. The sampler fills a long[]; indexes are narrowed to int below.
        long[] selected = new long[Math.max(nrows, ncolumns)];

        // Rows are drawn first, then columns, from the same generator.
        cern.jet.random.tfloat.sampling.FloatRandomSampler.sample(nrows, totalRows, nrows, 0, selected, 0,
                randomGenerator);
        int[] selectedRows = new int[nrows];
        for (int i = 0; i < nrows; i++)
            selectedRows[i] = (int) selected[i];

        cern.jet.random.tfloat.sampling.FloatRandomSampler.sample(ncolumns, totalColumns, ncolumns, 0, selected, 0,
                randomGenerator);
        int[] selectedCols = new int[ncolumns];
        for (int i = 0; i < ncolumns; i++)
            selectedCols[i] = (int) selected[i];

        return matrix.viewSelection(selectedRows, selectedCols);
    }

    /**
     * Constructs and returns a sampling view with
     * round(matrix.slices() * sliceFraction) slices and
     * round(matrix.rows() * rowFraction) rows and
     * round(matrix.columns() * columnFraction) columns. Samples
     * "without replacement". Slices, rows and columns are randomly chosen from
     * the uniform distribution.
     * 
     * @param matrix
     *            any matrix.
     * @param sliceFraction
     *            the percentage of slices to be included in the view.
     * @param rowFraction
     *            the percentage of rows to be included in the view.
     * @param columnFraction
     *            the percentage of columns to be included in the view.
     * @param randomGenerator
     *            a uniform random number generator; set this parameter to
     *            null to use a default generator seeded with the
     *            current time.
     * @return the sampling view.
     * @throws IllegalArgumentException
     *             if
     *             ! (0 <= sliceFraction <= 1 && 0 <= rowFraction <= 1 && 0 <= columnFraction <= 1)
     *             .
     * @see cern.jet.random.tfloat.sampling.FloatRandomSampler
     */
    public static FloatMatrix3D viewSample(FloatMatrix3D matrix, float sliceFraction, float rowFraction,
            float columnFraction, FloatRandomEngine randomGenerator) {
        // Validate each fraction with a small tolerance. The tests are written
        // in negated form on purpose: every comparison involving NaN is false,
        // so the usual "x < lo || x > hi" form would let NaN pass the check.
        final float epsilon = 1e-05f;
        if (!(sliceFraction >= 0 - epsilon && sliceFraction <= 1 + epsilon))
            throw new IllegalArgumentException("sliceFraction must be in [0,1] but was " + sliceFraction);
        // Snap values inside the tolerance band back into [0,1].
        if (sliceFraction < 0)
            sliceFraction = 0;
        if (sliceFraction > 1)
            sliceFraction = 1;

        if (!(rowFraction >= 0 - epsilon && rowFraction <= 1 + epsilon))
            throw new IllegalArgumentException("rowFraction must be in [0,1] but was " + rowFraction);
        if (rowFraction < 0)
            rowFraction = 0;
        if (rowFraction > 1)
            rowFraction = 1;

        if (!(columnFraction >= 0 - epsilon && columnFraction <= 1 + epsilon))
            throw new IllegalArgumentException("columnFraction must be in [0,1] but was " + columnFraction);
        if (columnFraction < 0)
            columnFraction = 0;
        if (columnFraction > 1)
            columnFraction = 1;

        // No generator supplied: fall back to one seeded with the current time.
        if (randomGenerator == null)
            randomGenerator = new cern.jet.random.tfloat.engine.FloatMersenneTwister((int) System.currentTimeMillis());

        final int totalSlices = matrix.slices();
        final int totalRows = matrix.rows();
        final int totalColumns = matrix.columns();
        // The products are computed in float and may round above the extent
        // for very large matrices; never request more indexes than exist.
        final int nslices = Math.min(totalSlices, Math.round(totalSlices * sliceFraction));
        final int nrows = Math.min(totalRows, Math.round(totalRows * rowFraction));
        final int ncolumns = Math.min(totalColumns, Math.round(totalColumns * columnFraction));

        // One scratch buffer, large enough for any axis, is reused for all
        // three draws. The sampler fills a long[]; indexes are narrowed to int below.
        long[] selected = new long[Math.max(nslices, Math.max(nrows, ncolumns))];

        // Draw order is slices, rows, columns, all from the same generator.
        cern.jet.random.tfloat.sampling.FloatRandomSampler.sample(nslices, totalSlices, nslices, 0, selected, 0,
                randomGenerator);
        int[] selectedSlices = new int[nslices];
        for (int i = 0; i < nslices; i++)
            selectedSlices[i] = (int) selected[i];

        cern.jet.random.tfloat.sampling.FloatRandomSampler.sample(nrows, totalRows, nrows, 0, selected, 0,
                randomGenerator);
        int[] selectedRows = new int[nrows];
        for (int i = 0; i < nrows; i++)
            selectedRows[i] = (int) selected[i];

        cern.jet.random.tfloat.sampling.FloatRandomSampler.sample(ncolumns, totalColumns, ncolumns, 0, selected, 0,
                randomGenerator);
        int[] selectedCols = new int[ncolumns];
        for (int i = 0; i < ncolumns; i++)
            selectedCols[i] = (int) selected[i];

        return matrix.viewSelection(selectedSlices, selectedRows, selectedCols);
    }
}
/*
 * Example tables (flattened table text that followed the class body; kept
 * here as a comment so the values are preserved).
 *
 * Table 1
 *
 *   A = 4 x 3 matrix
 *        1   2   3
 *        2   4   6
 *        3   6   9
 *        4  -8 -10
 *
 *   covariance(A) = 3 x 3 matrix
 *        1.25 -3.5  -4.5
 *       -3.5  29    39
 *       -4.5  39    52.5
 *
 *   correlation(covariance(A)) = 3 x 3 matrix
 *        1        -0.581318 -0.555492
 *       -0.581318  1         0.999507
 *       -0.555492  0.999507  1
 *
 *   distance(A,EUCLID) = 3 x 3 matrix
 *        0        12.569805 15.874508
 *       12.569805  0         4.242641
 *       15.874508  4.242641  0
 *
 * Table 2
 *
 *   matrix = 10 x 10 matrix
 *        1  2  3  4  5  6  7  8  9  10
 *       11 12 13 14 15 16 17 18 19  20
 *       21 22 23 24 25 26 27 28 29  30
 *       31 32 33 34 35 36 37 38 39  40
 *       41 42 43 44 45 46 47 48 49  50
 *       51 52 53 54 55 56 57 58 59  60
 *       61 62 63 64 65 66 67 68 69  70
 *       71 72 73 74 75 76 77 78 79  80
 *       81 82 83 84 85 86 87 88 89  90
 *       91 92 93 94 95 96 97 98 99 100
 *
 *   rowFraction=0.2 columnFraction=0.2 -> 2 x 2 matrix
 *       43 50
 *       53 60
 *
 *   rowFraction=0.2 columnFraction=1.0 -> 2 x 10 matrix
 *       41 42 43 44 45 46 47 48 49  50
 *       91 92 93 94 95 96 97 98 99 100
 *
 *   rowFraction=1.0 columnFraction=0.2 -> 10 x 2 matrix
 *        4  8
 *       14 18
 *       24 28
 *       34 38
 *       44 48
 *       54 58
 *       64 68
 *       74 78
 *       84 88
 *       94 98
 */