cern.colt.matrix.tdouble.algo.DoubleStatistic Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of parallelcolt Show documentation
Parallel Colt is a multithreaded version of Colt - a library for high performance scientific computing in Java. It contains efficient algorithms for data analysis, linear algebra, multi-dimensional arrays, Fourier transforms, statistics and histogramming.
The newest version!
/*
Copyright (C) 1999 CERN - European Organization for Nuclear Research.
Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
is hereby granted without fee, provided that the above copyright notice appear in all copies and 
that both that copyright notice and this permission notice appear in supporting documentation. 
CERN makes no representations about the suitability of this software for any purpose. 
It is provided "as is" without expressed or implied warranty.
 */
package cern.colt.matrix.tdouble.algo;

import hep.aida.tdouble.bin.DynamicDoubleBin1D;

import java.util.concurrent.Future;

import cern.colt.function.tdouble.DoubleDoubleFunction;
import cern.colt.matrix.tdouble.DoubleFactory1D;
import cern.colt.matrix.tdouble.DoubleFactory2D;
import cern.colt.matrix.tdouble.DoubleMatrix1D;
import cern.colt.matrix.tdouble.DoubleMatrix2D;
import cern.colt.matrix.tdouble.DoubleMatrix3D;
import cern.jet.math.tdouble.DoubleFunctions;
import cern.jet.random.tdouble.engine.DoubleRandomEngine;
import edu.emory.mathcs.utils.ConcurrencyUtils;

/**
 * Basic statistics operations on matrices. Computation of covariance,
 * correlation, distance matrix. Random sampling views. Conversion to histograms
 * with and without OLAP cube operators. Conversion to bins with retrieval of
 * statistical bin measures. Also see {@link cern.jet.stat} and
 * {@link hep.aida.tdouble.bin}, in particular
 * {@link hep.aida.tdouble.bin.DynamicDoubleBin1D}.
 * 
 * Examples:
 * 
 * 
 * 
 * 
 * 
 * 
 * 
 * 
 * 
 * 
 * 
 * 
 * 
 * A covariance(A) correlation(covariance(A)) distance(A,EUCLID)
 4 x 3 matrix

 1  2   3

 2  4   6

 3  6   9

 4 -8 -10  3 x 3 matrix

  1.25 -3.5 -4.5

 -3.5  29   39  

 -4.5  39   52.5  3 x 3 matrix

  1        -0.581318 -0.555492

 -0.581318  1         0.999507

 -0.555492  0.999507  1        
  3 x 3 matrix

  0        12.569805 15.874508

 12.569805  0         4.242641

 15.874508  4.242641  0        
        
 * 
 * @author [email protected]
 * @version 1.0, 09/24/99
 */
public class DoubleStatistic extends Object {
    private static final cern.jet.math.tdouble.DoubleFunctions F = cern.jet.math.tdouble.DoubleFunctions.functions;

    /**
     * Euclidean distance function; Sqrt(Sum( (x[i]-y[i])^2 )).
     */
    public static final VectorVectorFunction EUCLID = new VectorVectorFunction() {
        public final double apply(DoubleMatrix1D a, DoubleMatrix1D b) {
            return Math.sqrt(a.aggregate(b, DoubleFunctions.plus, DoubleFunctions.chain(DoubleFunctions.square,
                    DoubleFunctions.minus)));
        }
    };

    /**
     * Bray-Curtis distance function;
     * Sum( abs(x[i]-y[i]) )  /  Sum( x[i]+y[i] ).
     */
    public static final VectorVectorFunction BRAY_CURTIS = new VectorVectorFunction() {
        public final double apply(DoubleMatrix1D a, DoubleMatrix1D b) {
            return a.aggregate(b, DoubleFunctions.plus, DoubleFunctions.chain(DoubleFunctions.abs,
                    DoubleFunctions.minus))
                    / a.aggregate(b, DoubleFunctions.plus, DoubleFunctions.plus);
        }
    };

    /**
     * Canberra distance function;
     * Sum( abs(x[i]-y[i]) / abs(x[i]+y[i]) ).
     */
    public static final VectorVectorFunction CANBERRA = new VectorVectorFunction() {
        DoubleDoubleFunction fun = new DoubleDoubleFunction() {
            public final double apply(double a, double b) {
                return Math.abs(a - b) / Math.abs(a + b);
            }
        };

        public final double apply(DoubleMatrix1D a, DoubleMatrix1D b) {
            return a.aggregate(b, DoubleFunctions.plus, fun);
        }
    };

    /**
     * Maximum distance function; Max( abs(x[i]-y[i]) ).
     */
    public static final VectorVectorFunction MAXIMUM = new VectorVectorFunction() {
        public final double apply(DoubleMatrix1D a, DoubleMatrix1D b) {
            return a.aggregate(b, DoubleFunctions.max, DoubleFunctions
                    .chain(DoubleFunctions.abs, DoubleFunctions.minus));
        }
    };

    /**
     * Manhattan distance function; Sum( abs(x[i]-y[i]) ).
     */
    public static final VectorVectorFunction MANHATTAN = new VectorVectorFunction() {
        public final double apply(DoubleMatrix1D a, DoubleMatrix1D b) {
            return a.aggregate(b, DoubleFunctions.plus, DoubleFunctions.chain(DoubleFunctions.abs,
                    DoubleFunctions.minus));
        }
    };

    /**
     * Interface that represents a function object: a function that takes two
     * argument vectors and returns a single value.
     */
    public interface VectorVectorFunction {
        /**
         * Applies a function to two argument vectors.
         * 
         * @param x
         *            the first argument vector passed to the function.
         * @param y
         *            the second argument vector passed to the function.
         * @return the result of the function.
         */
        abstract public double apply(cern.colt.matrix.tdouble.DoubleMatrix1D x,
                cern.colt.matrix.tdouble.DoubleMatrix1D y);
    }

    /**
     * Makes this class non instantiable, but still let's others inherit from
     * it.
     */
    protected DoubleStatistic() {
    }

    /**
     * Applies the given aggregation functions to each column and stores the
     * results in a the result matrix. If matrix has shape m x n, then
     * result must have shape aggr.length x n. Tip: To do aggregations
     * on rows use dice views (transpositions), as in
     * aggregate(matrix.viewDice(),aggr,result.viewDice()).
     * 
     * @param matrix
     *            any matrix; a column holds the values of a given variable.
     * @param aggr
     *            the aggregation functions to be applied to each column.
     * @param result
     *            the matrix to hold the aggregation results.
     * @return result (for convenience only).
     * @see DoubleFormatter
     * @see hep.aida.tdouble.bin.DoubleBinFunction1D
     * @see hep.aida.tdouble.bin.DoubleBinFunctions1D
     */
    public static DoubleMatrix2D aggregate(DoubleMatrix2D matrix, hep.aida.tdouble.bin.DoubleBinFunction1D[] aggr,
            DoubleMatrix2D result) {
        DynamicDoubleBin1D bin = new DynamicDoubleBin1D();
        double[] elements = new double[matrix.rows()];
        cern.colt.list.tdouble.DoubleArrayList values = new cern.colt.list.tdouble.DoubleArrayList(elements);
        for (int column = matrix.columns(); --column >= 0;) {
            matrix.viewColumn(column).toArray(elements); // copy column into
            // values
            bin.clear();
            bin.addAllOf(values);
            for (int i = aggr.length; --i >= 0;) {
                result.set(i, column, aggr[i].apply(bin));
            }
        }
        return result;
    }

    /**
     * Fills all cell values of the given vector into a bin from which
     * statistics measures can be retrieved efficiently. Cells values are
     * copied. 

     * Tip: Use System.out.println(bin(vector)) to print most measures
     * computed by the bin. Example:
     * 
     * 
     * 
     * 
     *      * 	 Size: 20000
     * 	 Sum: 299858.02350278624
     * 	 SumOfSquares: 5399184.154095971
     * 	 Min: 0.8639113139711261
     * 	 Max: 59.75331890541892
     * 	 Mean: 14.992901175139313
     * 	 RMS: 16.43043540825375
     * 	 Variance: 45.17438077634358
     * 	 Standard deviation: 6.721188940681818
     * 	 Standard error: 0.04752598277592142
     * 	 Geometric mean: 13.516615397064466
     * 	 Product: Infinity
     * 	 Harmonic mean: 11.995174297952191
     * 	 Sum of inversions: 1667.337172700724
     * 	 Skew: 0.8922838940067878
     * 	 Kurtosis: 1.1915828121825598
     * 	 Sum of powers(3): 1.1345828465808412E8
     * 	 Sum of powers(4): 2.7251055344494686E9
     * 	 Sum of powers(5): 7.367125643433887E10
     * 	 Sum of powers(6): 2.215370909100143E12
     * 	 Moment(0,0): 1.0
     * 	 Moment(1,0): 14.992901175139313
     * 	 Moment(2,0): 269.95920770479853
     * 	 Moment(3,0): 5672.914232904206
     * 	 Moment(4,0): 136255.27672247344
     * 	 Moment(5,0): 3683562.8217169433
     * 	 Moment(6,0): 1.1076854545500715E8
     * 	 Moment(0,mean()): 1.0
     * 	 Moment(1,mean()): -2.0806734113421045E-14
     * 	 Moment(2,mean()): 45.172122057305664
     * 	 Moment(3,mean()): 270.92018671421
     * 	 Moment(4,mean()): 8553.8664869067
     * 	 Moment(5,mean()): 153357.41712233616
     * 	 Moment(6,mean()): 4273757.570142922
     * 	 25%, 50% and 75% Quantiles: 10.030074811938091, 13.977982089912224,
     * 	 18.86124362967137
     * 	 quantileInverse(mean): 0.559163335012079
     * 	 Distinct elements & frequencies not printed (too many).
     * 
     * 
     * 
     * 
     * 
     * @param vector
     *            the vector to analyze.
     * @return a bin holding the statistics measures of the vector.
     */
    public static DynamicDoubleBin1D bin(DoubleMatrix1D vector) {
        DynamicDoubleBin1D bin = new DynamicDoubleBin1D();
        bin.addAllOf(DoubleFactory1D.dense.toList(vector));
        return bin;
    }

    /**
     * Modifies the given covariance matrix to be a correlation matrix
     * (in-place). The correlation matrix is a square, symmetric matrix
     * consisting of nothing but correlation coefficients. The rows and the
     * columns represent the variables, the cells represent correlation
     * coefficients. The diagonal cells (i.e. the correlation between a variable
     * and itself) will equal 1, for the simple reason that the correlation
     * coefficient of a variable with itself equals 1. The correlation of two
     * column vectors x and y is given by
     * corr(x,y) = cov(x,y) / (stdDev(x)*stdDev(y)) (Pearson's
     * correlation coefficient). A correlation coefficient varies between -1
     * (for a perfect negative relationship) to +1 (for a perfect positive
     * relationship). See the 
     * math definition and  another def. Compares two column vectors at a time. Use dice views
     * to compare two row vectors at a time.
     * 
     * @param covariance
     *            a covariance matrix, as, for example, returned by method
     *            {@link #covariance(DoubleMatrix2D)}.
     * @return the modified covariance, now correlation matrix (for convenience
     *         only).
     */
    public static DoubleMatrix2D correlation(DoubleMatrix2D covariance) {
        for (int i = covariance.columns(); --i >= 0;) {
            for (int j = i; --j >= 0;) {
                double stdDev1 = Math.sqrt(covariance.getQuick(i, i));
                double stdDev2 = Math.sqrt(covariance.getQuick(j, j));
                double cov = covariance.getQuick(i, j);
                double corr = cov / (stdDev1 * stdDev2);

                covariance.setQuick(i, j, corr);
                covariance.setQuick(j, i, corr); // symmetric
            }
        }
        for (int i = covariance.columns(); --i >= 0;)
            covariance.setQuick(i, i, 1);

        return covariance;
    }

    /**
     * Constructs and returns the covariance matrix of the given matrix. The
     * covariance matrix is a square, symmetric matrix consisting of nothing but
     * covariance coefficients. The rows and the columns represent the
     * variables, the cells represent covariance coefficients. The diagonal
     * cells (i.e. the covariance between a variable and itself) will equal the
     * variances. The covariance of two column vectors x and y is given by
     * cov(x,y) = (1/n) * Sum((x[i]-mean(x)) * (y[i]-mean(y))). See the
     * 
     * math definition. Compares two column vectors at a time. Use dice
     * views to compare two row vectors at a time.
     * 
     * @param matrix
     *            any matrix; a column holds the values of a given variable.
     * @return the covariance matrix (n x n, n=matrix.columns).
     */
    public static DoubleMatrix2D covariance(DoubleMatrix2D matrix) {
        int rows = matrix.rows();
        int columns = matrix.columns();
        DoubleMatrix2D covariance = new cern.colt.matrix.tdouble.impl.DenseDoubleMatrix2D(columns, columns);

        double[] sums = new double[columns];
        DoubleMatrix1D[] cols = new DoubleMatrix1D[columns];
        for (int i = columns; --i >= 0;) {
            cols[i] = matrix.viewColumn(i);
            sums[i] = cols[i].zSum();
        }

        for (int i = columns; --i >= 0;) {
            for (int j = i + 1; --j >= 0;) {
                double sumOfProducts = cols[i].zDotProduct(cols[j]);
                double cov = (sumOfProducts - sums[i] * sums[j] / rows) / rows;
                covariance.setQuick(i, j, cov);
                covariance.setQuick(j, i, cov); // symmetric
            }
        }
        return covariance;
    }

    /**
     * 2-d OLAP cube operator; Fills all cells of the given vectors into the
     * given histogram. If you use hep.aida.ref.Converter.toString(histo) on the
     * result, the OLAP cube of x-"column" vs. y-"column" , summing the weights
     * "column" will be printed. For example, aggregate sales by product by
     * region.
     * 
     * Computes the distinct values of x and y, yielding histogram axes that
     * capture one distinct value per bin. Then fills the histogram.
     * 

     * Example output:
     * 
     * 
     * 
     * 
     *      * 	 Cube:
     * 	    Entries=5000, ExtraEntries=0
     * 	    MeanX=4.9838, RmsX=NaN
     * 	    MeanY=2.5304, RmsY=NaN
     * 	    xAxis: Min=0, Max=10, Bins=11
     * 	    yAxis: Min=0, Max=5, Bins=6
     * 	 Heights:
     * 	       | X
     * 	       | 0   1   2   3   4   5   6   7   8   9   10  | Sum 
     * 	 ----------------------------------------------------------
     * 	 Y 5   |  30  53  51  52  57  39  65  61  55  49  22 |  534
     * 	   4   |  43 106 112  96  92  94 107  98  98 110  47 | 1003
     * 	   3   |  39 134  87  93 102 103 110  90 114  98  51 | 1021
     * 	   2   |  44  81 113  96 101  86 109  83 111  93  42 |  959
     * 	   1   |  54  94 103  99 115  92  98  97 103  90  44 |  989
     * 	   0   |  24  54  52  44  42  56  46  47  56  53  20 |  494
     * 	 ----------------------------------------------------------
     * 	   Sum | 234 522 518 480 509 470 535 476 537 493 226 |
     * 
     * 
     * 
     * 
     * 
     * @return the histogram containing the cube.
     * @throws IllegalArgumentException
     *             if
     *             x.size() != y.size() || y.size() != weights.size().
     */
    public static hep.aida.tdouble.DoubleIHistogram2D cube(DoubleMatrix1D x, DoubleMatrix1D y, DoubleMatrix1D weights) {
        if (x.size() != y.size() || y.size() != weights.size())
            throw new IllegalArgumentException("vectors must have same size");

        double epsilon = 1.0E-5f;
        cern.colt.list.tdouble.DoubleArrayList distinct = new cern.colt.list.tdouble.DoubleArrayList();
        double[] vals = new double[(int) x.size()];
        cern.colt.list.tdouble.DoubleArrayList sorted = new cern.colt.list.tdouble.DoubleArrayList(vals);

        // compute distinct values of x
        x.toArray(vals); // copy x into vals
        sorted.sort();
        cern.jet.stat.tdouble.DoubleDescriptive.frequencies(sorted, distinct, null);
        // since bins are right-open [from,to) we need an additional dummy bin
        // so that the last distinct value does not fall into the overflow bin
        if (distinct.size() > 0)
            distinct.add(distinct.get(distinct.size() - 1) + epsilon);
        distinct.trimToSize();
        hep.aida.tdouble.DoubleIAxis xaxis = new hep.aida.tdouble.ref.DoubleVariableAxis(distinct.elements());

        // compute distinct values of y
        y.toArray(vals);
        sorted.sort();
        cern.jet.stat.tdouble.DoubleDescriptive.frequencies(sorted, distinct, null);
        // since bins are right-open [from,to) we need an additional dummy bin
        // so that the last distinct value does not fall into the overflow bin
        if (distinct.size() > 0)
            distinct.add(distinct.get(distinct.size() - 1) + epsilon);
        distinct.trimToSize();
        hep.aida.tdouble.DoubleIAxis yaxis = new hep.aida.tdouble.ref.DoubleVariableAxis(distinct.elements());

        hep.aida.tdouble.DoubleIHistogram2D histo = new hep.aida.tdouble.ref.DoubleHistogram2D("Cube", xaxis, yaxis);
        return histogram(histo, x, y, weights);
    }

    /**
     * 3-d OLAP cube operator; Fills all cells of the given vectors into the
     * given histogram. If you use hep.aida.ref.Converter.toString(histo) on the
     * result, the OLAP cube of x-"column" vs. y-"column" vs. z-"column",
     * summing the weights "column" will be printed. For example, aggregate
     * sales by product by region by time.
     * 
     * Computes the distinct values of x and y and z, yielding histogram axes
     * that capture one distinct value per bin. Then fills the histogram.
     * 
     * @return the histogram containing the cube.
     * @throws IllegalArgumentException
     *             if
     *             x.size() != y.size() || x.size() != z.size() || x.size() != weights.size()
     *             .
     */
    public static hep.aida.tdouble.DoubleIHistogram3D cube(DoubleMatrix1D x, DoubleMatrix1D y, DoubleMatrix1D z,
            DoubleMatrix1D weights) {
        if (x.size() != y.size() || x.size() != z.size() || x.size() != weights.size())
            throw new IllegalArgumentException("vectors must have same size");

        double epsilon = 1.0E-5f;
        cern.colt.list.tdouble.DoubleArrayList distinct = new cern.colt.list.tdouble.DoubleArrayList();
        double[] vals = new double[(int) x.size()];
        cern.colt.list.tdouble.DoubleArrayList sorted = new cern.colt.list.tdouble.DoubleArrayList(vals);

        // compute distinct values of x
        x.toArray(vals); // copy x into vals
        sorted.sort();
        cern.jet.stat.tdouble.DoubleDescriptive.frequencies(sorted, distinct, null);
        // since bins are right-open [from,to) we need an additional dummy bin
        // so that the last distinct value does not fall into the overflow bin
        if (distinct.size() > 0)
            distinct.add(distinct.get(distinct.size() - 1) + epsilon);
        distinct.trimToSize();
        hep.aida.tdouble.DoubleIAxis xaxis = new hep.aida.tdouble.ref.DoubleVariableAxis(distinct.elements());

        // compute distinct values of y
        y.toArray(vals);
        sorted.sort();
        cern.jet.stat.tdouble.DoubleDescriptive.frequencies(sorted, distinct, null);
        // since bins are right-open [from,to) we need an additional dummy bin
        // so that the last distinct value does not fall into the overflow bin
        if (distinct.size() > 0)
            distinct.add(distinct.get(distinct.size() - 1) + epsilon);
        distinct.trimToSize();
        hep.aida.tdouble.DoubleIAxis yaxis = new hep.aida.tdouble.ref.DoubleVariableAxis(distinct.elements());

        // compute distinct values of z
        z.toArray(vals);
        sorted.sort();
        cern.jet.stat.tdouble.DoubleDescriptive.frequencies(sorted, distinct, null);
        // since bins are right-open [from,to) we need an additional dummy bin
        // so that the last distinct value does not fall into the overflow bin
        if (distinct.size() > 0)
            distinct.add(distinct.get(distinct.size() - 1) + epsilon);
        distinct.trimToSize();
        hep.aida.tdouble.DoubleIAxis zaxis = new hep.aida.tdouble.ref.DoubleVariableAxis(distinct.elements());

        hep.aida.tdouble.DoubleIHistogram3D histo = new hep.aida.tdouble.ref.DoubleHistogram3D("Cube", xaxis, yaxis,
                zaxis);
        return histogram(histo, x, y, z, weights);
    }

    /**
     * Demonstrates usage of this class.
     */
    public static void demo1() {
        double[][] values = { { 1, 2, 3 }, { 2, 4, 6 }, { 3, 6, 9 }, { 4, -8, -10 } };
        DoubleFactory2D factory = DoubleFactory2D.dense;
        DoubleMatrix2D A = factory.make(values);
        System.out.println("\n\nmatrix=" + A);
        System.out.println("\ncovar1=" + covariance(A));
        // System.out.println(correlation(covariance(A)));
        // System.out.println(distance(A,EUCLID));

        // System.out.println(cern.colt.matrixpattern.Converting.toHTML(A.toString()));
        // System.out.println(cern.colt.matrixpattern.Converting.toHTML(covariance(A).toString()));
        // System.out.println(cern.colt.matrixpattern.Converting.toHTML(correlation(covariance(A)).toString()));
        // System.out.println(cern.colt.matrixpattern.Converting.toHTML(distance(A,EUCLID).toString()));
    }

    /**
     * Demonstrates usage of this class.
     */
    public static void demo2(int rows, int columns, boolean print) {
        System.out.println("\n\ninitializing...");
        DoubleFactory2D factory = DoubleFactory2D.dense;
        DoubleMatrix2D A = factory.ascending(rows, columns);
        // double value = 1;
        // DoubleMatrix2D A = factory.make(rows,columns);
        // A.assign(value);

        System.out.println("benchmarking correlation...");

        cern.colt.Timer timer = new cern.colt.Timer().start();
        DoubleMatrix2D corr = correlation(covariance(A));
        timer.stop().display();

        if (print) {
            System.out.println("printing result...");
            System.out.println(corr);
        }
        System.out.println("done.");
    }

    /**
     * Demonstrates usage of this class.
     */
    public static void demo3(VectorVectorFunction norm) {
        double[][] values = { { -0.9611052f, -0.25421095f }, { 0.4308269f, -0.69932648f },
                { -1.2071029f, 0.62030596f }, { 1.5345166f, 0.02135884f }, { -1.1341542f, 0.20388430f } };

        System.out.println("\n\ninitializing...");
        DoubleFactory2D factory = DoubleFactory2D.dense;
        DoubleMatrix2D A = factory.make(values).viewDice();

        System.out.println("\nA=" + A.viewDice());
        System.out.println("\ndist=" + distance(A, norm).viewDice());
    }

    /**
     * Constructs and returns the distance matrix of the given matrix. The
     * distance matrix is a square, symmetric matrix consisting of nothing but
     * distance coefficients. The rows and the columns represent the variables,
     * the cells represent distance coefficients. The diagonal cells (i.e. the
     * distance between a variable and itself) will be zero. Compares two column
     * vectors at a time. Use dice views to compare two row vectors at a time.
     * 
     * @param matrix
     *            any matrix; a column holds the values of a given variable
     *            (vector).
     * @param distanceFunction
     *            (EUCLID, CANBERRA, ..., or any user defined distance function
     *            operating on two vectors).
     * @return the distance matrix (n x n, n=matrix.columns).
     */
    public static DoubleMatrix2D distance(DoubleMatrix2D matrix, VectorVectorFunction distanceFunction) {
        int columns = matrix.columns();
        DoubleMatrix2D distance = new cern.colt.matrix.tdouble.impl.DenseDoubleMatrix2D(columns, columns);

        // cache views
        DoubleMatrix1D[] cols = new DoubleMatrix1D[columns];
        for (int i = columns; --i >= 0;) {
            cols[i] = matrix.viewColumn(i);
        }

        // work out all permutations
        for (int i = columns; --i >= 0;) {
            for (int j = i; --j >= 0;) {
                double d = distanceFunction.apply(cols[i], cols[j]);
                distance.setQuick(i, j, d);
                distance.setQuick(j, i, d); // symmetric
            }
        }
        return distance;
    }

    /**
     * Fills all cells of the given vector into the given histogram.
     * 
     * @return histo (for convenience only).
     */
    public static hep.aida.tdouble.DoubleIHistogram1D histogram(hep.aida.tdouble.DoubleIHistogram1D histo,
            DoubleMatrix1D vector) {
        for (int i = (int) vector.size(); --i >= 0;) {
            histo.fill(vector.getQuick(i));
        }
        return histo;
    }

    /**
     * Fills all cells of the given matrix into the given histogram.
     * 
     * @return histo (for convenience only).
     */
    public static hep.aida.tdouble.DoubleIHistogram1D histogram(final hep.aida.tdouble.DoubleIHistogram1D histo,
            final DoubleMatrix2D matrix) {
        histo.fill_2D((double[]) matrix.elements(), matrix.rows(), matrix.columns(), (int) matrix.index(0, 0), matrix
                .rowStride(), matrix.columnStride());
        return histo;
    }

    /**
     * Splits the given matrix into m x n pieces and computes 1D histogram of
     * each piece.
     * 
     * @return histo (for convenience only).
     */
    public static hep.aida.tdouble.DoubleIHistogram1D[][] histogram(
            final hep.aida.tdouble.DoubleIHistogram1D[][] histo, final DoubleMatrix2D matrix, final int m, final int n) {
        int rows = matrix.rows();
        int columns = matrix.columns();
        if (m >= rows) {
            throw new IllegalArgumentException("Parameter m must be smaller than the number of rows in the matrix");
        }
        if (n >= columns) {
            throw new IllegalArgumentException("Parameter n must be smaller than the number of columns in the matrix");
        }
        final int row_size = rows / m;
        final int col_size = columns / n;
        final int[] height = new int[m];
        final int[] width = new int[n];
        for (int r = 0; r < m - 1; r++) {
            height[r] = row_size;
        }
        height[m - 1] = rows - (m - 1) * row_size;
        for (int c = 0; c < n - 1; c++) {
            width[c] = col_size;
        }
        width[n - 1] = columns - (n - 1) * col_size;

        int nthreads = ConcurrencyUtils.getNumberOfThreads();
        if ((nthreads > 1) && (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D())) {
            nthreads = Math.min(nthreads, m);
            Future[] futures = new Future[nthreads];
            int k = m / nthreads;
            for (int j = 0; j < nthreads; j++) {
                final int firstRow = j * k;
                final int lastRow = (j == nthreads - 1) ? m : firstRow + k;
                futures[j] = ConcurrencyUtils.submit(new Runnable() {

                    public void run() {
                        DoubleMatrix2D view = null;
                        for (int r = firstRow; r < lastRow; r++) {
                            for (int c = 0; c < n; c++) {
                                view = matrix.viewPart(r * row_size, c * col_size, height[r], width[c]);
                                histo[r][c].fill_2D((double[]) view.elements(), view.rows(), view.columns(), (int) view
                                        .index(0, 0), view.rowStride(), view.columnStride());
                            }
                        }
                    }
                });
            }
            ConcurrencyUtils.waitForCompletion(futures);
        } else {
            DoubleMatrix2D view = null;
            for (int r = 0; r < m; r++) {
                for (int c = 0; c < n; c++) {
                    view = matrix.viewPart(r * row_size, c * col_size, height[r], width[c]);
                    histo[r][c].fill_2D((double[]) view.elements(), view.rows(), view.columns(),
                            (int) view.index(0, 0), view.rowStride(), view.columnStride());
                }
            }
        }
        return histo;
    }

    /**
     * Fills all cells of the given vectors into the given histogram.
     * 
     * @return histo (for convenience only).
     * @throws IllegalArgumentException
     *             if x.size() != y.size().
     */
    public static hep.aida.tdouble.DoubleIHistogram2D histogram(hep.aida.tdouble.DoubleIHistogram2D histo,
            DoubleMatrix1D x, DoubleMatrix1D y) {
        if (x.size() != y.size())
            throw new IllegalArgumentException("vectors must have same size");
        for (int i = (int) x.size(); --i >= 0;) {
            histo.fill(x.getQuick(i), y.getQuick(i));
        }
        return histo;
    }

    /**
     * Fills all cells of the given vectors into the given histogram.
     * 
     * @return histo (for convenience only).
     * @throws IllegalArgumentException
     *             if
     *             x.size() != y.size() || y.size() != weights.size().
     */
    public static hep.aida.tdouble.DoubleIHistogram2D histogram(hep.aida.tdouble.DoubleIHistogram2D histo,
            DoubleMatrix1D x, DoubleMatrix1D y, DoubleMatrix1D weights) {
        if (x.size() != y.size() || y.size() != weights.size())
            throw new IllegalArgumentException("vectors must have same size");
        for (int i = (int) x.size(); --i >= 0;) {
            histo.fill(x.getQuick(i), y.getQuick(i), weights.getQuick(i));
        }
        return histo;
    }

    /**
     * Fills all cells of the given vectors into the given histogram.
     * 
     * @return histo (for convenience only).
     * @throws IllegalArgumentException
     *             if
     *             x.size() != y.size() || x.size() != z.size() || x.size() != weights.size()
     *             .
     */
    public static hep.aida.tdouble.DoubleIHistogram3D histogram(hep.aida.tdouble.DoubleIHistogram3D histo,
            DoubleMatrix1D x, DoubleMatrix1D y, DoubleMatrix1D z, DoubleMatrix1D weights) {
        if (x.size() != y.size() || x.size() != z.size() || x.size() != weights.size())
            throw new IllegalArgumentException("vectors must have same size");
        for (int i = (int) x.size(); --i >= 0;) {
            histo.fill(x.getQuick(i), y.getQuick(i), z.getQuick(i), weights.getQuick(i));
        }
        return histo;
    }

    /**
     * Benchmarks covariance computation.
     */
    public static void main(String[] args) {
        int rows = Integer.parseInt(args[0]);
        int columns = Integer.parseInt(args[1]);
        boolean print = args[2].equals("print");
        demo2(rows, columns, print);
    }

    /**
     * Constructs and returns a sampling view with a size of
     * round(matrix.size() * fraction). Samples "without replacement"
     * from the uniform distribution.
     * 
     * @param matrix
     *            any matrix.
     * @param fraction
     *            the percentage to be included in the view.
     * @param randomGenerator
     *            a uniform random number generator; set this parameter to
     *            null to use a default generator seeded with the
     *            current time.
     * @return the sampling view.
     * @throws IllegalArgumentException
     *             if
     *             ! (0 <= rowFraction <= 1 && 0 <= columnFraction <= 1)
     *             .
     * @see cern.jet.random.tdouble.sampling.DoubleRandomSampler
     */
    public static DoubleMatrix1D viewSample(DoubleMatrix1D matrix, double fraction, DoubleRandomEngine randomGenerator) {
        // check preconditions and allow for a little tolerance
        double epsilon = 1e-05f;
        if (fraction < 0 - epsilon || fraction > 1 + epsilon)
            throw new IllegalArgumentException();
        if (fraction < 0)
            fraction = 0;
        if (fraction > 1)
            fraction = 1;

        // random generator seeded with current time
        if (randomGenerator == null)
            randomGenerator = new cern.jet.random.tdouble.engine.DoubleMersenneTwister((int) System.currentTimeMillis());

        int ncolumns = (int) Math.round(matrix.size() * fraction);
        int max = ncolumns;
        long[] selected = new long[max]; // sampler works on long's, not
        // int's

        // sample
        int n = ncolumns;
        int N = (int) matrix.size();
        cern.jet.random.tdouble.sampling.DoubleRandomSampler.sample(n, N, n, 0, selected, 0, randomGenerator);
        int[] selectedCols = new int[n];
        for (int i = 0; i < n; i++)
            selectedCols[i] = (int) selected[i];

        return matrix.viewSelection(selectedCols);
    }

    /**
     * Constructs and returns a sampling view with
     * round(matrix.rows() * rowFraction) rows and
     * round(matrix.columns() * columnFraction) columns. Samples
     * "without replacement". Rows and columns are randomly chosen from the
     * uniform distribution. Examples:
     * 
     * 
     * 
     * 
     * 
     * 
     * 
     * 
     * 
     * 
     * 
     * 
     * 
     * matrix rowFraction=0.2

     columnFraction=0.2 rowFraction=0.2

     columnFraction=1.0 rowFraction=1.0

     columnFraction=0.2 
 10 x 10 matrix

      1  2  3  4  5  6  7  8  9  10

     11 12 13 14 15 16 17 18 19  20

     21 22 23 24 25 26 27 28 29  30

     31 32 33 34 35 36 37 38 39  40

     41 42 43 44 45 46 47 48 49  50

     51 52 53 54 55 56 57 58 59  60

     61 62 63 64 65 66 67 68 69  70

     71 72 73 74 75 76 77 78 79  80

     81 82 83 84 85 86 87 88 89  90

     91 92 93 94 95 96 97 98 99 100 
      2 x 2 matrix

     43 50

     53 60  2 x 10 matrix

     41 42 43 44 45 46 47 48 49  50

     91 92 93 94 95 96 97 98 99 100 
      10 x 2 matrix

      4  8

     14 18

     24 28

     34 38

     44 48

     54 58

     64 68

     74 78

     84 88

     94 98 
     * 
     * @param matrix
     *            any matrix.
     * @param rowFraction
     *            the percentage of rows to be included in the view.
     * @param columnFraction
     *            the percentage of columns to be included in the view.
     * @param randomGenerator
     *            a uniform random number generator; set this parameter to
     *            null to use a default generator seeded with the
     *            current time.
     * @return the sampling view.
     * @throws IllegalArgumentException
     *             if
     *             ! (0 <= rowFraction <= 1 && 0 <= columnFraction <= 1)
     *             .
     * @see cern.jet.random.tdouble.sampling.DoubleRandomSampler
     */
    public static DoubleMatrix2D viewSample(DoubleMatrix2D matrix, double rowFraction, double columnFraction,
            DoubleRandomEngine randomGenerator) {
        // check preconditions and allow for a little tolerance
        double epsilon = 1e-05f;
        if (rowFraction < 0 - epsilon || rowFraction > 1 + epsilon)
            throw new IllegalArgumentException();
        if (rowFraction < 0)
            rowFraction = 0;
        if (rowFraction > 1)
            rowFraction = 1;

        if (columnFraction < 0 - epsilon || columnFraction > 1 + epsilon)
            throw new IllegalArgumentException();
        if (columnFraction < 0)
            columnFraction = 0;
        if (columnFraction > 1)
            columnFraction = 1;

        // random generator seeded with current time
        if (randomGenerator == null)
            randomGenerator = new cern.jet.random.tdouble.engine.DoubleMersenneTwister((int) System.currentTimeMillis());

        int nrows = (int) Math.round(matrix.rows() * rowFraction);
        int ncolumns = (int) Math.round(matrix.columns() * columnFraction);
        int max = Math.max(nrows, ncolumns);
        long[] selected = new long[max]; // sampler works on long's, not
        // int's

        // sample rows
        int n = nrows;
        int N = matrix.rows();
        cern.jet.random.tdouble.sampling.DoubleRandomSampler.sample(n, N, n, 0, selected, 0, randomGenerator);
        int[] selectedRows = new int[n];
        for (int i = 0; i < n; i++)
            selectedRows[i] = (int) selected[i];

        // sample columns
        n = ncolumns;
        N = matrix.columns();
        cern.jet.random.tdouble.sampling.DoubleRandomSampler.sample(n, N, n, 0, selected, 0, randomGenerator);
        int[] selectedCols = new int[n];
        for (int i = 0; i < n; i++)
            selectedCols[i] = (int) selected[i];

        return matrix.viewSelection(selectedRows, selectedCols);
    }

    /**
     * Constructs and returns a sampling view with
     * round(matrix.slices() * sliceFraction) slices and
     * round(matrix.rows() * rowFraction) rows and
     * round(matrix.columns() * columnFraction) columns. Samples
     * "without replacement". Slices, rows and columns are randomly chosen from
     * the uniform distribution.
     * 
     * @param matrix
     *            any matrix.
     * @param sliceFraction
     *            the percentage of slices to be included in the view.
     * @param rowFraction
     *            the percentage of rows to be included in the view.
     * @param columnFraction
     *            the percentage of columns to be included in the view.
     * @param randomGenerator
     *            a uniform random number generator; set this parameter to
     *            null to use a default generator seeded with the
     *            current time.
     * @return the sampling view.
     * @throws IllegalArgumentException
     *             if
     *             ! (0 <= sliceFraction <= 1 && 0 <= rowFraction <= 1 && 0 <= columnFraction <= 1)
     *             .
     * @see cern.jet.random.tdouble.sampling.DoubleRandomSampler
     */
    public static DoubleMatrix3D viewSample(DoubleMatrix3D matrix, double sliceFraction, double rowFraction,
            double columnFraction, DoubleRandomEngine randomGenerator) {
        // check preconditions and allow for a little tolerance
        double epsilon = 1e-05f;
        if (sliceFraction < 0 - epsilon || sliceFraction > 1 + epsilon)
            throw new IllegalArgumentException();
        if (sliceFraction < 0)
            sliceFraction = 0;
        if (sliceFraction > 1)
            sliceFraction = 1;

        if (rowFraction < 0 - epsilon || rowFraction > 1 + epsilon)
            throw new IllegalArgumentException();
        if (rowFraction < 0)
            rowFraction = 0;
        if (rowFraction > 1)
            rowFraction = 1;

        if (columnFraction < 0 - epsilon || columnFraction > 1 + epsilon)
            throw new IllegalArgumentException();
        if (columnFraction < 0)
            columnFraction = 0;
        if (columnFraction > 1)
            columnFraction = 1;

        // random generator seeded with current time
        if (randomGenerator == null)
            randomGenerator = new cern.jet.random.tdouble.engine.DoubleMersenneTwister((int) System.currentTimeMillis());

        int nslices = (int) Math.round(matrix.slices() * sliceFraction);
        int nrows = (int) Math.round(matrix.rows() * rowFraction);
        int ncolumns = (int) Math.round(matrix.columns() * columnFraction);
        int max = Math.max(nslices, Math.max(nrows, ncolumns));
        long[] selected = new long[max]; // sampler works on long's, not
        // int's

        // sample slices
        int n = nslices;
        int N = matrix.slices();
        cern.jet.random.tdouble.sampling.DoubleRandomSampler.sample(n, N, n, 0, selected, 0, randomGenerator);
        int[] selectedSlices = new int[n];
        for (int i = 0; i < n; i++)
            selectedSlices[i] = (int) selected[i];

        // sample rows
        n = nrows;
        N = matrix.rows();
        cern.jet.random.tdouble.sampling.DoubleRandomSampler.sample(n, N, n, 0, selected, 0, randomGenerator);
        int[] selectedRows = new int[n];
        for (int i = 0; i < n; i++)
            selectedRows[i] = (int) selected[i];

        // sample columns
        n = ncolumns;
        N = matrix.columns();
        cern.jet.random.tdouble.sampling.DoubleRandomSampler.sample(n, N, n, 0, selected, 0, randomGenerator);
        int[] selectedCols = new int[n];
        for (int i = 0; i < n; i++)
            selectedCols[i] = (int) selected[i];

        return matrix.viewSelection(selectedSlices, selectedRows, selectedCols);
    }
}
`A`	`covariance(A)`	`correlation(covariance(A))`	`distance(A,EUCLID)`
`4 x 3 matrix 1 2 3 2 4 6 3 6 9 4 -8 -10`	`3 x 3 matrix 1.25 -3.5 -4.5 -3.5 29 39 -4.5 39 52.5`	`3 x 3 matrix 1 -0.581318 -0.555492 -0.581318 1 0.999507 -0.555492 0.999507 1`	`3 x 3 matrix 0 12.569805 15.874508 12.569805 0 4.242641 15.874508 4.242641 0`
`matrix`	`rowFraction=0.2 columnFraction=0.2`	`rowFraction=0.2 columnFraction=1.0`	`rowFraction=1.0 columnFraction=0.2`
`10 x 10 matrix 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100`	`2 x 2 matrix 43 50 53 60`	`2 x 10 matrix 41 42 43 44 45 46 47 48 49 50 91 92 93 94 95 96 97 98 99 100`	`10 x 2 matrix 4 8 14 18 24 28 34 38 44 48 54 58 64 68 74 78 84 88 94 98`