All downloads are FREE. The search and download functionality uses the official Maven repository.

com.dimajix.shaded.guava.math.PairedStats Maven / Gradle / Ivy

/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.dimajix.shaded.guava.math;

import static com.dimajix.shaded.guava.base.Preconditions.checkArgument;
import static com.dimajix.shaded.guava.base.Preconditions.checkNotNull;
import static com.dimajix.shaded.guava.base.Preconditions.checkState;
import static java.lang.Double.NaN;
import static java.lang.Double.doubleToLongBits;
import static java.lang.Double.isNaN;

import com.dimajix.shaded.guava.annotations.GwtIncompatible;
import com.dimajix.shaded.guava.annotations.J2ktIncompatible;
import com.dimajix.shaded.guava.base.MoreObjects;
import com.dimajix.shaded.guava.base.Objects;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import javax.annotation.CheckForNull;

/**
 * An immutable value object capturing some basic statistics about a collection of paired double
 * values (e.g. points on a plane). Build instances with {@link PairedStatsAccumulator#snapshot}.
 *
 * @author Pete Gillin
 * @since 20.0
 */
@J2ktIncompatible
@GwtIncompatible
@ElementTypesAreNonnullByDefault
public final class PairedStats implements Serializable {

  private final Stats xStats;
  private final Stats yStats;
  private final double sumOfProductsOfDeltas;

  /**
   * Internal constructor. Users should use {@link PairedStatsAccumulator#snapshot}.
   *
   * 

To ensure that the created instance obeys its contract, the parameters should satisfy the * following constraints. This is the callers responsibility and is not enforced here. * *

    *
  • Both {@code xStats} and {@code yStats} must have the same {@code count}. *
  • If that {@code count} is 1, {@code sumOfProductsOfDeltas} must be exactly 0.0. *
  • If that {@code count} is more than 1, {@code sumOfProductsOfDeltas} must be finite. *
*/ PairedStats(Stats xStats, Stats yStats, double sumOfProductsOfDeltas) { this.xStats = xStats; this.yStats = yStats; this.sumOfProductsOfDeltas = sumOfProductsOfDeltas; } /** Returns the number of pairs in the dataset. */ public long count() { return xStats.count(); } /** Returns the statistics on the {@code x} values alone. */ public Stats xStats() { return xStats; } /** Returns the statistics on the {@code y} values alone. */ public Stats yStats() { return yStats; } /** * Returns the population covariance of the values. The count must be non-zero. * *

This is guaranteed to return zero if the dataset contains a single pair of finite values. It * is not guaranteed to return zero when the dataset consists of the same pair of values multiple * times, due to numerical errors. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty */ public double populationCovariance() { checkState(count() != 0); return sumOfProductsOfDeltas / count(); } /** * Returns the sample covariance of the values. The count must be greater than one. * *

This is not guaranteed to return zero when the dataset consists of the same pair of values * multiple times, due to numerical errors. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty or contains a single pair of values */ public double sampleCovariance() { checkState(count() > 1); return sumOfProductsOfDeltas / (count() - 1); } /** * Returns the Pearson's or * product-moment correlation coefficient of the values. The count must greater than one, and * the {@code x} and {@code y} values must both have non-zero population variance (i.e. {@code * xStats().populationVariance() > 0.0 && yStats().populationVariance() > 0.0}). The result is not * guaranteed to be exactly +/-1 even when the data are perfectly (anti-)correlated, due to * numerical errors. However, it is guaranteed to be in the inclusive range [-1, +1]. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty or contains a single pair of values, or * either the {@code x} and {@code y} dataset has zero population variance */ public double pearsonsCorrelationCoefficient() { checkState(count() > 1); if (isNaN(sumOfProductsOfDeltas)) { return NaN; } double xSumOfSquaresOfDeltas = xStats().sumOfSquaresOfDeltas(); double ySumOfSquaresOfDeltas = yStats().sumOfSquaresOfDeltas(); checkState(xSumOfSquaresOfDeltas > 0.0); checkState(ySumOfSquaresOfDeltas > 0.0); // The product of two positive numbers can be zero if the multiplication underflowed. We // force a positive value by effectively rounding up to MIN_VALUE. double productOfSumsOfSquaresOfDeltas = ensurePositive(xSumOfSquaresOfDeltas * ySumOfSquaresOfDeltas); return ensureInUnitRange(sumOfProductsOfDeltas / Math.sqrt(productOfSumsOfSquaresOfDeltas)); } /** * Returns a linear transformation giving the best fit to the data according to Ordinary Least Squares linear * regression of {@code y} as a function of {@code x}. The count must be greater than one, and * either the {@code x} or {@code y} data must have a non-zero population variance (i.e. {@code * xStats().populationVariance() > 0.0 || yStats().populationVariance() > 0.0}). The result is * guaranteed to be horizontal if there is variance in the {@code x} data but not the {@code y} * data, and vertical if there is variance in the {@code y} data but not the {@code x} data. * *

This fit minimizes the root-mean-square error in {@code y} as a function of {@code x}. This * error is defined as the square root of the mean of the squares of the differences between the * actual {@code y} values of the data and the values predicted by the fit for the {@code x} * values (i.e. it is the square root of the mean of the squares of the vertical distances between * the data points and the best fit line). For this fit, this error is a fraction {@code sqrt(1 - * R*R)} of the population standard deviation of {@code y}, where {@code R} is the Pearson's * correlation coefficient (as given by {@link #pearsonsCorrelationCoefficient()}). * *

The corresponding root-mean-square error in {@code x} as a function of {@code y} is a * fraction {@code sqrt(1/(R*R) - 1)} of the population standard deviation of {@code x}. This fit * does not normally minimize that error: to do that, you should swap the roles of {@code x} and * {@code y}. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link * LinearTransformation#forNaN()}. * * @throws IllegalStateException if the dataset is empty or contains a single pair of values, or * both the {@code x} and {@code y} dataset must have zero population variance */ public LinearTransformation leastSquaresFit() { checkState(count() > 1); if (isNaN(sumOfProductsOfDeltas)) { return LinearTransformation.forNaN(); } double xSumOfSquaresOfDeltas = xStats.sumOfSquaresOfDeltas(); if (xSumOfSquaresOfDeltas > 0.0) { if (yStats.sumOfSquaresOfDeltas() > 0.0) { return LinearTransformation.mapping(xStats.mean(), yStats.mean()) .withSlope(sumOfProductsOfDeltas / xSumOfSquaresOfDeltas); } else { return LinearTransformation.horizontal(yStats.mean()); } } else { checkState(yStats.sumOfSquaresOfDeltas() > 0.0); return LinearTransformation.vertical(xStats.mean()); } } /** * {@inheritDoc} * *

Note: This tests exact equality of the calculated statistics, including the floating * point values. Two instances are guaranteed to be considered equal if one is copied from the * other using {@code second = new PairedStatsAccumulator().addAll(first).snapshot()}, if both * were obtained by calling {@code snapshot()} on the same {@link PairedStatsAccumulator} without * adding any values in between the two calls, or if one is obtained from the other after * round-tripping through java serialization. However, floating point rounding errors mean that it * may be false for some instances where the statistics are mathematically equal, including * instances constructed from the same values in a different order... or (in the general case) * even in the same order. (It is guaranteed to return true for instances constructed from the * same values in the same order if {@code strictfp} is in effect, or if the system architecture * guarantees {@code strictfp}-like semantics.) */ @Override public boolean equals(@CheckForNull Object obj) { if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } PairedStats other = (PairedStats) obj; return xStats.equals(other.xStats) && yStats.equals(other.yStats) && doubleToLongBits(sumOfProductsOfDeltas) == doubleToLongBits(other.sumOfProductsOfDeltas); } /** * {@inheritDoc} * *

Note: This hash code is consistent with exact equality of the calculated statistics, * including the floating point values. See the note on {@link #equals} for details. */ @Override public int hashCode() { return Objects.hashCode(xStats, yStats, sumOfProductsOfDeltas); } @Override public String toString() { if (count() > 0) { return MoreObjects.toStringHelper(this) .add("xStats", xStats) .add("yStats", yStats) .add("populationCovariance", populationCovariance()) .toString(); } else { return MoreObjects.toStringHelper(this) .add("xStats", xStats) .add("yStats", yStats) .toString(); } } double sumOfProductsOfDeltas() { return sumOfProductsOfDeltas; } private static double ensurePositive(double value) { if (value > 0.0) { return value; } else { return Double.MIN_VALUE; } } private static double ensureInUnitRange(double value) { if (value >= 1.0) { return 1.0; } if (value <= -1.0) { return -1.0; } return value; } // Serialization helpers /** The size of byte array representation in bytes. */ private static final int BYTES = Stats.BYTES * 2 + Double.SIZE / Byte.SIZE; /** * Gets a byte array representation of this instance. * *

Note: No guarantees are made regarding stability of the representation between * versions. */ public byte[] toByteArray() { ByteBuffer buffer = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN); xStats.writeTo(buffer); yStats.writeTo(buffer); buffer.putDouble(sumOfProductsOfDeltas); return buffer.array(); } /** * Creates a {@link PairedStats} instance from the given byte representation which was obtained by * {@link #toByteArray}. * *

Note: No guarantees are made regarding stability of the representation between * versions. */ public static PairedStats fromByteArray(byte[] byteArray) { checkNotNull(byteArray); checkArgument( byteArray.length == BYTES, "Expected PairedStats.BYTES = %s, got %s", BYTES, byteArray.length); ByteBuffer buffer = ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN); Stats xStats = Stats.readFrom(buffer); Stats yStats = Stats.readFrom(buffer); double sumOfProductsOfDeltas = buffer.getDouble(); return new PairedStats(xStats, yStats, sumOfProductsOfDeltas); } private static final long serialVersionUID = 0; }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy