All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.facebook.presto.verifier.checksum.FloatingPointColumnValidator Maven / Gradle / Ivy

There is a newer version: 0.290
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.verifier.checksum;

import com.facebook.presto.sql.tree.ArithmeticUnaryExpression;
import com.facebook.presto.sql.tree.Cast;
import com.facebook.presto.sql.tree.ComparisonExpression;
import com.facebook.presto.sql.tree.Expression;
import com.facebook.presto.sql.tree.FunctionCall;
import com.facebook.presto.sql.tree.QualifiedName;
import com.facebook.presto.sql.tree.SingleColumn;
import com.facebook.presto.verifier.framework.Column;
import com.facebook.presto.verifier.framework.VerifierConfig;
import com.google.common.collect.ImmutableList;

import javax.inject.Inject;

import java.util.List;
import java.util.Objects;
import java.util.Optional;

import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.sql.tree.ArithmeticUnaryExpression.Sign.MINUS;
import static com.facebook.presto.sql.tree.ComparisonExpression.Operator.EQUAL;
import static com.facebook.presto.verifier.framework.VerifierUtil.delimitedIdentifier;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Double.isInfinite;
import static java.lang.Double.isNaN;
import static java.lang.Math.abs;
import static java.lang.Math.min;

public class FloatingPointColumnValidator
        implements ColumnValidator
{
    private final double relativeErrorMargin;
    private final double absoluteErrorMargin;

    @Inject
    public FloatingPointColumnValidator(VerifierConfig config)
    {
        this.relativeErrorMargin = config.getRelativeErrorMargin();
        this.absoluteErrorMargin = config.getAbsoluteErrorMargin();
    }

    @Override
    public List generateChecksumColumns(Column column)
    {
        Expression doubleColumn = column.getType().equals(DOUBLE) ? column.getExpression() : new Cast(column.getExpression(), DOUBLE.getDisplayName());
        Expression positiveInfinity = new FunctionCall(QualifiedName.of("infinity"), ImmutableList.of());
        Expression negativeInfinity = new ArithmeticUnaryExpression(MINUS, positiveInfinity);

        return ImmutableList.of(
                new SingleColumn(
                        new FunctionCall(
                                QualifiedName.of("sum"),
                                Optional.empty(),
                                Optional.of(new FunctionCall(QualifiedName.of("is_finite"), ImmutableList.of(column.getExpression()))),
                                Optional.empty(),
                                false,
                                ImmutableList.of(doubleColumn)),
                        Optional.of(delimitedIdentifier(getSumColumnAlias(column)))),
                new SingleColumn(
                        new FunctionCall(
                                QualifiedName.of("count"),
                                Optional.empty(),
                                Optional.of(new FunctionCall(QualifiedName.of("is_nan"), ImmutableList.of(column.getExpression()))),
                                Optional.empty(),
                                false,
                                ImmutableList.of(column.getExpression())),
                        Optional.of(delimitedIdentifier(getNanCountColumnAlias(column)))),
                new SingleColumn(
                        new FunctionCall(
                                QualifiedName.of("count"),
                                Optional.empty(),
                                Optional.of(new ComparisonExpression(EQUAL, column.getExpression(), positiveInfinity)),
                                Optional.empty(),
                                false,
                                ImmutableList.of(column.getExpression())),
                        Optional.of(delimitedIdentifier(getPositiveInfinityCountColumnAlias(column)))),
                new SingleColumn(
                        new FunctionCall(
                                QualifiedName.of("count"),
                                Optional.empty(),
                                Optional.of(new ComparisonExpression(EQUAL, column.getExpression(), negativeInfinity)),
                                Optional.empty(),
                                false,
                                ImmutableList.of(column.getExpression())),
                        Optional.of(delimitedIdentifier(getNegativeInfinityCountColumnAlias(column)))));
    }

    @Override
    public List> validate(Column column, ChecksumResult controlResult, ChecksumResult testResult)
    {
        checkArgument(
                controlResult.getRowCount() == testResult.getRowCount(),
                "Test row count (%s) does not match control row count (%s)",
                testResult.getRowCount(),
                controlResult.getRowCount());

        long rowCount = controlResult.getRowCount();

        FloatingPointColumnChecksum controlChecksum = toColumnChecksum(column, controlResult, rowCount);
        FloatingPointColumnChecksum testChecksum = toColumnChecksum(column, testResult, rowCount);
        return ImmutableList.of(validate(column, controlChecksum, testChecksum, rowCount));
    }

    private static FloatingPointColumnChecksum toColumnChecksum(Column column, ChecksumResult checksumResult, long rowCount)
    {
        return new FloatingPointColumnChecksum(
                checksumResult.getChecksum(getSumColumnAlias(column)),
                (long) checksumResult.getChecksum(getNanCountColumnAlias(column)),
                (long) checksumResult.getChecksum(getPositiveInfinityCountColumnAlias(column)),
                (long) checksumResult.getChecksum(getNegativeInfinityCountColumnAlias(column)),
                rowCount);
    }

    private ColumnMatchResult validate(
            Column column,
            FloatingPointColumnChecksum controlChecksum,
            FloatingPointColumnChecksum testChecksum,
            long rowCount)
    {
        if (!Objects.equals(controlChecksum.getNanCount(), testChecksum.getNanCount()) ||
                !Objects.equals(controlChecksum.getPositiveInfinityCount(), testChecksum.getPositiveInfinityCount()) ||
                !Objects.equals(controlChecksum.getNegativeInfinityCount(), testChecksum.getNegativeInfinityCount())) {
            return new ColumnMatchResult<>(false, column, controlChecksum, testChecksum);
        }

        if (controlChecksum.getSum() == null || testChecksum.getSum() == null) {
            return new ColumnMatchResult<>(controlChecksum.getSum() == null && testChecksum.getSum() == null, column, controlChecksum, testChecksum);
        }

        // Implementation according to http://floating-point-gui.de/errors/comparison/
        double controlSum = (double) controlChecksum.getSum();
        double testSum = (double) testChecksum.getSum();

        // Fail if either sum is NaN or Infinity
        if (isNaN(controlSum) || isNaN(testSum) || isInfinite(controlSum) || isInfinite(testSum)) {
            return new ColumnMatchResult<>(false, column, controlChecksum, testChecksum);
        }

        // Use absolute error margin if either control sum or test sum is 0.
        // Row count won't be zero since otherwise controlSum and testSum will both be null, and this has already been handled above.
        double controlMean = controlSum / rowCount;
        double testMean = testSum / rowCount;
        if (abs(controlMean) < absoluteErrorMargin || abs(testMean) < absoluteErrorMargin) {
            return new ColumnMatchResult<>(abs(controlMean) < absoluteErrorMargin && abs(testMean) < absoluteErrorMargin, column, controlChecksum, testChecksum);
        }

        // Use relative error margin for the common cases
        double difference = abs(controlSum - testSum);
        double relativeError = difference / min((abs(controlSum) + abs(testSum)) / 2, Double.MAX_VALUE);
        return new ColumnMatchResult<>(relativeError < relativeErrorMargin, column, Optional.of("relative error: " + relativeError), controlChecksum, testChecksum);
    }

    private static String getSumColumnAlias(Column column)
    {
        return column.getName() + "$sum";
    }

    private static String getNanCountColumnAlias(Column column)
    {
        return column.getName() + "$nan_count";
    }

    private static String getPositiveInfinityCountColumnAlias(Column column)
    {
        return column.getName() + "$pos_inf_count";
    }

    private static String getNegativeInfinityCountColumnAlias(Column column)
    {
        return column.getName() + "$neg_inf_count";
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy