All downloads are free. The search and download functionality uses the official Maven repository.

com.facebook.presto.verifier.framework.ExtendedVerification Maven / Gradle / Ivy

There is a newer version: 0.291
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.verifier.framework;

import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.jdbc.QueryStats;
import com.facebook.presto.sql.tree.QualifiedName;
import com.facebook.presto.sql.tree.Query;
import com.facebook.presto.verifier.checksum.ChecksumResult;
import com.facebook.presto.verifier.checksum.ChecksumValidator;
import com.facebook.presto.verifier.prestoaction.QueryActions;
import com.facebook.presto.verifier.prestoaction.SqlExceptionClassifier;
import com.facebook.presto.verifier.resolver.FailureResolverManager;
import com.facebook.presto.verifier.rewrite.QueryRewriter;
import com.facebook.presto.verifier.source.SnapshotQueryConsumer;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.util.concurrent.ListeningExecutorService;

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalLong;
import java.util.Set;

import static com.facebook.presto.verifier.framework.DataMatchResult.DataType.BUCKET_DATA;
import static com.facebook.presto.verifier.framework.DataMatchResult.DataType.PARTITION_DATA;
import static com.facebook.presto.verifier.framework.DataMatchResult.MatchType.BUCKET_COUNT_MISMATCH;
import static com.facebook.presto.verifier.framework.DataMatchResult.MatchType.MATCH;
import static com.facebook.presto.verifier.framework.DataMatchResult.MatchType.PARTITION_COUNT_MISMATCH;
import static com.facebook.presto.verifier.framework.DataVerificationUtil.getColumns;
import static com.facebook.presto.verifier.framework.DataVerificationUtil.match;
import static com.facebook.presto.verifier.framework.QueryStage.CONTROL_BUCKET_CHECKSUM;
import static com.facebook.presto.verifier.framework.QueryStage.CONTROL_PARTITION_CHECKSUM;
import static com.facebook.presto.verifier.framework.QueryStage.TEST_BUCKET_CHECKSUM;
import static com.facebook.presto.verifier.framework.QueryStage.TEST_PARTITION_CHECKSUM;
import static com.facebook.presto.verifier.framework.VerifierConfig.QUERY_BANK_MODE;
import static com.facebook.presto.verifier.framework.VerifierUtil.callAndConsume;

public class ExtendedVerification
        extends DataVerification
{
    public ExtendedVerification(
            QueryActions queryActions,
            SourceQuery sourceQuery,
            QueryRewriter queryRewriter,
            DeterminismAnalyzer determinismAnalyzer,
            FailureResolverManager failureResolverManager,
            SqlExceptionClassifier exceptionClassifier,
            VerificationContext verificationContext,
            VerifierConfig verifierConfig,
            TypeManager typeManager,
            ChecksumValidator checksumValidator,
            ListeningExecutorService executor,
            SnapshotQueryConsumer snapshotQueryConsumer,
            Map snapshotQueries)
    {
        super(
                queryActions,
                sourceQuery,
                queryRewriter,
                determinismAnalyzer,
                failureResolverManager,
                exceptionClassifier,
                verificationContext,
                verifierConfig,
                typeManager,
                checksumValidator,
                executor,
                snapshotQueryConsumer,
                snapshotQueries);
    }

    @Override
    public DataMatchResult verify(
            QueryObjectBundle control,
            QueryObjectBundle test,
            Optional> controlQueryResult,
            Optional> testQueryResult,
            ChecksumQueryContext controlChecksumQueryContext,
            ChecksumQueryContext testChecksumQueryContext)
    {
        // 0. Data verification
        DataMatchResult dataMatchResult = super.verify(control, test, controlQueryResult, testQueryResult, controlChecksumQueryContext, testChecksumQueryContext);
        if (!dataMatchResult.isMatched()) {
            return dataMatchResult;
        }

        if (skipControl || saveSnapshot || runningMode.equals(QUERY_BANK_MODE)) {
            // Extended verification doesn't support query bank mode for now.
            return dataMatchResult;
        }
        List controlColumns = getColumns(getHelperAction(), typeManager, control.getObjectName());
        List testColumns = getColumns(getHelperAction(), typeManager, test.getObjectName());
        List controlPartitionColumns;
        List testPartitionColumns;
        try {
            controlPartitionColumns = getColumns(getHelperAction(), typeManager, formPartitionTableName(control.getObjectName()));
            testPartitionColumns = getColumns(getHelperAction(), typeManager, formPartitionTableName(test.getObjectName()));
        }
        catch (Throwable e) {
            return dataMatchResult;
        }
        List controlDataColumns = getDataColumn(controlColumns, ImmutableSet.copyOf(controlPartitionColumns));
        List testDataColumns = getDataColumn(testColumns, ImmutableSet.copyOf(testPartitionColumns));

        // 1. Partition verification
        Optional partitionMatchResult = verifyPartition(
                control,
                test,
                controlPartitionColumns,
                testPartitionColumns,
                controlDataColumns,
                testDataColumns,
                controlChecksumQueryContext,
                testChecksumQueryContext);

        // 2. Bucket verification
        Optional bucketMatchResult = verifyBucket(
                control,
                test,
                controlPartitionColumns,
                testPartitionColumns,
                controlDataColumns,
                testDataColumns,
                controlChecksumQueryContext,
                testChecksumQueryContext);

        if (partitionMatchResult.isPresent() && !partitionMatchResult.get().isMatched()) {
            return partitionMatchResult.get();
        }
        if (bucketMatchResult.isPresent() && !bucketMatchResult.get().isMatched()) {
            return bucketMatchResult.get();
        }
        return dataMatchResult;
    }

    private Optional verifyPartition(
            QueryObjectBundle control,
            QueryObjectBundle test,
            List controlPartitionColumns,
            List testPartitionColumns,
            List controlDataColumns,
            List testDataColumns,
            ChecksumQueryContext controlChecksumQueryContext,
            ChecksumQueryContext testChecksumQueryContext)
    {
        List controlPartitionChecksum = runPartitionChecksum(control, controlPartitionColumns, controlDataColumns, controlChecksumQueryContext, CONTROL_PARTITION_CHECKSUM);
        List testPartitionChecksum = runPartitionChecksum(test, testPartitionColumns, testDataColumns, testChecksumQueryContext, TEST_PARTITION_CHECKSUM);
        if (controlPartitionChecksum.size() != testPartitionChecksum.size()) {
            return Optional.of(new DataMatchResult(
                    PARTITION_DATA,
                    PARTITION_COUNT_MISMATCH,
                    Optional.empty(),
                    OptionalLong.of(controlPartitionChecksum.size()),
                    OptionalLong.of(testPartitionChecksum.size()),
                    ImmutableList.of()));
        }
        for (int i = 0; i < controlPartitionChecksum.size(); i++) {
            DataMatchResult partitionMatchResult = match(
                    PARTITION_DATA,
                    checksumValidator,
                    controlDataColumns,
                    testDataColumns,
                    controlPartitionChecksum.get(i),
                    testPartitionChecksum.get(i));
            if (!partitionMatchResult.isMatched()) {
                return Optional.of(partitionMatchResult);
            }
        }
        return Optional.of(new DataMatchResult(
                PARTITION_DATA,
                MATCH,
                Optional.empty(),
                OptionalLong.of(controlPartitionChecksum.size()),
                OptionalLong.of(testPartitionChecksum.size()),
                ImmutableList.of()));
    }

    private Optional verifyBucket(
            QueryObjectBundle control,
            QueryObjectBundle test,
            List controlPartitionColumns,
            List testPartitionColumns,
            List controlDataColumns,
            List testDataColumns,
            ChecksumQueryContext controlChecksumQueryContext,
            ChecksumQueryContext testChecksumQueryContext)
    {
        List controlBucketChecksum = null;
        List testBucketChecksum = null;
        try {
            controlBucketChecksum = runBucketChecksum(control, controlPartitionColumns, controlDataColumns, controlChecksumQueryContext, CONTROL_BUCKET_CHECKSUM);
            testBucketChecksum = runBucketChecksum(test, testPartitionColumns, testDataColumns, testChecksumQueryContext, TEST_BUCKET_CHECKSUM);
        }
        catch (Throwable e) {
            return Optional.empty();
        }
        if (controlBucketChecksum.size() != testBucketChecksum.size()) {
            return Optional.of(new DataMatchResult(
                    BUCKET_DATA,
                    BUCKET_COUNT_MISMATCH,
                    Optional.empty(),
                    OptionalLong.of(controlBucketChecksum.size()),
                    OptionalLong.of(testBucketChecksum.size()),
                    ImmutableList.of()));
        }
        for (int i = 0; i < controlBucketChecksum.size(); i++) {
            DataMatchResult bucketMatchResult = match(
                    BUCKET_DATA,
                    checksumValidator,
                    controlDataColumns,
                    testDataColumns,
                    controlBucketChecksum.get(i),
                    testBucketChecksum.get(i));
            if (!bucketMatchResult.isMatched()) {
                return Optional.of(bucketMatchResult);
            }
        }
        return Optional.of(new DataMatchResult(
                BUCKET_DATA,
                MATCH,
                Optional.empty(),
                OptionalLong.of(controlBucketChecksum.size()),
                OptionalLong.of(testBucketChecksum.size()),
                ImmutableList.of()));
    }

    // Returns the hidden system table name "tableName$partitions".
    private QualifiedName formPartitionTableName(QualifiedName tableName)
    {
        int nameSizes = tableName.getParts().size();
        ImmutableList.Builder nameBuilder = ImmutableList.builder();
        for (int index = 0; index < nameSizes; index++) {
            String part = null;
            if (index != nameSizes - 1) {
                part = tableName.getParts().get(index);
            }
            else {
                part = tableName.getParts().get(index) + "$partitions";
            }
            nameBuilder.add(part);
        }
        return QualifiedName.of(nameBuilder.build());
    }

    private List getDataColumn(List columns, Set partitionColumns)
    {
        ImmutableList.Builder dataColumns = ImmutableList.builder();
        for (Column column : columns) {
            if (!partitionColumns.contains(column)) {
                dataColumns.add(column);
            }
        }
        return dataColumns.build();
    }

    private List runPartitionChecksum(
            QueryObjectBundle bundle,
            List partitionColumns,
            List dataColumns,
            ChecksumQueryContext checksumQueryContext,
            QueryStage queryStage)
    {
        Query partitionChecksumQuery = checksumValidator.generatePartitionChecksumQuery(bundle.getObjectName(), dataColumns, partitionColumns, bundle.getPartitionsPredicate());
        checksumQueryContext.setPartitionChecksumQuery(formatSql(partitionChecksumQuery));
        return callAndConsume(
                () -> getHelperAction().execute(partitionChecksumQuery, queryStage, ChecksumResult::fromResultSet),
                stats -> stats.getQueryStats().map(QueryStats::getQueryId).ifPresent(checksumQueryContext::setPartitionChecksumQueryId)).getResults();
    }

    private List runBucketChecksum(
            QueryObjectBundle bundle,
            List partitionColumns,
            List dataColumns,
            ChecksumQueryContext checksumQueryContext,
            QueryStage queryStage)
    {
        Query bucketChecksumQuery = checksumValidator.generateBucketChecksumQuery(bundle.getObjectName(), partitionColumns, dataColumns, bundle.getPartitionsPredicate());
        List checksumResults = callAndConsume(
                () -> getHelperAction().execute(bucketChecksumQuery, queryStage, ChecksumResult::fromResultSet),
                stats -> stats.getQueryStats().map(QueryStats::getQueryId).ifPresent(checksumQueryContext::setBucketChecksumQueryId)).getResults();
        checksumQueryContext.setBucketChecksumQuery(formatSql(bucketChecksumQuery));
        return checksumResults;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy