All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.facebook.presto.verifier.framework.DeterminismAnalyzer Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.verifier.framework;

import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.jdbc.QueryStats;
import com.facebook.presto.sql.tree.AstVisitor;
import com.facebook.presto.sql.tree.Node;
import com.facebook.presto.sql.tree.Query;
import com.facebook.presto.sql.tree.Statement;
import com.facebook.presto.sql.tree.Table;
import com.facebook.presto.verifier.checksum.ChecksumResult;
import com.facebook.presto.verifier.checksum.ChecksumValidator;
import com.facebook.presto.verifier.event.DeterminismAnalysisDetails;
import com.facebook.presto.verifier.event.DeterminismAnalysisRun;
import com.facebook.presto.verifier.prestoaction.PrestoAction;
import com.facebook.presto.verifier.rewrite.QueryRewriter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;

import static com.facebook.presto.verifier.framework.ClusterType.CONTROL;
import static com.facebook.presto.verifier.framework.DataVerificationUtil.getColumns;
import static com.facebook.presto.verifier.framework.DataVerificationUtil.match;
import static com.facebook.presto.verifier.framework.DataVerificationUtil.teardownSafely;
import static com.facebook.presto.verifier.framework.DeterminismAnalysis.ANALYSIS_FAILED_DATA_CHANGED;
import static com.facebook.presto.verifier.framework.DeterminismAnalysis.ANALYSIS_FAILED_INCONSISTENT_SCHEMA;
import static com.facebook.presto.verifier.framework.DeterminismAnalysis.ANALYSIS_FAILED_QUERY_FAILURE;
import static com.facebook.presto.verifier.framework.DeterminismAnalysis.DETERMINISTIC;
import static com.facebook.presto.verifier.framework.DeterminismAnalysis.NON_DETERMINISTIC_CATALOG;
import static com.facebook.presto.verifier.framework.DeterminismAnalysis.NON_DETERMINISTIC_COLUMNS;
import static com.facebook.presto.verifier.framework.DeterminismAnalysis.NON_DETERMINISTIC_LIMIT_CLAUSE;
import static com.facebook.presto.verifier.framework.DeterminismAnalysis.NON_DETERMINISTIC_ROW_COUNT;
import static com.facebook.presto.verifier.framework.QueryStage.DETERMINISM_ANALYSIS_CHECKSUM;
import static com.facebook.presto.verifier.framework.QueryStage.DETERMINISM_ANALYSIS_MAIN;
import static com.facebook.presto.verifier.framework.QueryStage.DETERMINISM_ANALYSIS_SETUP;
import static com.facebook.presto.verifier.framework.VerifierUtil.callAndConsume;
import static com.facebook.presto.verifier.framework.VerifierUtil.runAndConsume;
import static com.google.common.collect.Iterables.getOnlyElement;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

/**
 * Classifies a control query as deterministic or not.
 *
 * <p>The analysis proceeds in three stages, stopping at the first conclusive one:
 * <ol>
 * <li>check whether the query references a table in one of the configured non-deterministic catalogs;</li>
 * <li>delegate to {@link LimitQueryDeterminismAnalyzer} for LIMIT-clause handling;</li>
 * <li>re-run the rewritten control query up to {@code maxAnalysisRuns} times and compare the checksum
 * of each run against the supplied control checksum.</li>
 * </ol>
 */
public class DeterminismAnalyzer
{
    private final SourceQuery sourceQuery;
    private final PrestoAction prestoAction;
    private final QueryRewriter queryRewriter;
    private final ChecksumValidator checksumValidator;
    private final TypeManager typeManager;

    private final boolean runTeardown;
    private final int maxAnalysisRuns;
    private final Set<String> nonDeterministicCatalogs;
    private final boolean handleLimitQuery;

    /**
     * Creates an analyzer for a single source query.
     *
     * @param sourceQuery the query under verification; its CONTROL variant is re-run during analysis
     * @param prestoAction used to execute setup, main, checksum and teardown queries
     * @param queryRewriter used to rewrite the control query for each analysis run
     * @param checksumValidator used to generate checksum queries and compare checksums
     * @param typeManager passed through to column resolution
     * @param config supplies teardown flag, run limit, non-deterministic catalogs and limit-query handling flag
     * @throws NullPointerException if any argument is null
     */
    public DeterminismAnalyzer(
            SourceQuery sourceQuery,
            PrestoAction prestoAction,
            QueryRewriter queryRewriter,
            ChecksumValidator checksumValidator,
            TypeManager typeManager,
            DeterminismAnalyzerConfig config)
    {
        this.sourceQuery = requireNonNull(sourceQuery, "sourceQuery is null");
        this.prestoAction = requireNonNull(prestoAction, "prestoAction is null");
        this.queryRewriter = requireNonNull(queryRewriter, "queryRewriter is null");
        this.checksumValidator = requireNonNull(checksumValidator, "checksumValidator is null");
        this.typeManager = requireNonNull(typeManager, "typeManager is null");

        requireNonNull(config, "config is null");
        this.runTeardown = config.isRunTeardown();
        this.maxAnalysisRuns = config.getMaxAnalysisRuns();
        // Snapshot the configured catalogs so later config changes cannot affect this analyzer.
        this.nonDeterministicCatalogs = ImmutableSet.copyOf(config.getNonDeterministicCatalogs());
        this.handleLimitQuery = config.isHandleLimitQuery();
    }

    /**
     * Runs the determinism analysis and packages the outcome together with per-run details.
     *
     * @param control the rewritten control query bundle that produced {@code controlChecksum}
     * @param controlChecksum the checksum of the original control run, used as the comparison baseline
     * @return the analysis details built from the recorded runs and the final classification
     */
    protected DeterminismAnalysisDetails analyze(QueryObjectBundle control, ChecksumResult controlChecksum)
    {
        DeterminismAnalysisDetails.Builder determinismAnalysisDetails = DeterminismAnalysisDetails.builder();
        DeterminismAnalysis analysis = analyze(control, controlChecksum, determinismAnalysisDetails);
        return determinismAnalysisDetails.build(analysis);
    }

    /**
     * Performs the staged analysis, recording each rerun into {@code determinismAnalysisDetails}.
     *
     * <p>A {@link QueryException} raised while re-running queries is reported as
     * {@code ANALYSIS_FAILED_QUERY_FAILURE} instead of being propagated. Column resolution happens before
     * the guarded section, so a failure there is not converted.
     */
    private DeterminismAnalysis analyze(QueryObjectBundle control, ChecksumResult controlChecksum, DeterminismAnalysisDetails.Builder determinismAnalysisDetails)
    {
        // Handle mutable catalogs
        if (isNonDeterministicCatalogReferenced(control.getQuery())) {
            return NON_DETERMINISTIC_CATALOG;
        }

        // Handle limit query
        LimitQueryDeterminismAnalysis limitQueryAnalysis = new LimitQueryDeterminismAnalyzer(
                prestoAction,
                handleLimitQuery,
                control.getQuery(),
                controlChecksum.getRowCount(),
                determinismAnalysisDetails).analyze();

        switch (limitQueryAnalysis) {
            case NOT_RUN:
            case FAILED_QUERY_FAILURE:
            case DETERMINISTIC:
                // Inconclusive for the overall verdict: try the next analysis
                break;
            case NON_DETERMINISTIC:
                return NON_DETERMINISTIC_LIMIT_CLAUSE;
            case FAILED_DATA_CHANGED:
                return ANALYSIS_FAILED_DATA_CHANGED;
            default:
                throw new IllegalArgumentException(format("Invalid limitQueryAnalysis: %s", limitQueryAnalysis));
        }

        // Rerun control query multiple times
        List<Column> columns = getColumns(prestoAction, typeManager, control.getObjectName());
        // Every bundle created below is tracked so that teardown covers all runs, including a failed one.
        Map<QueryObjectBundle, DeterminismAnalysisRun.Builder> queryRuns = new HashMap<>();
        try {
            for (int i = 0; i < maxAnalysisRuns; i++) {
                QueryObjectBundle queryBundle = queryRewriter.rewriteQuery(sourceQuery.getQuery(CONTROL), sourceQuery.getQueryConfiguration(CONTROL), CONTROL, false);
                DeterminismAnalysisRun.Builder run = determinismAnalysisDetails.addRun().setTableName(queryBundle.getObjectName().toString());
                // Register before executing anything so the finally block can tear this run down.
                queryRuns.put(queryBundle, run);

                // Rerun setup and main query
                queryBundle.getSetupQueries().forEach(query -> runAndConsume(
                        () -> prestoAction.execute(query, DETERMINISM_ANALYSIS_SETUP),
                        stats -> stats.getQueryStats().map(QueryStats::getQueryId).ifPresent(run::addSetupQueryId)));
                runAndConsume(
                        () -> prestoAction.execute(queryBundle.getQuery(), DETERMINISM_ANALYSIS_MAIN),
                        stats -> stats.getQueryStats().map(QueryStats::getQueryId).ifPresent(run::setQueryId));

                // Run checksum query
                Query checksumQuery = checksumValidator.generateChecksumQuery(queryBundle.getObjectName(), columns, Optional.empty());
                ChecksumResult testChecksum = getOnlyElement(callAndConsume(
                        () -> prestoAction.execute(checksumQuery, DETERMINISM_ANALYSIS_CHECKSUM, ChecksumResult::fromResultSet),
                        stats -> stats.getQueryStats().map(QueryStats::getQueryId).ifPresent(run::setChecksumQueryId)).getResults());

                // The same column list is used on both sides; only the checksums differ between runs.
                DeterminismAnalysis analysis = matchResultToDeterminism(match(DataMatchResult.DataType.DATA, checksumValidator, columns, columns, controlChecksum, testChecksum));
                if (analysis != DETERMINISTIC) {
                    // First mismatching run decides the outcome; remaining runs are skipped.
                    return analysis;
                }
            }

            return DETERMINISTIC;
        }
        catch (QueryException qe) {
            // A query failure during reruns makes the analysis inconclusive rather than fatal.
            return ANALYSIS_FAILED_QUERY_FAILURE;
        }
        finally {
            if (runTeardown) {
                queryRuns.forEach((queryBundle, run) -> teardownSafely(
                        prestoAction,
                        Optional.of(queryBundle),
                        queryStats -> queryStats.getQueryStats().map(QueryStats::getQueryId).ifPresent(run::addTeardownQueryId)));
            }
        }
    }

    /**
     * Maps a checksum comparison outcome onto the corresponding determinism classification.
     *
     * @throws IllegalArgumentException if the match type is not one of the handled values
     */
    private DeterminismAnalysis matchResultToDeterminism(DataMatchResult matchResult)
    {
        switch (matchResult.getMatchType()) {
            case MATCH:
                return DETERMINISTIC;
            case SCHEMA_MISMATCH:
                return ANALYSIS_FAILED_INCONSISTENT_SCHEMA;
            case ROW_COUNT_MISMATCH:
                return NON_DETERMINISTIC_ROW_COUNT;
            case COLUMN_MISMATCH:
                return NON_DETERMINISTIC_COLUMNS;
            default:
                throw new IllegalArgumentException(format("Invalid MatchResult: %s", matchResult));
        }
    }

    /**
     * Reports whether the statement references a table whose catalog is configured as non-deterministic.
     *
     * @param statement the statement whose AST is searched for table references
     * @return {@code true} if at least one fully qualified table name starts with a configured catalog;
     * always {@code false} when no catalogs are configured
     */
    @VisibleForTesting
    boolean isNonDeterministicCatalogReferenced(Statement statement)
    {
        // Skip the AST walk entirely when nothing is configured.
        if (nonDeterministicCatalogs.isEmpty()) {
            return false;
        }

        AtomicBoolean nonDeterministicCatalogReferenced = new AtomicBoolean();
        new NonDeterministicCatalogVisitor().process(statement, nonDeterministicCatalogReferenced);
        return nonDeterministicCatalogReferenced.get();
    }

    /**
     * AST visitor that sets the supplied flag when it encounters a table in a non-deterministic catalog.
     * Kept as an inner (non-static) class because it reads the enclosing analyzer's catalog set.
     */
    private class NonDeterministicCatalogVisitor
            extends AstVisitor<Void, AtomicBoolean>
    {
        /**
         * Default handling for every node type: descend into all children.
         */
        @Override
        protected Void visitNode(Node node, AtomicBoolean context)
        {
            node.getChildren().forEach(child -> process(child, context));
            return null;
        }

        /**
         * Flags the table when its name has exactly three parts and the first part is a configured catalog.
         * Names with fewer parts carry no explicit catalog and are not flagged.
         */
        @Override
        protected Void visitTable(Table node, AtomicBoolean nonDeterministicCatalogReferenced)
        {
            if (node.getName().getParts().size() == 3 && nonDeterministicCatalogs.contains(node.getName().getParts().get(0))) {
                nonDeterministicCatalogReferenced.set(true);
            }
            return null;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy